author     Jessica Paquette <jpaquette@apple.com>    2020-12-04 15:51:44 -0800
committer  Tom Stellard <tstellar@redhat.com>        2020-12-15 18:28:58 -0500
commit     280e47ea0e837b809be03f2048ac8abc14dbc387
tree       c42f26b214be904e7b7b325642bb6ca4bfffd9df
parent     [AArch64][GlobalISel] Look through a G_ZEXT when trying to match shift-extend...
[AArch64][GlobalISel] Narrow 128-bit regs to 64-bit regs in emitTestBit
When we have a 128-bit register, emitTestBit would always narrow it to 32 bits,
but when the bit number is >= 32 we need a TB(N)ZX. This would cause a crash,
as the test register ended up with the wrong register class. (PR48379)
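For reference, a hypothetical C++ reduction of the failing pattern could look like the
snippet below (the function name and exact source shape are illustrative, not taken from
the PR; the new no_trunc MIR test added below exercises the equivalent gMIR after
legalization):

  #include <cstdint>

  // Hypothetical reduction of the PR48379 pattern: test a bit index >= 32 of a
  // value truncated from a 128-bit load. Selecting the conditional branch needs
  // TB(N)ZX on a 64-bit register, so unconditionally narrowing the test register
  // to 32 bits leaves it in the wrong register class.
  // Note: unsigned __int128 is a Clang/GCC extension on 64-bit targets.
  bool test_bit_33(const unsigned __int128 *p) {
    std::uint64_t Lo = static_cast<std::uint64_t>(*p); // G_TRUNC s128 -> s64
    return (Lo & (1ULL << 33)) != 0;                   // G_AND + G_ICMP ne -> TBNZX
  }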
This generalizes `narrowExtendRegIfNeeded` into `moveScalarRegClass`, which moves a
scalar register into an arbitrary destination register class.
This also allows us to remove `widenGPRBankRegIfNeeded` entirely, since
`selectCopy` correctly handles SUBREG_TO_REG etc.
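Condensed from the patch below, the generalized helper is essentially a class-constrained
copy that is selected on the spot, with `selectCopy` deciding between a plain copy, a
subregister copy, or SUBREG_TO_REG:

  // Sketch, condensed from this patch: move Reg into RC unless it already has
  // the width of RC, deferring the actual widening/narrowing to selectCopy.
  Register AArch64InstructionSelector::moveScalarRegClass(
      Register Reg, const TargetRegisterClass &RC, MachineIRBuilder &MIB) const {
    MachineRegisterInfo &MRI = *MIB.getMRI();
    auto Ty = MRI.getType(Reg);
    assert(!Ty.isVector() && "Expected scalars only!");
    if (Ty.getSizeInBits() == TRI.getRegSizeInBits(RC))
      return Reg;
    // Build a COPY into the requested class and select it immediately.
    auto Copy = MIB.buildCopy({&RC}, {Reg});
    selectCopy(*Copy, TII, MRI, TRI, RBI);
    return Copy.getReg(0);
  }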
This does create some codegen changes, since `selectCopy` uses the `all` register class
variants and so emits an extra cross-class COPY (visible in the updated MIR tests below).
However, I think these will likely be optimized away, and we can always improve the
`selectCopy` code itself. It looks like we should revisit `selectCopy` at this point, and
possibly refactor it into at least one `emit` function.
Differential Revision: https://reviews.llvm.org/D92707
(cherry picked from commit 195a7af0abb26915f962462f69c0f17e3835f78b)
4 files changed, 67 insertions, 77 deletions
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
index 90bab9603245..7733fe7f7b24 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
@@ -289,14 +289,15 @@ private:
       getExtendTypeForInst(MachineInstr &MI, MachineRegisterInfo &MRI,
                            bool IsLoadStore = false) const;
 
-  /// Instructions that accept extend modifiers like UXTW expect the register
-  /// being extended to be a GPR32. Narrow ExtReg to a 32-bit register using a
-  /// subregister copy if necessary. Return either ExtReg, or the result of the
-  /// new copy.
-  Register narrowExtendRegIfNeeded(Register ExtReg,
-                                   MachineIRBuilder &MIB) const;
-  Register widenGPRBankRegIfNeeded(Register Reg, unsigned Size,
-                                   MachineIRBuilder &MIB) const;
+  /// Move \p Reg to \p RC if \p Reg is not already on \p RC.
+  ///
+  /// \returns Either \p Reg if no change was necessary, or the new register
+  /// created by moving \p Reg.
+  ///
+  /// Note: This uses emitCopy right now.
+  Register moveScalarRegClass(Register Reg, const TargetRegisterClass &RC,
+                              MachineIRBuilder &MIB) const;
+
   ComplexRendererFns selectArithExtendedRegister(MachineOperand &Root) const;
 
   void renderTruncImm(MachineInstrBuilder &MIB, const MachineInstr &MI,
@@ -1195,10 +1196,10 @@ MachineInstr *AArch64InstructionSelector::emitTestBit(
   // TBNZW work.
   bool UseWReg = Bit < 32;
   unsigned NecessarySize = UseWReg ? 32 : 64;
-  if (Size < NecessarySize)
-    TestReg = widenGPRBankRegIfNeeded(TestReg, NecessarySize, MIB);
-  else if (Size > NecessarySize)
-    TestReg = narrowExtendRegIfNeeded(TestReg, MIB);
+  if (Size != NecessarySize)
+    TestReg = moveScalarRegClass(
+        TestReg, UseWReg ? AArch64::GPR32RegClass : AArch64::GPR64RegClass,
+        MIB);
 
   static const unsigned OpcTable[2][2] = {{AArch64::TBZX, AArch64::TBNZX},
                                           {AArch64::TBZW, AArch64::TBNZW}};
@@ -4984,7 +4985,7 @@ AArch64InstructionSelector::selectExtendedSHL(
 
     // Need a 32-bit wide register here.
     MachineIRBuilder MIB(*MRI.getVRegDef(Root.getReg()));
-    OffsetReg = narrowExtendRegIfNeeded(OffsetReg, MIB);
+    OffsetReg = moveScalarRegClass(OffsetReg, AArch64::GPR32RegClass, MIB);
   }
 
   // We can use the LHS of the GEP as the base, and the LHS of the shift as an
@@ -5156,8 +5157,8 @@ AArch64InstructionSelector::selectAddrModeWRO(MachineOperand &Root,
 
   // Need a 32-bit wide register.
   MachineIRBuilder MIB(*PtrAdd);
-  Register ExtReg =
-      narrowExtendRegIfNeeded(OffsetInst->getOperand(1).getReg(), MIB);
+  Register ExtReg = moveScalarRegClass(OffsetInst->getOperand(1).getReg(),
+                                       AArch64::GPR32RegClass, MIB);
   unsigned SignExtend = Ext == AArch64_AM::SXTW;
 
   // Base is LHS, offset is ExtReg.
@@ -5431,67 +5432,21 @@ AArch64_AM::ShiftExtendType AArch64InstructionSelector::getExtendTypeForInst(
   }
 }
 
-Register AArch64InstructionSelector::narrowExtendRegIfNeeded(
-    Register ExtReg, MachineIRBuilder &MIB) const {
+Register AArch64InstructionSelector::moveScalarRegClass(
+    Register Reg, const TargetRegisterClass &RC, MachineIRBuilder &MIB) const {
   MachineRegisterInfo &MRI = *MIB.getMRI();
-  if (MRI.getType(ExtReg).getSizeInBits() == 32)
-    return ExtReg;
-
-  // Insert a copy to move ExtReg to GPR32.
-  Register NarrowReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
-  auto Copy = MIB.buildCopy({NarrowReg}, {ExtReg});
+  auto Ty = MRI.getType(Reg);
+  assert(!Ty.isVector() && "Expected scalars only!");
+  if (Ty.getSizeInBits() == TRI.getRegSizeInBits(RC))
+    return Reg;
 
-  // Select the copy into a subregister copy.
+  // Create a copy and immediately select it.
+  // FIXME: We should have an emitCopy function?
+  auto Copy = MIB.buildCopy({&RC}, {Reg});
   selectCopy(*Copy, TII, MRI, TRI, RBI);
   return Copy.getReg(0);
 }
 
-Register AArch64InstructionSelector::widenGPRBankRegIfNeeded(
-    Register Reg, unsigned WideSize, MachineIRBuilder &MIB) const {
-  assert(WideSize >= 8 && "WideSize is smaller than all possible registers?");
-  MachineRegisterInfo &MRI = *MIB.getMRI();
-  unsigned NarrowSize = MRI.getType(Reg).getSizeInBits();
-  assert(WideSize >= NarrowSize &&
-         "WideSize cannot be smaller than NarrowSize!");
-
-  // If the sizes match, just return the register.
-  //
-  // If NarrowSize is an s1, then we can select it to any size, so we'll treat
-  // it as a don't care.
-  if (NarrowSize == WideSize || NarrowSize == 1)
-    return Reg;
-
-  // Now check the register classes.
-  const RegisterBank *RB = RBI.getRegBank(Reg, MRI, TRI);
-  const TargetRegisterClass *OrigRC = getMinClassForRegBank(*RB, NarrowSize);
-  const TargetRegisterClass *WideRC = getMinClassForRegBank(*RB, WideSize);
-  assert(OrigRC && "Could not determine narrow RC?");
-  assert(WideRC && "Could not determine wide RC?");
-
-  // If the sizes differ, but the register classes are the same, there is no
-  // need to insert a SUBREG_TO_REG.
-  //
-  // For example, an s8 that's supposed to be a GPR will be selected to either
-  // a GPR32 or a GPR64 register. Note that this assumes that the s8 will
-  // always end up on a GPR32.
-  if (OrigRC == WideRC)
-    return Reg;
-
-  // We have two different register classes. Insert a SUBREG_TO_REG.
-  unsigned SubReg = 0;
-  getSubRegForClass(OrigRC, TRI, SubReg);
-  assert(SubReg && "Couldn't determine subregister?");
-
-  // Build the SUBREG_TO_REG and return the new, widened register.
-  auto SubRegToReg =
-      MIB.buildInstr(AArch64::SUBREG_TO_REG, {WideRC}, {})
-          .addImm(0)
-          .addUse(Reg)
-          .addImm(SubReg);
-  constrainSelectedInstRegOperands(*SubRegToReg, TII, TRI, RBI);
-  return SubRegToReg.getReg(0);
-}
-
 /// Select an "extended register" operand. This operand folds in an extend
 /// followed by an optional left shift.
 InstructionSelector::ComplexRendererFns
@@ -5552,7 +5507,7 @@ AArch64InstructionSelector::selectArithExtendedRegister(
   // We require a GPR32 here. Narrow the ExtReg if needed using a subregister
   // copy.
   MachineIRBuilder MIB(*RootDef);
-  ExtReg = narrowExtendRegIfNeeded(ExtReg, MIB);
+  ExtReg = moveScalarRegClass(ExtReg, AArch64::GPR32RegClass, MIB);
 
   return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(ExtReg); },
            [=](MachineInstrBuilder &MIB) {
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/opt-fold-ext-tbz-tbnz.mir b/llvm/test/CodeGen/AArch64/GlobalISel/opt-fold-ext-tbz-tbnz.mir
index 977bb5a64cf5..9962bd87c175 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/opt-fold-ext-tbz-tbnz.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/opt-fold-ext-tbz-tbnz.mir
@@ -78,8 +78,9 @@ body: |
   ; CHECK:   successors: %bb.0(0x40000000), %bb.1(0x40000000)
   ; CHECK:   liveins: $h0
   ; CHECK:   [[SUBREG_TO_REG:%[0-9]+]]:fpr32 = SUBREG_TO_REG 0, $h0, %subreg.hsub
-  ; CHECK:   %copy:gpr32 = COPY [[SUBREG_TO_REG]]
-  ; CHECK:   TBNZW %copy, 3, %bb.1
+  ; CHECK:   %copy:gpr32all = COPY [[SUBREG_TO_REG]]
+  ; CHECK:   [[COPY:%[0-9]+]]:gpr32 = COPY %copy
+  ; CHECK:   TBNZW [[COPY]], 3, %bb.1
   ; CHECK:   B %bb.0
   ; CHECK: bb.1:
   ; CHECK:   RET_ReallyLR
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/subreg-copy.mir b/llvm/test/CodeGen/AArch64/GlobalISel/subreg-copy.mir
index efb999909ccc..d5902d70842b 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/subreg-copy.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/subreg-copy.mir
@@ -34,3 +34,35 @@ body: |
   bb.1:
     RET_ReallyLR
 ...
+---
+name: no_trunc
+alignment: 4
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+  ; CHECK-LABEL: name: no_trunc
+  ; CHECK: bb.0:
+  ; CHECK:   successors: %bb.1(0x80000000)
+  ; CHECK:   liveins: $x0
+  ; CHECK:   [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
+  ; CHECK:   [[LDRQui:%[0-9]+]]:fpr128 = LDRQui [[COPY]], 0 :: (load 16)
+  ; CHECK:   [[COPY1:%[0-9]+]]:gpr64all = COPY [[LDRQui]].dsub
+  ; CHECK:   [[COPY2:%[0-9]+]]:gpr64 = COPY [[COPY1]]
+  ; CHECK:   TBNZX [[COPY2]], 33, %bb.1
+  ; CHECK: bb.1:
+  ; CHECK:   RET_ReallyLR
+  bb.0:
+    liveins: $x0
+    %1:gpr(p0) = COPY $x0
+    %3:gpr(s64) = G_CONSTANT i64 8589934592
+    %5:gpr(s64) = G_CONSTANT i64 0
+    %0:fpr(s128) = G_LOAD %1:gpr(p0) :: (load 16)
+    %2:fpr(s64) = G_TRUNC %0:fpr(s128)
+    %8:gpr(s64) = COPY %2:fpr(s64)
+    %4:gpr(s64) = G_AND %8:gpr, %3:gpr
+    %7:gpr(s32) = G_ICMP intpred(ne), %4:gpr(s64), %5:gpr
+    %6:gpr(s1) = G_TRUNC %7:gpr(s32)
+    G_BRCOND %6:gpr(s1), %bb.1
+  bb.1:
+    RET_ReallyLR
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/widen-narrow-tbz-tbnz.mir b/llvm/test/CodeGen/AArch64/GlobalISel/widen-narrow-tbz-tbnz.mir
index 22963c50a2eb..7db671b08224 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/widen-narrow-tbz-tbnz.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/widen-narrow-tbz-tbnz.mir
@@ -106,8 +106,9 @@ body: |
   ; CHECK:   successors: %bb.0(0x40000000), %bb.1(0x40000000)
   ; CHECK:   liveins: $w0
   ; CHECK:   %reg:gpr32all = COPY $w0
-  ; CHECK:   [[SUBREG_TO_REG:%[0-9]+]]:gpr64 = SUBREG_TO_REG 0, %reg, %subreg.sub_32
-  ; CHECK:   TBZX [[SUBREG_TO_REG]], 33, %bb.1
+  ; CHECK:   [[SUBREG_TO_REG:%[0-9]+]]:gpr64all = SUBREG_TO_REG 0, %reg, %subreg.sub_32
+  ; CHECK:   [[COPY:%[0-9]+]]:gpr64 = COPY [[SUBREG_TO_REG]]
+  ; CHECK:   TBZX [[COPY]], 33, %bb.1
   ; CHECK:   B %bb.0
   ; CHECK: bb.1:
   ; CHECK:   RET_ReallyLR
@@ -140,8 +141,9 @@ body: |
   ; CHECK: bb.0:
   ; CHECK:   successors: %bb.0(0x40000000), %bb.1(0x40000000)
   ; CHECK:   %reg:gpr32 = IMPLICIT_DEF
-  ; CHECK:   [[SUBREG_TO_REG:%[0-9]+]]:gpr64 = SUBREG_TO_REG 0, %reg, %subreg.sub_32
-  ; CHECK:   TBZX [[SUBREG_TO_REG]], 33, %bb.1
+  ; CHECK:   [[SUBREG_TO_REG:%[0-9]+]]:gpr64all = SUBREG_TO_REG 0, %reg, %subreg.sub_32
+  ; CHECK:   [[COPY:%[0-9]+]]:gpr64 = COPY [[SUBREG_TO_REG]]
+  ; CHECK:   TBZX [[COPY]], 33, %bb.1
   ; CHECK:   B %bb.0
   ; CHECK: bb.1:
   ; CHECK:   RET_ReallyLR