Lines Matching +full:sd +full:- +full:hs
1 //==- AArch64SchedCortexA510.td - ARM Cortex-A510 Scheduling Definitions -*- tablegen -*-=//
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // This file defines the machine model for the ARM Cortex-A510 processor.
11 //===----------------------------------------------------------------------===//
13 // ===---------------------------------------------------------------------===//
14 // The following definitions describe the per-operand machine model.
17 // Cortex-A510 machine model for scheduling and other instruction cost heuristics.
19 let MicroOpBufferSize = 0; // The Cortex-A510 is an in-order processor
20 let IssueWidth = 3; // Up to three micro-ops can be issued per cycle
25 let CompleteModel = 0; // Covers instructions applicable to Cortex-A510.
32 //===----------------------------------------------------------------------===//
33 // Subtarget-specific SchedWrite types
37 //===----------------------------------------------------------------------===//
41 // Cortex-A510 is in-order.
45 def CortexA510UnitMAC : ProcResource<1>; // Int MAC, 64-bit wide
54 // instructions, which can mostly be dual-issued; that's why for now we model
70 def : WriteRes<WriteISReg, [CortexA510UnitALU]> { let Latency = 2; } // ALU of Shifted-Reg: latency 2 on the ALU unit
71 def : WriteRes<WriteIEReg, [CortexA510UnitALU]> { let Latency = 2; } // ALU of Extended-Reg: latency 2 on the ALU unit
76 def : WriteRes<WriteIM32, [CortexA510UnitMAC]> { let Latency = 3; } // 32-bit Multiply: latency 3 on the MAC unit
77 def : WriteRes<WriteIM64, [CortexA510UnitMAC]> { let Latency = 5; let ReleaseAtCycles = [2];} // 64-bit Multiply: latency 5; unlike the 32-bit form it also sets ReleaseAtCycles = [2] for the MAC unit
87 //===----------------------------------------------------------------------===//
90 //===----------------------------------------------------------------------===//
106 //===----------------------------------------------------------------------===//
107 // Define generic 2 micro-op types
144 // Pre/Post Indexing - Performed as part of address generation
155 // Vector Store - Similar to vector loads, can take 1-3 cycles to issue.
220 //===----------------------------------------------------------------------===//
221 // Subtarget-specific SchedRead types.
240 //===----------------------------------------------------------------------===//
241 // Subtarget-specific InstRWs.
250 // -----------------------------------------------------------------------------
271 //---
273 //---
283 //---
284 // Vector Loads - 128-bit per cycle
285 //---
286 // 1-element structures
309 // 2-element structures
320 // 3-element structures
331 // 4-element structures
332 def : InstRW<[CortexA510WriteVLD2], (instregex "LD4i(8|16|32|64)$")>; // load single 4-el structure to one lane of 4 regs. NOTE(review): mapped to the VLD2 write type, not VLD4 - confirm intended.
333 def : InstRW<[CortexA510WriteVLD2], (instregex "LD4Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>; // load single 4-el structure, replicate to all lanes of 4 regs. NOTE(review): also mapped to the VLD2 write type - confirm intended.
334 def : InstRW<[CortexA510WriteVLD4], (instregex "LD4Fourv(8b|4h|2s|1d)$")>; // load multiple 4-el structures to 4 regs (8b/4h/2s/1d arrangements only on this line).
342 //---
344 //---
373 //---
375 //---
429 def : InstRW<[CortexA510Write<3, CortexA510UnitVALU>], (instregex "CM(EQ|GE|GT|HI|HS|LE|LT)v(1i64|2i32|4i16|8i8)")>; // CMEQ/CMGE/CMGT/CMHI/CMHS/CMLE/CMLT, v1i64/v2i32/v4i16/v8i8 forms
430 def : InstRW<[CortexA510Write<3, CortexA510UnitVALU>], (instregex "CM(EQ|GE|GT|HI|HS|LE|LT)v(2i64|4i32|8i16|16i8)")>; // same compares, v2i64/v4i32/v8i16/v16i8 forms
501 // -----------------------------------------------------------------------------
537 // -----------------------------------------------------------------------------
560 (instregex "^WHILE(GE|GT|HI|HS|LE|LO|LS|LT)_P(WW|XX)_[BHSD]")>;
633 // -----------------------------------------------------------------------------
654 "^ADR_LSL_ZZZ_[SD]_[0123]",
673 def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>], (instregex "^(AD|SB)CL[BT]_ZZZ_[SD]")>; // ADCLB/ADCLT/SBCLB/SBCLT, _ZZZ_S and _ZZZ_D forms
749 (instregex "^CMP(EQ|GE|GT|HI|HS|LE|LO|LS|LT|NE)_PPzZ[IZ]_[BHSD]",
750 "^CMP(EQ|GE|GT|HI|HS|LE|LO|LS|LT|NE)_WIDE_PPzZZ_[BHS]")>;
757 // Complex dot product 8-bit element
760 // Complex dot product 16-bit element
763 // Complex multiply-add B, H, S element size
765 "^CMLA_ZZZI_[HS]")>;
767 // Complex multiply-add D element size
775 "^COMPACT_ZPZ_[SD]",
779 def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>], (instregex "^[SU]CVTF_ZPmZ_Dto[SD]")>; // SCVTF/UCVTF _ZPmZ, DtoS and DtoD forms
785 def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>], (instregex "^[SU]CVTF_ZPmZ_Sto[HS]")>; // SCVTF/UCVTF _ZPmZ, StoH and StoS forms
824 "^[SU]XTH_ZPmZ_[SD]",
843 def : InstRW<[CortexA510MCWrite<8, 2, CortexA510UnitVALU0>], (instregex "^HISTCNT_ZPzZZ_[SD]",
877 // Matrix multiply-accumulate
893 def : InstRW<[CortexA510Write<4, CortexA510UnitVMAC>], (instregex "^[SU]MULL[BT]_ZZZI_[SD]",
906 "^[SU]ML[AS]L[BT]_ZZZI_[SD]")>;
910 "^SQDML[AS](LB|LT)_ZZZI_[SD]")>;
914 "^SQDMULH_ZZZI_[HS]")>;
921 "^SQDMULL[BT]_ZZZI_[SD]")>;
927 "^SQRDML[AS]H_ZZZI_[HS]",
928 "^SQRDCMLAH_ZZZI_[HS]")>;
937 "^SQRDMULH_ZZZI_[HS]")>;
975 "^REVH_ZPmZ_[SD]",
996 // SVE floating-point instructions
997 // -----------------------------------------------------------------------------
1000 def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>], (instregex "^FAB[SD]_ZPmZ_[HSD]", // FABS/FABD, _ZPmZ forms with H/S/D suffix
1001 "^FAB[SD]_ZPZZ_[HSD]")>; // FABS/FABD, _ZPZZ forms with H/S/D suffix
1029 "^FCMLA_ZZZI_[HS]")>;
1154 // -----------------------------------------------------------------------------
1169 // -----------------------------------------------------------------------------
1180 "^LD1S?H_[SD]_IMM$",
1185 "^LD1S?H_[SD]$",
1192 "^LD1RS?H_[SD]_IMM$",
1205 // Non temporal gather load, vector + scalar 32-bit element size
1209 // Non temporal gather load, vector + scalar 64-bit element size
1216 "^LDFF1S?H_[SD]$",
1222 "^LDNF1S?H_[SD]_IMM$",
1243 // Gather load, vector + imm, 32-bit element size
1247 // Gather load, vector + imm, 64-bit element size
1251 // Gather load, 64-bit element size
1258 // Gather load, 32-bit scaled offset
1263 // Gather load, 32-bit unpacked unscaled offset
1269 // -----------------------------------------------------------------------------
1280 "^ST1H_[SD]_IMM$",
1284 def : InstRW<[CortexA510VSt0], (instregex "^ST1H(_[SD])?$")>; // matches exactly ST1H, ST1H_S and ST1H_D
1323 // Scatter non temporal store, vector + scalar 32-bit element size
1326 // Scatter non temporal store, vector + scalar 64-bit element size
1329 // Scatter store vector + imm 32-bit element size
1333 // Scatter store vector + imm 64-bit element size
1337 // Scatter store, 32-bit scaled offset
1341 // Scatter store, 32-bit unpacked unscaled offset
1345 // Scatter store, 32-bit unpacked scaled offset
1349 // Scatter store, 32-bit unscaled offset
1353 // Scatter store, 64-bit scaled offset
1357 // Scatter store, 64-bit unscaled offset
1362 // -----------------------------------------------------------------------------
1378 // -----------------------------------------------------------------------------