AArch64SchedA510.td - OpenGrok cross reference for /freebsd-src/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedA510.td

Lines Matching +full:sd +full:- +full:hs
1 //==- AArch64SchedA510.td - ARM Cortex-A510 Scheduling Definitions -*- tablegen -*-=//
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // This file defines the machine model for the ARM Cortex-A510 processor.
11 //===----------------------------------------------------------------------===//
13 // ===---------------------------------------------------------------------===//
14 // The following definitions describe the per-operand machine model.
17 // Cortex-A510 machine model for scheduling and other instruction cost heuristics.
19   let MicroOpBufferSize = 0;  // The Cortex-A510 is an in-order processor
20   let IssueWidth = 3;         // Up to 3 micro-ops issued per cycle. NOTE(review): comment used to say "dual-issues" - confirm
25   let CompleteModel = 0;      // Covers instructions applicable to Cortex-A510.
32 //===----------------------------------------------------------------------===//
33 // Subtarget-specific SchedWrite types
37 //===----------------------------------------------------------------------===//
41 // Cortex-A510 is in-order.
45   def CortexA510UnitMAC    : ProcResource<1>;    // Int MAC, 64-bit wide
54   // instructions, which can mostly be dual-issued; that's why for now we model
70 def : WriteRes<WriteISReg, [CortexA510UnitALU]> { let Latency = 2; }  // ALU of Shifted-Reg
71 def : WriteRes<WriteIEReg, [CortexA510UnitALU]> { let Latency = 2; }  // ALU of Extended-Reg
76 def : WriteRes<WriteIM32, [CortexA510UnitMAC]> { let Latency = 3; }   // 32-bit Multiply
77 def : WriteRes<WriteIM64, [CortexA510UnitMAC]> { let Latency = 5; let ReleaseAtCycles = [2];}   // 64-bit Multiply; CortexA510UnitMAC is released after 2 cycles
87 //===----------------------------------------------------------------------===//
90 //===----------------------------------------------------------------------===//
106 //===----------------------------------------------------------------------===//
107 // Define generic 2 micro-op types
144 // Pre/Post Indexing - Performed as part of address generation
155 // Vector Store - Similar to vector loads, can take 1-3 cycles to issue.
220 //===----------------------------------------------------------------------===//
221 // Subtarget-specific SchedRead types.
240 //===----------------------------------------------------------------------===//
241 // Subtarget-specific InstRWs.
250 // -----------------------------------------------------------------------------
271 //---
273 //---
283 //---
284 // Vector Loads - 128-bit per cycle
285 //---
286 //   1-element structures
309 //    2-element structures
320 //    3-element structures
331 //    4-element structures
332 def : InstRW<[CortexA510WriteVLD2], (instregex "LD4i(8|16|32|64)$")>;                // load single 4-el structure to one lane of 4 regs.
333 def : InstRW<[CortexA510WriteVLD2], (instregex "LD4Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>; // load single 4-el structure, replicate to all lanes of 4 regs.
334 def : InstRW<[CortexA510WriteVLD4], (instregex "LD4Fourv(8b|4h|2s|1d)$")>;           // load multiple 4-el structures to 4 regs.
342 //---
344 //---
373 //---
375 //---
429 def : InstRW<[CortexA510Write<3, CortexA510UnitVALU>], (instregex "CM(EQ|GE|GT|HI|HS|LE|LT)v(1i64|2i32|4i16|8i8)")>; // arrangements totalling 64 bits (1x64, 2x32, 4x16, 8x8)
430 def : InstRW<[CortexA510Write<3, CortexA510UnitVALU>], (instregex "CM(EQ|GE|GT|HI|HS|LE|LT)v(2i64|4i32|8i16|16i8)")>; // arrangements totalling 128 bits (2x64, 4x32, 8x16, 16x8)
501 // -----------------------------------------------------------------------------
537 // -----------------------------------------------------------------------------
560              (instregex "^WHILE(GE|GT|HI|HS|LE|LO|LS|LT)_P(WW|XX)_[BHSD]")>;
633 // -----------------------------------------------------------------------------
654                         "^ADR_LSL_ZZZ_[SD]_[0123]",
673 def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>], (instregex "^(AD|SB)CL[BT]_ZZZ_[SD]")>;
749              (instregex "^CMP(EQ|GE|GT|HI|HS|LE|LO|LS|LT|NE)_PPzZ[IZ]_[BHSD]",
750                         "^CMP(EQ|GE|GT|HI|HS|LE|LO|LS|LT|NE)_WIDE_PPzZZ_[BHS]")>;
757 // Complex dot product 8-bit element
760 // Complex dot product 16-bit element
763 // Complex multiply-add B, H, S element size
765                                             "^CMLA_ZZZI_[HS]")>;
767 // Complex multiply-add D element size
775                                             "^COMPACT_ZPZ_[SD]",
779 def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>], (instregex "^[SU]CVTF_ZPmZ_Dto[SD]")>;
785 def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>], (instregex "^[SU]CVTF_ZPmZ_Sto[HS]")>;
824                                             "^[SU]XTH_ZPmZ_[SD]",
843 def : InstRW<[CortexA510MCWrite<8, 2, CortexA510UnitVALU0>], (instregex "^HISTCNT_ZPzZZ_[SD]",
877 // Matrix multiply-accumulate
893 def : InstRW<[CortexA510Write<4, CortexA510UnitVMAC>], (instregex "^[SU]MULL[BT]_ZZZI_[SD]",
906                                             "^[SU]ML[AS]L[BT]_ZZZI_[SD]")>;
910                                             "^SQDML[AS](LB|LT)_ZZZI_[SD]")>;
914                                             "^SQDMULH_ZZZI_[HS]")>;
921                                             "^SQDMULL[BT]_ZZZI_[SD]")>;
927                                             "^SQRDML[AS]H_ZZZI_[HS]",
928                                             "^SQRDCMLAH_ZZZI_[HS]")>;
937                                             "^SQRDMULH_ZZZI_[HS]")>;
975                                            "^REVH_ZPmZ_[SD]",
996 // SVE floating-point instructions
997 // -----------------------------------------------------------------------------
1000 def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>], (instregex "^FAB[SD]_ZPmZ_[HSD]",
1001                                                                   "^FAB[SD]_ZPZZ_[HSD]")>;
1029                                            "^FCMLA_ZZZI_[HS]")>;
1154 // -----------------------------------------------------------------------------
1169 // -----------------------------------------------------------------------------
1180                                            "^LD1S?H_[SD]_IMM$",
1185                                              "^LD1S?H_[SD]$",
1192                                            "^LD1RS?H_[SD]_IMM$",
1205 // Non temporal gather load, vector + scalar 32-bit element size
1209 // Non temporal gather load, vector + scalar 64-bit element size
1216                                               "^LDFF1S?H_[SD]$",
1222                                            "^LDNF1S?H_[SD]_IMM$",
1243 // Gather load, vector + imm, 32-bit element size
1247 // Gather load, vector + imm, 64-bit element size
1251 // Gather load, 64-bit element size
1258 // Gather load, 32-bit scaled offset
1263 // Gather load, 32-bit unpacked unscaled offset
1269 // -----------------------------------------------------------------------------
1280                                                 "^ST1H_[SD]_IMM$",
1284 def : InstRW<[CortexA510VSt0], (instregex "^ST1H(_[SD])?$")>;
1323 // Scatter non temporal store, vector + scalar 32-bit element size
1326 // Scatter non temporal store, vector + scalar 64-bit element size
1329 // Scatter store vector + imm 32-bit element size
1333 // Scatter store vector + imm 64-bit element size
1337 // Scatter store, 32-bit scaled offset
1341 // Scatter store, 32-bit unpacked unscaled offset
1345 // Scatter store, 32-bit unpacked scaled offset
1349 // Scatter store, 32-bit unscaled offset
1353 // Scatter store, 64-bit scaled offset
1357 // Scatter store, 64-bit unscaled offset
1362 // -----------------------------------------------------------------------------
1378 // -----------------------------------------------------------------------------