//=- AArch64SchedAmpere1B.td - Ampere-1B scheduling def -----*- tablegen -*-=//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the machine model for the Ampere Computing Ampere-1B to
// support instruction scheduling and other instruction cost heuristics.
//
//===----------------------------------------------------------------------===//

// The Ampere-1B core is an out-of-order micro-architecture.  The front
// end has branch prediction, with a 10-cycle recovery time from a
// mispredicted branch.  Instructions coming out of the front end are
// decoded into internal micro-ops (uops).

// Top-level machine model record for Ampere-1B.
def Ampere1BModel : SchedMachineModel {
  // Maximum micro-ops dispatch rate.
  let IssueWidth = 12;
  // Micro-op re-order buffer size.
  let MicroOpBufferSize = 208;
  // Optimistic load latency.
  let LoadLatency = 3;
  // Branch mispredict penalty (matches the 10-cycle recovery noted above).
  let MispredictPenalty = 10;
  // Instruction queue size.
  let LoopMicroOpBufferSize = 32;
  let CompleteModel = 1;

  // Predicates of the SVE, SME and PA feature groups are listed as
  // unsupported by this model.
  list<Predicate> UnsupportedFeatures =
      !listconcat(SVEUnsupported.F, SMEUnsupported.F, PAUnsupported.F);
}

// Everything from here on is attached to Ampere1BModel.
let SchedModel = Ampere1BModel in {

//===----------------------------------------------------------------------===//
// Define each kind of processor resource and number available on Ampere-1B.

// Integer single-cycle, branch, and flags r/w.
def Ampere1BUnitA  : ProcResource<2>;
// Integer single-cycle, and complex shifts.
def Ampere1BUnitB  : ProcResource<2>;
// Integer multi-cycle.
def Ampere1BUnitBS : ProcResource<1>;
// Load.
def Ampere1BUnitL  : ProcResource<2>;
// Store address calculation.
def Ampere1BUnitS  : ProcResource<2>;
// FP and vector operations, and flag write.
def Ampere1BUnitX  : ProcResource<1>;
// FP and vector operations, and crypto.
def Ampere1BUnitY  : ProcResource<1>;
// FP store data and FP-to-integer moves.
def Ampere1BUnitZ  : ProcResource<1>;

// Groups used when a micro-op may issue to either member unit.
def Ampere1BUnitAB : ProcResGroup<[Ampere1BUnitA, Ampere1BUnitB]>;
def Ampere1BUnitXY : ProcResGroup<[Ampere1BUnitX, Ampere1BUnitY]>;

Andric//===----------------------------------------------------------------------===// 504c2d3b02SDimitry Andric// Define customized scheduler read/write types specific to the Ampere-1. 514c2d3b02SDimitry Andric 524c2d3b02SDimitry Andricdef Ampere1BWrite_1cyc_1A : SchedWriteRes<[Ampere1BUnitA]> { 534c2d3b02SDimitry Andric let Latency = 1; 544c2d3b02SDimitry Andric let NumMicroOps = 1; 554c2d3b02SDimitry Andric} 564c2d3b02SDimitry Andric 574c2d3b02SDimitry Andricdef Ampere1BWrite_1cyc_2A : SchedWriteRes<[Ampere1BUnitA, Ampere1BUnitA]> { 584c2d3b02SDimitry Andric let Latency = 1; 594c2d3b02SDimitry Andric let NumMicroOps = 2; 604c2d3b02SDimitry Andric} 614c2d3b02SDimitry Andric 624c2d3b02SDimitry Andricdef Ampere1BWrite_1cyc_1B : SchedWriteRes<[Ampere1BUnitB]> { 634c2d3b02SDimitry Andric let Latency = 1; 644c2d3b02SDimitry Andric let NumMicroOps = 1; 654c2d3b02SDimitry Andric} 664c2d3b02SDimitry Andric 674c2d3b02SDimitry Andricdef Ampere1BWrite_1cyc_1BS : SchedWriteRes<[Ampere1BUnitBS]> { 684c2d3b02SDimitry Andric let Latency = 1; 694c2d3b02SDimitry Andric let NumMicroOps = 1; 704c2d3b02SDimitry Andric} 714c2d3b02SDimitry Andric 724c2d3b02SDimitry Andricdef Ampere1BWrite_1cyc_1BS_1B : SchedWriteRes<[Ampere1BUnitBS, Ampere1BUnitB]> { 734c2d3b02SDimitry Andric let Latency = 1; 744c2d3b02SDimitry Andric let NumMicroOps = 2; 754c2d3b02SDimitry Andric} 764c2d3b02SDimitry Andric 774c2d3b02SDimitry Andricdef Ampere1BWrite_1cyc_1AB : SchedWriteRes<[Ampere1BUnitAB]> { 784c2d3b02SDimitry Andric let Latency = 1; 794c2d3b02SDimitry Andric let NumMicroOps = 1; 804c2d3b02SDimitry Andric} 814c2d3b02SDimitry Andric 824c2d3b02SDimitry Andricdef Ampere1BWrite_1cyc_1AB_1A : SchedWriteRes<[Ampere1BUnitAB, Ampere1BUnitA]> { 834c2d3b02SDimitry Andric let Latency = 1; 844c2d3b02SDimitry Andric let NumMicroOps = 2; 854c2d3b02SDimitry Andric} 864c2d3b02SDimitry Andric 874c2d3b02SDimitry Andricdef Ampere1BWrite_1cyc_1L : SchedWriteRes<[Ampere1BUnitL]> { 884c2d3b02SDimitry Andric let Latency = 
1; 894c2d3b02SDimitry Andric let NumMicroOps = 1; 904c2d3b02SDimitry Andric} 914c2d3b02SDimitry Andric 924c2d3b02SDimitry Andricdef Ampere1BWrite_1cyc_1S : SchedWriteRes<[Ampere1BUnitS]> { 934c2d3b02SDimitry Andric let Latency = 1; 944c2d3b02SDimitry Andric let NumMicroOps = 1; 954c2d3b02SDimitry Andric} 964c2d3b02SDimitry Andric 974c2d3b02SDimitry Andricdef Ampere1BWrite_1cyc_2S : SchedWriteRes<[Ampere1BUnitS, Ampere1BUnitS]> { 984c2d3b02SDimitry Andric let Latency = 1; 994c2d3b02SDimitry Andric let NumMicroOps = 2; 1004c2d3b02SDimitry Andric} 1014c2d3b02SDimitry Andric 1024c2d3b02SDimitry Andricdef Ampere1BWrite_2cyc_1Y : SchedWriteRes<[Ampere1BUnitY]> { 1034c2d3b02SDimitry Andric let Latency = 2; 1044c2d3b02SDimitry Andric let NumMicroOps = 1; 1054c2d3b02SDimitry Andric} 1064c2d3b02SDimitry Andric 1074c2d3b02SDimitry Andricdef Ampere1BWrite_2cyc_2AB : SchedWriteRes<[Ampere1BUnitAB, Ampere1BUnitAB]> { 1084c2d3b02SDimitry Andric let Latency = 2; 1094c2d3b02SDimitry Andric let NumMicroOps = 2; 1104c2d3b02SDimitry Andric} 1114c2d3b02SDimitry Andric 1124c2d3b02SDimitry Andricdef Ampere1BWrite_2cyc_1B_1AB : SchedWriteRes<[Ampere1BUnitB, Ampere1BUnitAB]> { 1134c2d3b02SDimitry Andric let Latency = 2; 1144c2d3b02SDimitry Andric let NumMicroOps = 2; 1154c2d3b02SDimitry Andric} 1164c2d3b02SDimitry Andric 1174c2d3b02SDimitry Andricdef Ampere1BWrite_2cyc_1B_1S : SchedWriteRes<[Ampere1BUnitB, Ampere1BUnitS]> { 1184c2d3b02SDimitry Andric let Latency = 2; 1194c2d3b02SDimitry Andric let NumMicroOps = 2; 1204c2d3b02SDimitry Andric} 1214c2d3b02SDimitry Andric 1224c2d3b02SDimitry Andricdef Ampere1BWrite_2cyc_1B_1S_1AB : SchedWriteRes<[Ampere1BUnitB, 1234c2d3b02SDimitry Andric Ampere1BUnitS, 1244c2d3b02SDimitry Andric Ampere1BUnitAB]> { 1254c2d3b02SDimitry Andric let Latency = 2; 1264c2d3b02SDimitry Andric let NumMicroOps = 3; 1274c2d3b02SDimitry Andric} 1284c2d3b02SDimitry Andric 1294c2d3b02SDimitry Andricdef Ampere1BWrite_2cyc_1S_2Z : SchedWriteRes<[Ampere1BUnitS, 
1304c2d3b02SDimitry Andric Ampere1BUnitZ, 1314c2d3b02SDimitry Andric Ampere1BUnitZ]> { 1324c2d3b02SDimitry Andric let Latency = 2; 1334c2d3b02SDimitry Andric let NumMicroOps = 3; 1344c2d3b02SDimitry Andric} 1354c2d3b02SDimitry Andric 1364c2d3b02SDimitry Andricdef Ampere1BWrite_2cyc_1XY : SchedWriteRes<[Ampere1BUnitXY]> { 1374c2d3b02SDimitry Andric let Latency = 2; 1384c2d3b02SDimitry Andric let NumMicroOps = 1; 1394c2d3b02SDimitry Andric} 1404c2d3b02SDimitry Andric 1414c2d3b02SDimitry Andricdef Ampere1BWrite_2cyc_1S_1Z : SchedWriteRes<[Ampere1BUnitS, Ampere1BUnitZ]> { 1424c2d3b02SDimitry Andric let Latency = 2; 1434c2d3b02SDimitry Andric let NumMicroOps = 2; 1444c2d3b02SDimitry Andric} 1454c2d3b02SDimitry Andric 1464c2d3b02SDimitry Andricdef Ampere1BWrite_3cyc_1BS : SchedWriteRes<[Ampere1BUnitBS]> { 1474c2d3b02SDimitry Andric let Latency = 3; 1484c2d3b02SDimitry Andric let NumMicroOps = 1; 1494c2d3b02SDimitry Andric} 1504c2d3b02SDimitry Andric 1514c2d3b02SDimitry Andricdef Ampere1BWrite_3cyc_1L : SchedWriteRes<[Ampere1BUnitL]> { 1524c2d3b02SDimitry Andric let Latency = 3; 1534c2d3b02SDimitry Andric let NumMicroOps = 1; 1544c2d3b02SDimitry Andric} 1554c2d3b02SDimitry Andric 1564c2d3b02SDimitry Andricdef Ampere1BWrite_3cyc_1X : SchedWriteRes<[Ampere1BUnitX]> { 1574c2d3b02SDimitry Andric let Latency = 3; 1584c2d3b02SDimitry Andric let NumMicroOps = 1; 1594c2d3b02SDimitry Andric} 1604c2d3b02SDimitry Andric 1614c2d3b02SDimitry Andricdef Ampere1BWrite_3cyc_1XY : SchedWriteRes<[Ampere1BUnitXY]> { 1624c2d3b02SDimitry Andric let Latency = 3; 1634c2d3b02SDimitry Andric let NumMicroOps = 1; 1644c2d3b02SDimitry Andric} 1654c2d3b02SDimitry Andric 1664c2d3b02SDimitry Andricdef Ampere1BWrite_3cyc_1Z : SchedWriteRes<[Ampere1BUnitZ]> { 1674c2d3b02SDimitry Andric let Latency = 3; 1684c2d3b02SDimitry Andric let NumMicroOps = 1; 1694c2d3b02SDimitry Andric} 1704c2d3b02SDimitry Andric 1714c2d3b02SDimitry Andricdef Ampere1BWrite_3cyc_1S_1Z : SchedWriteRes<[Ampere1BUnitS, 
1724c2d3b02SDimitry Andric Ampere1BUnitZ]> { 1734c2d3b02SDimitry Andric let Latency = 3; 1744c2d3b02SDimitry Andric let NumMicroOps = 2; 1754c2d3b02SDimitry Andric} 1764c2d3b02SDimitry Andric 1774c2d3b02SDimitry Andricdef Ampere1BWrite_3cyc_1S_2Z : SchedWriteRes<[Ampere1BUnitS, 1784c2d3b02SDimitry Andric Ampere1BUnitZ, Ampere1BUnitZ]> { 1794c2d3b02SDimitry Andric let Latency = 3; 1804c2d3b02SDimitry Andric let NumMicroOps = 3; 1814c2d3b02SDimitry Andric} 1824c2d3b02SDimitry Andric 1834c2d3b02SDimitry Andricdef Ampere1BWrite_3cyc_2S_2Z : SchedWriteRes<[Ampere1BUnitS, Ampere1BUnitS, 1844c2d3b02SDimitry Andric Ampere1BUnitZ, Ampere1BUnitZ]> { 1854c2d3b02SDimitry Andric let Latency = 3; 1864c2d3b02SDimitry Andric let NumMicroOps = 4; 1874c2d3b02SDimitry Andric} 1884c2d3b02SDimitry Andric 1894c2d3b02SDimitry Andricdef Ampere1BWrite_4cyc_1BS_1AB : SchedWriteRes<[Ampere1BUnitBS, Ampere1BUnitAB]> { 1904c2d3b02SDimitry Andric let Latency = 4; 1914c2d3b02SDimitry Andric let NumMicroOps = 2; 1924c2d3b02SDimitry Andric} 1934c2d3b02SDimitry Andric 1944c2d3b02SDimitry Andricdef Ampere1BWrite_4cyc_1L : SchedWriteRes<[Ampere1BUnitL]> { 1954c2d3b02SDimitry Andric let Latency = 4; 1964c2d3b02SDimitry Andric let NumMicroOps = 1; 1974c2d3b02SDimitry Andric} 1984c2d3b02SDimitry Andric 1994c2d3b02SDimitry Andricdef Ampere1BWrite_4cyc_2L : SchedWriteRes<[Ampere1BUnitL, Ampere1BUnitL]> { 2004c2d3b02SDimitry Andric let Latency = 4; 2014c2d3b02SDimitry Andric let NumMicroOps = 2; 2024c2d3b02SDimitry Andric} 2034c2d3b02SDimitry Andric 2044c2d3b02SDimitry Andricdef Ampere1BWrite_4cyc_1L_1B : SchedWriteRes<[Ampere1BUnitL, Ampere1BUnitB]> { 2054c2d3b02SDimitry Andric let Latency = 4; 2064c2d3b02SDimitry Andric let NumMicroOps = 2; 2074c2d3b02SDimitry Andric} 2084c2d3b02SDimitry Andric 2094c2d3b02SDimitry Andricdef Ampere1BWrite_4cyc_1X : SchedWriteRes<[Ampere1BUnitX]> { 2104c2d3b02SDimitry Andric let Latency = 4; 2114c2d3b02SDimitry Andric let NumMicroOps = 1; 2124c2d3b02SDimitry Andric} 

// Further custom write types; names encode latency and the units listed.
let Latency = 4, NumMicroOps = 1 in
def Ampere1BWrite_4cyc_1XY : SchedWriteRes<[Ampere1BUnitXY]>;

let Latency = 4, NumMicroOps = 2 in
def Ampere1BWrite_4cyc_2XY : SchedWriteRes<[Ampere1BUnitXY, Ampere1BUnitXY]>;

let Latency = 5, NumMicroOps = 1 in
def Ampere1BWrite_5cyc_1BS : SchedWriteRes<[Ampere1BUnitBS]>;

let Latency = 4, NumMicroOps = 3 in
def Ampere1BWrite_4cyc_1XY_1S_1Z
    : SchedWriteRes<[Ampere1BUnitXY, Ampere1BUnitS, Ampere1BUnitZ]>;

let Latency = 4, NumMicroOps = 6 in
def Ampere1BWrite_4cyc_3S_3Z
    : SchedWriteRes<[Ampere1BUnitS, Ampere1BUnitS, Ampere1BUnitS,
                     Ampere1BUnitZ, Ampere1BUnitZ, Ampere1BUnitZ]>;

let Latency = 5, NumMicroOps = 8 in
def Ampere1BWrite_5cyc_4S_4Z
    : SchedWriteRes<[Ampere1BUnitS, Ampere1BUnitS,
                     Ampere1BUnitS, Ampere1BUnitS,
                     Ampere1BUnitZ, Ampere1BUnitZ,
                     Ampere1BUnitZ, Ampere1BUnitZ]>;

let Latency = 5, NumMicroOps = 2 in
def Ampere1BWrite_5cyc_1L_1BS : SchedWriteRes<[Ampere1BUnitL, Ampere1BUnitBS]>;

let Latency = 5, NumMicroOps = 3 in
def Ampere1BWrite_5cyc_3L
    : SchedWriteRes<[Ampere1BUnitL, Ampere1BUnitL, Ampere1BUnitL]>;

let Latency = 5, NumMicroOps = 4 in
def Ampere1BWrite_5cyc_4L
    : SchedWriteRes<[Ampere1BUnitL, Ampere1BUnitL,
                     Ampere1BUnitL, Ampere1BUnitL]>;

let Latency = 5, NumMicroOps = 1 in
def Ampere1BWrite_5cyc_1X : SchedWriteRes<[Ampere1BUnitX]>;

let Latency = 5, NumMicroOps = 6 in
def Ampere1BWrite_5cyc_2XY_2S_2Z
    : SchedWriteRes<[Ampere1BUnitXY, Ampere1BUnitXY,
                     Ampere1BUnitS, Ampere1BUnitS,
                     Ampere1BUnitZ, Ampere1BUnitZ]>;

let Latency = 6, NumMicroOps = 2 in
def Ampere1BWrite_6cyc_1BS_1A : SchedWriteRes<[Ampere1BUnitBS, Ampere1BUnitA]>;

let Latency = 6, NumMicroOps = 3 in
def Ampere1BWrite_6cyc_1BS_2A
    : SchedWriteRes<[Ampere1BUnitBS, Ampere1BUnitA, Ampere1BUnitA]>;

// Custom write types with latencies of 6 cycles and above.  As with the
// shorter ones, each name encodes the latency in cycles and the units listed
// in the resource list.

def Ampere1BWrite_6cyc_1L_1XY : SchedWriteRes<[Ampere1BUnitL, Ampere1BUnitXY]> {
  let Latency = 6;
  let NumMicroOps = 2;
}

def Ampere1BWrite_6cyc_2L_2XY : SchedWriteRes<[Ampere1BUnitL, Ampere1BUnitL,
                                               Ampere1BUnitXY, Ampere1BUnitXY]> {
  let Latency = 6;
  let NumMicroOps = 4;
}

// NOTE(review): two micro-ops but only a single UnitX entry; every other
// record here has one resource entry per micro-op.  Left as-is; confirm
// whether this is intentional.
def Ampere1BWrite_6cyc_1X : SchedWriteRes<[Ampere1BUnitX]> {
  let Latency = 6;
  let NumMicroOps = 2;
}

def Ampere1BWrite_6cyc_2XY : SchedWriteRes<[Ampere1BUnitXY, Ampere1BUnitXY]> {
  let Latency = 6;
  let NumMicroOps = 2;
}

def Ampere1BWrite_6cyc_3XY : SchedWriteRes<[Ampere1BUnitXY, Ampere1BUnitXY,
                                            Ampere1BUnitXY]> {
  let Latency = 6;
  let NumMicroOps = 3;
}

def Ampere1BWrite_6cyc_2XY_2S_2Z : SchedWriteRes<[Ampere1BUnitXY, Ampere1BUnitXY,
                                                  Ampere1BUnitS, Ampere1BUnitS,
                                                  Ampere1BUnitZ, Ampere1BUnitZ]> {
  let Latency = 6;
  let NumMicroOps = 6;
}

def Ampere1BWrite_6cyc_3XY_3S_3Z : SchedWriteRes<[Ampere1BUnitXY, Ampere1BUnitXY, Ampere1BUnitXY,
                                                  Ampere1BUnitS, Ampere1BUnitS, Ampere1BUnitS,
                                                  Ampere1BUnitZ, Ampere1BUnitZ, Ampere1BUnitZ]> {
  let Latency = 6;
  let NumMicroOps = 9;
}

def Ampere1BWrite_7cyc_1BS_1XY : SchedWriteRes<[Ampere1BUnitBS, Ampere1BUnitXY]> {
  let Latency = 7;
  let NumMicroOps = 2;
}

def Ampere1BWrite_7cyc_1XY_1Z : SchedWriteRes<[Ampere1BUnitXY, Ampere1BUnitZ]> {
  let Latency = 7;
  let NumMicroOps = 2;
}

def Ampere1BWrite_7cyc_1X_1Z : SchedWriteRes<[Ampere1BUnitX, Ampere1BUnitZ]> {
  let Latency = 7;
  let NumMicroOps = 2;
}

def Ampere1BWrite_7cyc_3L_3XY : SchedWriteRes<[Ampere1BUnitL, Ampere1BUnitL,
                                               Ampere1BUnitL, Ampere1BUnitXY,
                                               Ampere1BUnitXY, Ampere1BUnitXY]> {
  let Latency = 7;
  let NumMicroOps = 6;
}

def Ampere1BWrite_7cyc_4L_4XY : SchedWriteRes<[Ampere1BUnitL, Ampere1BUnitL,
                                               Ampere1BUnitL, Ampere1BUnitL,
                                               Ampere1BUnitXY, Ampere1BUnitXY,
                                               Ampere1BUnitXY, Ampere1BUnitXY]> {
  let Latency = 7;
  let NumMicroOps = 8;
}

def Ampere1BWrite_7cyc_4XY_4S_4Z : SchedWriteRes<[Ampere1BUnitXY, Ampere1BUnitXY,
                                                  Ampere1BUnitXY, Ampere1BUnitXY,
                                                  Ampere1BUnitS, Ampere1BUnitS,
                                                  Ampere1BUnitS, Ampere1BUnitS,
                                                  Ampere1BUnitZ, Ampere1BUnitZ,
                                                  Ampere1BUnitZ, Ampere1BUnitZ]> {
  let Latency = 7;
  let NumMicroOps = 12;
}

def Ampere1BWrite_8cyc_1BS_1L : SchedWriteRes<[Ampere1BUnitBS, Ampere1BUnitL]> {
  let Latency = 8;
  let NumMicroOps = 2;
}

def Ampere1BWrite_8cyc_1BS_1XY : SchedWriteRes<[Ampere1BUnitBS, Ampere1BUnitXY]> {
  let Latency = 8;
  let NumMicroOps = 2;
}

def Ampere1BWrite_8cyc_2L_3XY : SchedWriteRes<[Ampere1BUnitL, Ampere1BUnitL,
                                               Ampere1BUnitXY, Ampere1BUnitXY,
                                               Ampere1BUnitXY]> {
  let Latency = 8;
  let NumMicroOps = 5;
}

def Ampere1BWrite_8cyc_3L_3XY : SchedWriteRes<[Ampere1BUnitL, Ampere1BUnitL,
                                               Ampere1BUnitL, Ampere1BUnitXY,
                                               Ampere1BUnitXY, Ampere1BUnitXY]> {
  let Latency = 8;
  let NumMicroOps = 6;
}

def Ampere1BWrite_8cyc_4L_4XY : SchedWriteRes<[Ampere1BUnitL, Ampere1BUnitL,
                                               Ampere1BUnitL, Ampere1BUnitL,
                                               Ampere1BUnitXY, Ampere1BUnitXY,
                                               Ampere1BUnitXY, Ampere1BUnitXY]> {
  let Latency = 8;
  let NumMicroOps = 8;
}

def Ampere1BWrite_8cyc_2XY : SchedWriteRes<[Ampere1BUnitXY, Ampere1BUnitXY]> {
  let Latency = 8;
  let NumMicroOps = 2;
}

def Ampere1BWrite_8cyc_4XY : SchedWriteRes<[Ampere1BUnitXY, Ampere1BUnitXY,
                                            Ampere1BUnitXY, Ampere1BUnitXY]> {
  let Latency = 8;
  let NumMicroOps = 4;
}

def Ampere1BWrite_9cyc_6XY_4S_4Z : SchedWriteRes<[Ampere1BUnitXY, Ampere1BUnitXY,
                                                  Ampere1BUnitXY, Ampere1BUnitXY,
                                                  Ampere1BUnitXY, Ampere1BUnitXY,
                                                  Ampere1BUnitS, Ampere1BUnitS,
                                                  Ampere1BUnitS, Ampere1BUnitS,
                                                  Ampere1BUnitZ, Ampere1BUnitZ,
                                                  Ampere1BUnitZ, Ampere1BUnitZ]> {
  let Latency = 9;
  let NumMicroOps = 14;
}

def Ampere1BWrite_9cyc_1A_1BS_1X : SchedWriteRes<[Ampere1BUnitA, Ampere1BUnitBS, Ampere1BUnitX]> {
  let Latency = 9;
  let NumMicroOps = 3;
}

def Ampere1BWrite_9cyc_1A_1BS_1XY : SchedWriteRes<[Ampere1BUnitA, Ampere1BUnitBS, Ampere1BUnitXY]> {
  let Latency = 9;
  let NumMicroOps = 3;
}

def Ampere1BWrite_9cyc_3L_3XY : SchedWriteRes<[Ampere1BUnitL, Ampere1BUnitL,
                                               Ampere1BUnitL, Ampere1BUnitXY,
                                               Ampere1BUnitXY, Ampere1BUnitXY]> {
  let Latency = 9;
  let NumMicroOps = 6;
}

def Ampere1BWrite_9cyc_1X : SchedWriteRes<[Ampere1BUnitX]> {
  let Latency = 9;
  let NumMicroOps = 1;
}

def Ampere1BWrite_9cyc_3XY : SchedWriteRes<[Ampere1BUnitXY, Ampere1BUnitXY, Ampere1BUnitXY]> {
  let Latency = 9;
  let NumMicroOps = 3;
}

// NOTE(review): the name and NumMicroOps (4 + 8 = 12) suggest eight UnitXY
// entries, but only four are listed.  Resource list left unchanged; confirm
// against the core's documentation before altering it.
def Ampere1BWrite_10cyc_4L_8XY : SchedWriteRes<[Ampere1BUnitL, Ampere1BUnitL,
                                                Ampere1BUnitL, Ampere1BUnitL,
                                                Ampere1BUnitXY, Ampere1BUnitXY,
                                                Ampere1BUnitXY, Ampere1BUnitXY]> {
  let Latency = 10;
  let NumMicroOps = 12;
}

def Ampere1BWrite_11cyc_1BS_2XY : SchedWriteRes<[Ampere1BUnitBS, Ampere1BUnitXY, Ampere1BUnitXY]> {
  let Latency = 11;
  let NumMicroOps = 3;
}

// NOTE(review): same 4-vs-8 UnitXY discrepancy as Ampere1BWrite_10cyc_4L_8XY.
def Ampere1BWrite_11cyc_4L_8XY : SchedWriteRes<[Ampere1BUnitL, Ampere1BUnitL,
                                                Ampere1BUnitL, Ampere1BUnitL,
                                                Ampere1BUnitXY, Ampere1BUnitXY,
                                                Ampere1BUnitXY, Ampere1BUnitXY]> {
  let Latency = 11;
  let NumMicroOps = 12;
}

def Ampere1BWrite_12cyc_1X : SchedWriteRes<[Ampere1BUnitX]> {
  let Latency = 12;
  let NumMicroOps = 1;
}

def Ampere1BWrite_13cyc_1BS_1X : SchedWriteRes<[Ampere1BUnitBS, Ampere1BUnitX]> {
  let Latency = 13;
  let NumMicroOps = 2;
}

def Ampere1BWrite_17cyc_1X : SchedWriteRes<[Ampere1BUnitX]> {
  let Latency = 17;
  let NumMicroOps = 1;
}

// Latency was 13 here, contradicting the record's own "19cyc" name and the
// 19-cycle latency this file assigns to the coarse 64-bit divide (WriteID64).
// It appears to have been copied from Ampere1BWrite_13cyc_1BS_1X.
def Ampere1BWrite_19cyc_2BS_1X : SchedWriteRes<[Ampere1BUnitBS,
                                                Ampere1BUnitBS,
                                                Ampere1BUnitX]> {
  let Latency = 19;
  let NumMicroOps = 3;
}

def Ampere1BWrite_19cyc_1X : SchedWriteRes<[Ampere1BUnitX]> {
  let Latency = 19;
  let NumMicroOps = 1;
}

def Ampere1BWrite_21cyc_1X : SchedWriteRes<[Ampere1BUnitX]> {
  let Latency = 21;
  let NumMicroOps = 1;
}

def Ampere1BWrite_33cyc_1X : SchedWriteRes<[Ampere1BUnitX]> {
  let Latency = 33;
  let NumMicroOps = 1;
}

def Ampere1BWrite_39cyc_1X : SchedWriteRes<[Ampere1BUnitX]> {
  let Latency = 39;
  let NumMicroOps = 1;
}

def Ampere1BWrite_63cyc_1X : SchedWriteRes<[Ampere1BUnitX]> {
  let Latency = 63;
  let NumMicroOps = 1;
}

// For basic arithmetic, we have more flexibility for short
// shifts (LSL shift <= 4), which are a single uop, and for extended
// registers, which have full flexibility across Unit A or B for both uops.
//
// Variants are listed in order: extended-register form, cheap-LSL form, and
// the NoSchedPred fallback.
def Ampere1BWrite_Arith
    : SchedWriteVariant<[
        SchedVar<RegExtendedPred, [Ampere1BWrite_2cyc_2AB]>,
        SchedVar<IsCheapLSL, [Ampere1BWrite_1cyc_1AB]>,
        SchedVar<NoSchedPred, [Ampere1BWrite_2cyc_1B_1AB]>
      ]>;

// The flag-setting variant currently lists the same three alternatives.
def Ampere1BWrite_ArithFlagsetting
    : SchedWriteVariant<[
        SchedVar<RegExtendedPred, [Ampere1BWrite_2cyc_2AB]>,
        SchedVar<IsCheapLSL, [Ampere1BWrite_1cyc_1AB]>,
        SchedVar<NoSchedPred, [Ampere1BWrite_2cyc_1B_1AB]>
      ]>;

//===----------------------------------------------------------------------===//
// Map the target-defined scheduler read/write resources and latencies for
// Ampere-1B.  This provides a coarse model, which is then specialised below.

// Coarse integer, branch, load/store and scalar FP mappings.  Records with
// no `let` keep the WriteRes defaults.

// MOVN, MOVZ
def : WriteRes<WriteImm, [Ampere1BUnitAB]>;
// ALU
def : WriteRes<WriteI, [Ampere1BUnitAB]>;
// ALU of Shifted-Reg
let Latency = 2, NumMicroOps = 2 in
def : WriteRes<WriteISReg, [Ampere1BUnitB, Ampere1BUnitAB]>;
// ALU of Extended-Reg
let Latency = 2, NumMicroOps = 2 in
def : WriteRes<WriteIEReg, [Ampere1BUnitAB, Ampere1BUnitAB]>;
// EXTR shifts a reg pair
def : WriteRes<WriteExtr, [Ampere1BUnitB]>;
// Shift/Scale
def : WriteRes<WriteIS, [Ampere1BUnitB]>;
// 32-bit Divide
let Latency = 13 in
def : WriteRes<WriteID32, [Ampere1BUnitBS, Ampere1BUnitX]>;
// 64-bit Divide
let Latency = 19 in
def : WriteRes<WriteID64, [Ampere1BUnitBS, Ampere1BUnitX]>;
// 32-bit Multiply
let Latency = 3 in
def : WriteRes<WriteIM32, [Ampere1BUnitBS]>;
// 64-bit Multiply
let Latency = 3 in
def : WriteRes<WriteIM64, [Ampere1BUnitBS, Ampere1BUnitAB]>;
def : WriteRes<WriteBr, [Ampere1BUnitA]>;
def : WriteRes<WriteBrReg, [Ampere1BUnitA, Ampere1BUnitA]>;
// Load from base addr plus immediate offset
let Latency = 3 in
def : WriteRes<WriteLD, [Ampere1BUnitL]>;
// Store to base addr plus immediate offset
let Latency = 1 in
def : WriteRes<WriteST, [Ampere1BUnitS]>;
// Store a register pair.
let Latency = 1, NumMicroOps = 1 in
def : WriteRes<WriteSTP, [Ampere1BUnitS, Ampere1BUnitS]>;
def : WriteRes<WriteAdr, [Ampere1BUnitAB]>;
// Load from a register index (maybe scaled).
let Latency = 3, NumMicroOps = 1 in
def : WriteRes<WriteLDIdx, [Ampere1BUnitAB, Ampere1BUnitS]>;
// Store to a register index (maybe scaled).
let Latency = 1, NumMicroOps = 2 in
def : WriteRes<WriteSTIdx, [Ampere1BUnitS, Ampere1BUnitS]>;
// General floating-point ops.
let Latency = 2 in
def : WriteRes<WriteF, [Ampere1BUnitXY]>;
// Floating-point compare.
let Latency = 3 in
def : WriteRes<WriteFCmp, [Ampere1BUnitX]>;
// Float conversion.
let Latency = 3 in
def : WriteRes<WriteFCvt, [Ampere1BUnitXY]>;
// Float-int register copy (no overrides; defaults apply).
def : WriteRes<WriteFCopy, [Ampere1BUnitXY]>;
// Floating-point immediate -- presumed from the name; TODO confirm.
let Latency = 2 in
def : WriteRes<WriteFImm, [Ampere1BUnitXY]>;
// Floating-point multiply.
let Latency = 4 in
def : WriteRes<WriteFMul, [Ampere1BUnitXY]>;
6044c2d3b02SDimitry Andricdef : WriteRes<WriteFDiv, [Ampere1BUnitXY]> { 6054c2d3b02SDimitry Andric let Latency = 19; 6064c2d3b02SDimitry Andric} // Floating-point division. 6074c2d3b02SDimitry Andricdef : WriteRes<WriteVd, [Ampere1BUnitXY]> { 6084c2d3b02SDimitry Andric let Latency = 3; 6094c2d3b02SDimitry Andric} // 64bit Vector D ops. 6104c2d3b02SDimitry Andricdef : WriteRes<WriteVq, [Ampere1BUnitXY]> { 6114c2d3b02SDimitry Andric let Latency = 3; 6124c2d3b02SDimitry Andric} // 128bit Vector Q ops. 6134c2d3b02SDimitry Andricdef : WriteRes<WriteVLD, [Ampere1BUnitL, Ampere1BUnitL]> { 6144c2d3b02SDimitry Andric let Latency = 4; 6154c2d3b02SDimitry Andric} // Vector loads. 6164c2d3b02SDimitry Andricdef : WriteRes<WriteVST, [Ampere1BUnitS, Ampere1BUnitZ]> { 6174c2d3b02SDimitry Andric let Latency = 2; 6184c2d3b02SDimitry Andric} // Vector stores. 6194c2d3b02SDimitry Andric 6204c2d3b02SDimitry Andricdef : WriteRes<WriteAtomic, []> { let Unsupported = 1; } 6214c2d3b02SDimitry Andric 6224c2d3b02SDimitry Andricdef : WriteRes<WriteSys, []> { let Latency = 1; } 6234c2d3b02SDimitry Andricdef : WriteRes<WriteBarrier, []> { let Latency = 1; } 6244c2d3b02SDimitry Andricdef : WriteRes<WriteHint, []> { let Latency = 1; } 6254c2d3b02SDimitry Andric 6264c2d3b02SDimitry Andricdef : WriteRes<WriteLDHi, []> { 6274c2d3b02SDimitry Andric let Latency = 3; 6284c2d3b02SDimitry Andric} // The second register of a load-pair: LDP,LDPSW,LDNP,LDXP,LDAXP 6294c2d3b02SDimitry Andric 6304c2d3b02SDimitry Andric// Forwarding logic. 
6314c2d3b02SDimitry Andricdef : ReadAdvance<ReadI, 0>; 6324c2d3b02SDimitry Andricdef : ReadAdvance<ReadISReg, 0>; 6334c2d3b02SDimitry Andricdef : ReadAdvance<ReadIEReg, 0>; 6344c2d3b02SDimitry Andricdef : ReadAdvance<ReadIM, 0>; 6354c2d3b02SDimitry Andricdef : ReadAdvance<ReadIMA, 1, [WriteIM32, WriteIM64]>; 6364c2d3b02SDimitry Andricdef : ReadAdvance<ReadID, 0>; 6374c2d3b02SDimitry Andricdef : ReadAdvance<ReadExtrHi, 0>; 6384c2d3b02SDimitry Andricdef : ReadAdvance<ReadST, 0>; 6394c2d3b02SDimitry Andricdef : ReadAdvance<ReadAdrBase, 0>; 6404c2d3b02SDimitry Andricdef : ReadAdvance<ReadVLD, 0>; 6414c2d3b02SDimitry Andric 6424c2d3b02SDimitry Andric//===----------------------------------------------------------------------===// 6434c2d3b02SDimitry Andric// Specialising the scheduling model further for Ampere-1B. 6444c2d3b02SDimitry Andric 6454c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_1cyc_1AB], (instrs COPY)>; 6464c2d3b02SDimitry Andric 6474c2d3b02SDimitry Andric// Branch instructions 6484c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_1cyc_1A], (instrs Bcc, BL, RET)>; 6494c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_1cyc_1A], 6504c2d3b02SDimitry Andric (instrs CBZW, CBZX, CBNZW, CBNZX, TBZW, TBZX, TBNZW, TBNZX)>; 6514c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_1cyc_2A], (instrs BLR)>; 6524c2d3b02SDimitry Andric 6534c2d3b02SDimitry Andric// Common Short Sequence Compression (CSSC) 6544c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_1cyc_1AB], (instregex "^ABS[WX]")>; 6554c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_3cyc_1BS], (instregex "^CNT[WX]")>; 6564c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_1cyc_1B], (instregex "^CTZ[WX]")>; 6574c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_1cyc_1AB_1A], (instregex "^[SU](MAX|MIN)[WX]")>; 6584c2d3b02SDimitry Andric 6594c2d3b02SDimitry Andric// Cryptography instructions 6604c2d3b02SDimitry Andric// -- AES encryption/decryption 6614c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_2cyc_1XY], 
(instregex "^AES[DE]")>; 6624c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_2cyc_1XY], (instregex "^AESI?MC")>; 6634c2d3b02SDimitry Andric// -- Polynomial multiplication 6644c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_2cyc_1XY], (instregex "^PMUL", "^PMULL")>; 6654c2d3b02SDimitry Andric// -- SHA-256 hash 6664c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_4cyc_1X], (instregex "^SHA256(H|H2)")>; 6674c2d3b02SDimitry Andric// -- SHA-256 schedule update 6684c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_2cyc_1Y], (instregex "^SHA256SU[01]")>; 6694c2d3b02SDimitry Andric// -- SHA-3 instructions 6704c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_2cyc_1XY], 6714c2d3b02SDimitry Andric (instregex "^BCAX", "^EOR3", "^RAX1", "^XAR")>; 6724c2d3b02SDimitry Andric// -- SHA-512 hash 6734c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_4cyc_1X], (instregex "^SHA512(H|H2)")>; 6744c2d3b02SDimitry Andric// -- SHA-512 schedule update 6754c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_2cyc_1Y], (instregex "^SHA512SU[01]")>; 6764c2d3b02SDimitry Andric// -- SHA1 choose/majority/parity 6774c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_4cyc_1X], (instregex "^SHA1[CMP]")>; 6784c2d3b02SDimitry Andric// -- SHA1 hash/schedule update 6794c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_2cyc_1Y], (instregex "^SHA1SU[01]")>; 6804c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_2cyc_1Y], (instregex "^SHA1H")>; 6814c2d3b02SDimitry Andric// -- SM3 hash 6824c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_2cyc_1XY], 6834c2d3b02SDimitry Andric (instregex "^SM3PARTW[12]$", "^SM3SS1$", "^SM3TT[12][AB]$")>; 6844c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_4cyc_1X], (instrs SM4E, SM4ENCKEY)>; 6854c2d3b02SDimitry Andric 6864c2d3b02SDimitry Andric// FP and vector load instructions 6874c2d3b02SDimitry Andric// -- Load 1-element structure to one/all lanes 6884c2d3b02SDimitry Andric// ---- all lanes 6894c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_6cyc_1L_1XY], 
6904c2d3b02SDimitry Andric (instregex "^LD1Rv(8b|4h|2s|16b|8h|4s|2d)")>; 6914c2d3b02SDimitry Andric// ---- one lane 6924c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_6cyc_1L_1XY], 6934c2d3b02SDimitry Andric (instregex "^LD1i(8|16|32|64)")>; 6944c2d3b02SDimitry Andric// -- Load 1-element structure to one/all lanes, 1D size 6954c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_4cyc_1L], 6964c2d3b02SDimitry Andric (instregex "^LD1Rv1d")>; 6974c2d3b02SDimitry Andric// -- Load 1-element structures to 1 register 6984c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_4cyc_1L], 6994c2d3b02SDimitry Andric (instregex "^LD1Onev(8b|4h|2s|1d|16b|8h|4s|2d)")>; 7004c2d3b02SDimitry Andric// -- Load 1-element structures to 2 registers 7014c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_4cyc_2L], 7024c2d3b02SDimitry Andric (instregex "^LD1Twov(8b|4h|2s|1d|16b|8h|4s|2d)")>; 7034c2d3b02SDimitry Andric// -- Load 1-element structures to 3 registers 7044c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_5cyc_3L], 7054c2d3b02SDimitry Andric (instregex "^LD1Threev(8b|4h|2s|1d|16b|8h|4s|2d)")>; 7064c2d3b02SDimitry Andric// -- Load 1-element structures to 4 registers 7074c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_5cyc_4L], 7084c2d3b02SDimitry Andric (instregex "^LD1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)")>; 7094c2d3b02SDimitry Andric// -- Load 2-element structure to all lanes of 2 registers, 1D size 7104c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_4cyc_2L], 7114c2d3b02SDimitry Andric (instregex "^LD2Rv1d")>; 7124c2d3b02SDimitry Andric// -- Load 2-element structure to all lanes of 2 registers, other sizes 7134c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_6cyc_2L_2XY], 7144c2d3b02SDimitry Andric (instregex "^LD2Rv(8b|4h|2s|16b|8h|4s|2d)")>; 7154c2d3b02SDimitry Andric// -- Load 2-element structure to one lane of 2 registers 7164c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_6cyc_2L_2XY], 7174c2d3b02SDimitry Andric (instregex "^LD2i(8|16|32|64)")>; 7184c2d3b02SDimitry Andric// -- 
Load 2-element structures to 2 registers, 16B/8H/4S/2D size 7194c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_6cyc_2L_2XY], 7204c2d3b02SDimitry Andric (instregex "^LD2Twov(16b|8h|4s|2d)")>; 7214c2d3b02SDimitry Andric// -- Load 2-element structures to 2 registers, 8B/4H/2S size 7224c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_8cyc_2L_3XY], 7234c2d3b02SDimitry Andric (instregex "^LD2Twov(8b|4h|2s)")>; 7244c2d3b02SDimitry Andric// -- Load 3-element structure to all lanes of 3 registers, 1D size 7254c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_5cyc_3L], 7264c2d3b02SDimitry Andric (instregex "^LD3Rv1d")>; 7274c2d3b02SDimitry Andric// -- Load 3-element structure to all lanes of 3 registers, other sizes 7284c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_7cyc_3L_3XY], 7294c2d3b02SDimitry Andric (instregex "^LD3Rv(8b|4h|2s|16b|8h|4s|2d)")>; 7304c2d3b02SDimitry Andric// -- Load 3-element structure to one lane of 3 registers 7314c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_7cyc_3L_3XY], 7324c2d3b02SDimitry Andric (instregex "^LD3i(8|16|32|64)")>; 7334c2d3b02SDimitry Andric// -- Load 3-element structures to 3 registers, 16B/8H/4S sizes 7344c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_8cyc_3L_3XY], 7354c2d3b02SDimitry Andric (instregex "^LD3Threev(16b|8h|4s)")>; 7364c2d3b02SDimitry Andric// -- Load 3-element structures to 3 registers, 2D size 7374c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_7cyc_3L_3XY], 7384c2d3b02SDimitry Andric (instregex "^LD3Threev2d")>; 7394c2d3b02SDimitry Andric// -- Load 3-element structures to 3 registers, 8B/4H/2S sizes 7404c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_9cyc_3L_3XY], 7414c2d3b02SDimitry Andric (instregex "^LD3Threev(8b|4h|2s)")>; 7424c2d3b02SDimitry Andric// -- Load 4-element structure to all lanes of 4 registers, 1D size 7434c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_5cyc_4L], 7444c2d3b02SDimitry Andric (instregex "^LD4Rv1d")>; 7454c2d3b02SDimitry Andric// -- Load 4-element structure to all 
lanes of 4 registers, other sizes 7464c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_7cyc_4L_4XY], 7474c2d3b02SDimitry Andric (instregex "^LD4Rv(8b|4h|2s|16b|8h|4s|2d)")>; 7484c2d3b02SDimitry Andric// -- Load 4-element structure to one lane of 4 registers 7494c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_7cyc_4L_4XY], 7504c2d3b02SDimitry Andric (instregex "^LD4i(8|16|32|64)")>; 7514c2d3b02SDimitry Andric// -- Load 4-element structures to 4 registers, 2D size 7524c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_8cyc_4L_4XY], 7534c2d3b02SDimitry Andric (instregex "^LD4Fourv2d")>; 7544c2d3b02SDimitry Andric// -- Load 4-element structures to 4 registers, 2S size 7554c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_11cyc_4L_8XY], 7564c2d3b02SDimitry Andric (instregex "^LD4Fourv2s")>; 7574c2d3b02SDimitry Andric// -- Load 4-element structures to 4 registers, other sizes 7584c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_10cyc_4L_8XY], 7594c2d3b02SDimitry Andric (instregex "^LD4Fourv(8b|4h|16b|8h|4s)")>; 7604c2d3b02SDimitry Andric// -- Load pair, Q-form 7614c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_4cyc_2L], (instregex "LDN?PQ")>; 7624c2d3b02SDimitry Andric// -- Load pair, S/D-form 7634c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_5cyc_1L_1BS], (instregex "LDN?P(S|D)")>; 7644c2d3b02SDimitry Andric// -- Load register 7654c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_4cyc_1L], (instregex "LDU?R[BHSDQ]i")>; 7664c2d3b02SDimitry Andric// -- Load register, sign-extended register 7674c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_4cyc_1L], (instregex "LDR[BHSDQ]ro(W|X)")>; 7684c2d3b02SDimitry Andric 7694c2d3b02SDimitry Andric// FP and vector store instructions 7704c2d3b02SDimitry Andric// -- Store 1-element structure from one lane of 1 register 7714c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_4cyc_1XY_1S_1Z], 7724c2d3b02SDimitry Andric (instregex "^ST1i(8|16|32|64)")>; 7734c2d3b02SDimitry Andric// -- Store 1-element structures from 1 register 
7744c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_2cyc_1S_1Z], 7754c2d3b02SDimitry Andric (instregex "^ST1Onev(8b|4h|2s|1d|16b|8h|4s|2d)")>; 7764c2d3b02SDimitry Andric// -- Store 1-element structures from 2 registers 7774c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_3cyc_2S_2Z], 7784c2d3b02SDimitry Andric (instregex "^ST1Twov(8b|4h|2s|1d|16b|8h|4s|2d)")>; 7794c2d3b02SDimitry Andric// -- Store 1-element structures from 3 registers 7804c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_4cyc_3S_3Z], 7814c2d3b02SDimitry Andric (instregex "^ST1Threev(8b|4h|2s|1d|16b|8h|4s|2d)")>; 7824c2d3b02SDimitry Andric// -- Store 1-element structures from 4 registers 7834c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_5cyc_4S_4Z], 7844c2d3b02SDimitry Andric (instregex "^ST1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)")>; 7854c2d3b02SDimitry Andric// -- Store 2-element structure from one lane of 2 registers 7864c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_5cyc_2XY_2S_2Z], 7874c2d3b02SDimitry Andric (instregex "^ST2i(8|16|32|64)")>; 7884c2d3b02SDimitry Andric// -- Store 2-element structures from 2 registers, 16B/8H/4S/2D sizes 7894c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_5cyc_2XY_2S_2Z], 7904c2d3b02SDimitry Andric (instregex "^ST2Twov(16b|8h|4s|2d)")>; 7914c2d3b02SDimitry Andric// -- Store 2-element structures from 2 registers, 8B/4H/2S sizes 7924c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_6cyc_2XY_2S_2Z], 7934c2d3b02SDimitry Andric (instregex "^ST2Twov(8b|4h|2s)")>; 7944c2d3b02SDimitry Andric// -- Store 3-element structure from one lane of 3 registers 7954c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_6cyc_3XY_3S_3Z], 7964c2d3b02SDimitry Andric (instregex "^ST3i(8|16|32|64)")>; 7974c2d3b02SDimitry Andric// -- Store 3-element structures from 3 registers 7984c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_6cyc_3XY_3S_3Z], 7994c2d3b02SDimitry Andric (instregex "^ST3Threev(8b|4h|2s|1d|16b|8h|4s|2d)")>; 8004c2d3b02SDimitry Andric// -- Store 4-element structure from 
one lane of 4 registers 8014c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_7cyc_4XY_4S_4Z], 8024c2d3b02SDimitry Andric (instregex "^ST4i(8|16|32|64)")>; 8034c2d3b02SDimitry Andric// -- Store 4-element structures from 4 registers, 16B/8H/4S sizes 8044c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_7cyc_4XY_4S_4Z], 8054c2d3b02SDimitry Andric (instregex "^ST4Fourv(16b|8h|4s)")>; 8064c2d3b02SDimitry Andric// -- Store 4-element structures from 4 registers, 2D sizes 8074c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_7cyc_4XY_4S_4Z], 8084c2d3b02SDimitry Andric (instregex "^ST4Fourv2d")>; 8094c2d3b02SDimitry Andric// -- Store 4-element structures from 4 registers, 8B/4H/2S sizes 8104c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_9cyc_6XY_4S_4Z], 8114c2d3b02SDimitry Andric (instregex "^ST4Fourv(8b|4h|2s)")>; 8124c2d3b02SDimitry Andric// -- Store pair, Q-form 8134c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_3cyc_2S_2Z], (instregex "^STN?PQ")>; 8144c2d3b02SDimitry Andric// -- Store pair, S/D-form 8154c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_3cyc_2S_2Z], (instregex "^STN?P[SD]")>; 8164c2d3b02SDimitry Andric// -- Store register 8174c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_2cyc_1S_1Z], (instregex "^STU?R[BHSDQ](ui|i)")>; 8184c2d3b02SDimitry Andric// -- Store register, sign-extended register offset 8194c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_2cyc_1S_1Z], (instregex "^STR[BHSDQ]ro[XW]")>; 8204c2d3b02SDimitry Andric 8214c2d3b02SDimitry Andric// FP data processing, bfloat16 format 8224c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_3cyc_1XY], (instrs BFCVT)>; 8234c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_8cyc_2XY], (instrs BFCVTN, BFCVTN2)>; 8244c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_2cyc_1XY], (instregex "^BFDOTv", "^BF16DOT")>; 8254c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_3cyc_1XY], (instrs BFMMLA)>; 8264c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_4cyc_1XY], (instregex "^BFMLAL")>; 8274c2d3b02SDimitry 
Andric 8284c2d3b02SDimitry Andric// FP data processing, scalar/vector, half precision 8294c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_3cyc_1XY], (instregex "^F(ABD|ABS)v.[fi]16")>; 8304c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_3cyc_1XY], 8314c2d3b02SDimitry Andric (instregex "^F(ADD|ADDP|CADD|NEG|NMUL|SUB)v.[fi]16")>; 8324c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_3cyc_1XY], 8334c2d3b02SDimitry Andric (instregex "^F(AC|CM)(EQ|GE|GT|LE|LT)v.[fi]16")>; 8344c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_3cyc_1XY], 8354c2d3b02SDimitry Andric (instregex "^F(AC|CM)(EQ|GE|GT|LE|LT)16")>; 8364c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_3cyc_1X], 8374c2d3b02SDimitry Andric (instregex "^FCMPE?H")>; 8384c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_9cyc_1A_1BS_1X], 8394c2d3b02SDimitry Andric (instregex "^FCCMPE?H")>; 8404c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_9cyc_1A_1BS_1XY], 8414c2d3b02SDimitry Andric (instregex "^FCSELH")>; 8424c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_3cyc_1XY], (instregex "^FCVT[AMNPZ][SU]v.[if]16")>; 8434c2d3b02SDimitry Andric// Convert integer to FP, vector, H-form 8444c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_3cyc_1XY], (instregex "^[SUd]CVTFv.[fi]16")>; 8454c2d3b02SDimitry Andric// Convert to FP from GPR, H-form 8464c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_8cyc_1BS_1XY], (instregex "^[SU]CVTF_ZPmZ_[DSH]toH$")>; 8474c2d3b02SDimitry Andric// Convert to FP from GPR, fixed-point, H-form 8484c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_11cyc_1BS_2XY], (instregex "^[SU]CVTF[SU][WX]Hri$")>; 8494c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_9cyc_1X], (instrs FDIVHrr)>; 8504c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_17cyc_1X], (instregex "^FDIVv.[if]16")>; 8514c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_3cyc_1XY], (instregex "^F(MAX|MIN)(NM)?P?v.[if]16")>; 8524c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_6cyc_2XY], (instregex "^F(MAX|MIN)(NM)?Vv4[if]16")>; 
8534c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_9cyc_3XY], (instregex "^F(MAX|MIN)(NM)?Vv8[if]16")>; 8544c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_4cyc_1XY], (instregex "^FMULX?v.[if]16")>; 8554c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_4cyc_1XY], (instrs FMULX16)>; 8564c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_4cyc_1XY], (instregex "^FN?M(ADD|SUB)[H]rrr")>; 8574c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_4cyc_1XY], (instregex "^FML[AS]v.[if]16")>; 8584c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_3cyc_1XY], (instregex "^FRECPXv.[if]16")>; 8594c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_4cyc_1XY], (instregex "^F(RECP|RSQRT)S16")>; 8604c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_3cyc_1XY], (instregex "^FRINT[AIMNPXZ]v.[if]16")>; 8614c2d3b02SDimitry Andric// FP square root, H-form 8624c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_21cyc_1X], (instrs FSQRTHr)>; 8634c2d3b02SDimitry Andric// FP square root, vector-form, F16 8644c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_39cyc_1X], (instregex "^FSQRTv.f16")>; 8654c2d3b02SDimitry Andric 8664c2d3b02SDimitry Andric// FP data processing, scalar/vector, single/double precision 8674c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_3cyc_1XY], (instregex "^F(ABD|ABS)v.[fi](32|64)")>; 8684c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_3cyc_1XY], 8694c2d3b02SDimitry Andric (instregex "^F(ADD|ADDP|CADD|NEG|NMUL|SUB)v.[fi](32|64)")>; 8704c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_3cyc_1XY], 8714c2d3b02SDimitry Andric (instregex "^F(AC|CM)(EQ|GE|GT|LE|LT)v.[fi](32|64)")>; 8724c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_3cyc_1XY], 8734c2d3b02SDimitry Andric (instregex "^F(AC|CM)(EQ|GE|GT|LE|LT)(32|64)")>; 8744c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_3cyc_1X], 8754c2d3b02SDimitry Andric (instregex "^FCMPE?(S|D)")>; 8764c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_9cyc_1A_1BS_1X], 8774c2d3b02SDimitry Andric (instregex "^FCCMPE?(S|D)")>; 
8784c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_9cyc_1A_1BS_1XY], 8794c2d3b02SDimitry Andric (instregex "^FCSEL(S|D)")>; 8804c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_3cyc_1XY], (instregex "^FCVT[AMNPZ][SU]v.[if](32|64)")>; 8814c2d3b02SDimitry Andric// Convert integer to FP, vector, S/D-form 8824c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_3cyc_1XY], (instregex "^[SUd]CVTFv.[fi](32|64)")>; 8834c2d3b02SDimitry Andric// Convert to FP from GPR, S/D-form 8844c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_8cyc_1BS_1XY], (instregex "^[SU]CVTF_ZPmZ_[DSH]to[DS]$")>; 8854c2d3b02SDimitry Andric// Convert to FP from GPR, fixed-point, S/D-form 8864c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_11cyc_1BS_2XY], (instregex "^[SU]CVTF[SU][WX][SD]ri$")>; 8874c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_19cyc_1X], (instregex "^FDIVv.[if](64)", "FDIVD")>; 8884c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_12cyc_1X], (instregex "^FDIVv.[if](32)", "FDIVS")>; 8894c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_3cyc_1XY], (instregex "^F(MAX|MIN)(NM)?P?v.[if](32|64)")>; 8904c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_6cyc_2XY], (instregex "^F(MAX|MIN)(NM)?Vv.[if](32|64)")>; 8914c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_4cyc_1XY], (instregex "^FMULX?v.[if](32|64)")>; 8924c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_4cyc_1XY], (instrs FMULX32, FMULX64)>; 8934c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_4cyc_1XY], (instrs FMULSrr, FNMULSrr)>; 8944c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_4cyc_1XY], (instrs FMULDrr, FNMULDrr)>; 8954c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_4cyc_1XY], (instregex "^FN?M(ADD|SUB)[SD]rrr")>; 8964c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_4cyc_1XY], (instregex "^FML[AS]v.[if](32|64)")>; 8974c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_3cyc_1XY], (instregex "^FRECPXv.[if](32|64)")>; 8984c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_3cyc_1XY], (instregex "^F(RECP|RSQRT)S(32|64)")>; 
8994c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_3cyc_1XY], (instregex "^FRINT[AIMNPXZ]v.[if](32|64)")>; 9004c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_3cyc_1XY], (instregex "^FRINT(32|64)")>; 9014c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_63cyc_1X], (instregex "^FSQRTv.f64", "^FSQRTDr")>; 9024c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_33cyc_1X], (instregex "^FSQRTv.f32", "^FSQRTSr")>; 9034c2d3b02SDimitry Andric 9044c2d3b02SDimitry Andric// FP miscellaneous instructions 9054c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_7cyc_1XY_1Z], (instregex "^FCVT[AMNPZ][SU][SU][XW][HSD]r")>; 9064c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_3cyc_1XY], (instregex "^FCVT[HSD]Hr")>; 9074c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_3cyc_1XY], (instregex "^FCVT[HSD][SD]r")>; 9084c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_3cyc_1XY], (instregex "^FCVTLv")>; 9094c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_3cyc_1XY], (instregex "^FCVT(N|XN)v")>; 9104c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_7cyc_1X_1Z], (instrs FJCVTZS)>; 9114c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_5cyc_1BS], (instregex "^FMOV[HSD][WX]r")>; 9124c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_7cyc_1BS_1XY], (instregex "^FMOVDXHighr")>; 9134c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_2cyc_1XY], (instregex "^FMOV[HSD][ri]")>; 9144c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_5cyc_1X], (instregex "^FMOVXDHighr")>; 9154c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_3cyc_1Z], (instregex "^FMOV[WX][HSD]r")>; 9164c2d3b02SDimitry Andric 9174c2d3b02SDimitry Andric// Integer arithmetic and logical instructions 9184c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_1cyc_1A], 9194c2d3b02SDimitry Andric (instregex "ADC(W|X)r", "SBC(W|X)r")>; 9204c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_Arith], 9214c2d3b02SDimitry Andric (instregex "(ADD|AND|BIC|EON|EOR|ORN|ORR|SUB)[WX]r[sx]")>; 9224c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_1cyc_1AB], 
9234c2d3b02SDimitry Andric (instregex "(ADD|AND|BIC|EON|EOR|ORN|ORR|SUB)[WX]r[ri]")>; 9244c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_ArithFlagsetting], 9254c2d3b02SDimitry Andric (instregex "(ADD|AND|BIC|SUB)S[WX]r[sx]")>; 9264c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_1cyc_1A], 9274c2d3b02SDimitry Andric (instregex "(ADD|AND|BIC|SUB)S[WX]r[ri]")>; 9284c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_1cyc_1A], 9294c2d3b02SDimitry Andric (instregex "(ADC|SBC)S[WX]r")>; 9304c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_1cyc_1A], (instrs RMIF)>; 9314c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_1cyc_1A], 9324c2d3b02SDimitry Andric (instregex "(CCMN|CCMP)(X|W)")>; 9334c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_1cyc_1A], 9344c2d3b02SDimitry Andric (instregex "(CSEL|CSINC|CSINV|CSNEG)(X|W)")>; 9354c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_13cyc_1BS_1X], (instrs SDIVWr, UDIVWr)>; 9364c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_19cyc_2BS_1X], (instrs SDIVXr, UDIVXr)>; 9374c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_3cyc_1BS], 9384c2d3b02SDimitry Andric (instregex "(S|U)MULHr")>; 9394c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_4cyc_1BS_1AB], 9404c2d3b02SDimitry Andric (instregex "(S|U)?M(ADD|SUB)L?r")>; 9414c2d3b02SDimitry Andric 9424c2d3b02SDimitry Andric// Integer load instructions 9434c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_3cyc_1L], 9444c2d3b02SDimitry Andric (instregex "(LDNP|LDP|LDPSW)(X|W)")>; 9454c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_3cyc_1L], 9464c2d3b02SDimitry Andric (instregex "LDR(B|D|H|Q|S)ui")>; 9474c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_3cyc_1L], 9484c2d3b02SDimitry Andric (instregex "LDR(D|Q|W|X)l")>; 9494c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_3cyc_1L], 9504c2d3b02SDimitry Andric (instregex "LDTR(B|H|W|X)i")>; 9514c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_3cyc_1L], 9524c2d3b02SDimitry Andric (instregex "LDTRS(BW|BX|HW|HX|W)i")>; 9534c2d3b02SDimitry 
Andricdef : InstRW<[Ampere1BWrite_3cyc_1L], 9544c2d3b02SDimitry Andric (instregex "LDUR(BB|HH|X|W)i")>; 9554c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_3cyc_1L], 9564c2d3b02SDimitry Andric (instregex "LDURS(BW|BX|HW|HX|W)i")>; 9574c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_3cyc_1L], 9584c2d3b02SDimitry Andric (instregex "LDR(HH|SHW|SHX|W|X)ro(W|X)")>; 9594c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_1cyc_1L], 9604c2d3b02SDimitry Andric (instrs PRFMl, PRFMui, PRFUMi)>; 9614c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_1cyc_1L], 9624c2d3b02SDimitry Andric (instrs PRFMroW, PRFMroX)>; 9634c2d3b02SDimitry Andric 9644c2d3b02SDimitry Andric// Integer miscellaneous instructions 9654c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_1cyc_1A], (instrs ADR, ADRP)>; 9664c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_1cyc_1B], (instregex "EXTR(W|X)")>; 9674c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_1cyc_1B], (instregex "(S|U)?BFM(W|X)")>; 9684c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_3cyc_1BS], (instregex "^CRC32C?[BHWX]")>; 9694c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_1cyc_1B], (instregex "CLS(W|X)")>; 9704c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_1cyc_1A], (instrs SETF8, SETF16)>; 9714c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_1cyc_1AB], 9724c2d3b02SDimitry Andric (instrs MOVKWi, MOVKXi, MOVNWi, MOVNXi, MOVZWi, MOVZXi)>; 9734c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_1cyc_1B], 9744c2d3b02SDimitry Andric (instregex "(RBIT|REV|REV16)(W|X)r", "REV32Xr")>; 9754c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_1cyc_1B], 9764c2d3b02SDimitry Andric (instregex "(ASR|LSL|LSR|ROR)V(W|X)r")>; 9774c2d3b02SDimitry Andric 9784c2d3b02SDimitry Andric// Integer store instructions 9794c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_1cyc_2S], (instregex "STNP(X|W)i")>; 9804c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_1cyc_2S], (instrs STPXi)>; 9814c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_2cyc_1B_1S], (instrs 
STPWi)>; 9824c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_2cyc_1B_1S_1AB], (instregex "STP(W|X)(pre|post)")>; 9834c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_1cyc_1S], (instrs STTRBi, STTRHi, STTRWi, STTRXi)>; 9844c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_1cyc_1S], (instregex "STUR(BB|HH|X|W)i", 9854c2d3b02SDimitry Andric "STR(X|W)ui", 9864c2d3b02SDimitry Andric "STR(BB|HH)ui")>; 9874c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_1cyc_2S], (instrs STRWroX, STRXroX)>; 9884c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_1cyc_2S], (instrs STRWroW, STRXroW)>; 9894c2d3b02SDimitry Andric 9904c2d3b02SDimitry Andric// Memory tagging 9914c2d3b02SDimitry Andric 9924c2d3b02SDimitry Andric// Insert Random Tags 9934c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_1cyc_1BS_1B], (instrs IRG, IRGstack)>; 9944c2d3b02SDimitry Andric// Load allocation tag 9954c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_4cyc_1L_1B], (instrs LDG, LDGM)>; 9964c2d3b02SDimitry Andric// Store allocation tags 9974c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_1cyc_1S], 9984c2d3b02SDimitry Andric (instrs STGi, STGM, STGPreIndex, STGPostIndex)>; 9994c2d3b02SDimitry Andric// Store allocation tags and pair of registers 10004c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_1cyc_2S], 10014c2d3b02SDimitry Andric (instrs STGPi, STGPpre, STGPpost)>; 10024c2d3b02SDimitry Andric// Store allocation tags and zero data 10034c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_1cyc_1S], 10044c2d3b02SDimitry Andric (instrs STZGi, STZGM, STZGPreIndex, STZGPostIndex)>; 10054c2d3b02SDimitry Andric// Store two tags 10064c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_1cyc_2S], 10074c2d3b02SDimitry Andric (instrs ST2Gi, ST2GPreIndex, ST2GPostIndex)>; 10084c2d3b02SDimitry Andric// Store two tags and zero data 10094c2d3b02SDimitry Andricdef : InstRW<[Ampere1BWrite_1cyc_2S], 10104c2d3b02SDimitry Andric (instrs STZ2Gi, STZ2GPreIndex, STZ2GPostIndex)>; 10114c2d3b02SDimitry Andric// Subtract 
// Subtract Pointer
def : InstRW<[Ampere1BWrite_1cyc_1AB], (instrs SUBP)>;
// Subtract Pointer, flagset
def : InstRW<[Ampere1BWrite_1cyc_1AB], (instrs SUBPS)>;
// Insert Tag Mask
def : InstRW<[Ampere1BWrite_1cyc_1AB], (instrs GMI)>;
// Arithmetic, immediate to logical address tag
def : InstRW<[Ampere1BWrite_1cyc_1B], (instrs ADDG, SUBG)>;

//===----------------------------------------------------------------------===//
// Per-instruction scheduling overrides.
//
// Each anonymous `def : InstRW<[Writes], (instrs ...)>` or
// `(instregex ...)` record below binds a set of opcodes to a scheduling write
// class for this model.  The write classes (Ampere1BWrite_*) are defined
// elsewhere in this file; their names presumably encode latency in cycles
// followed by the units used (A, B, BS, L, X, Y, Z and the AB / XY groups
// declared at the top of the file) -- confirm against their definitions.
//
// NOTE(review): the patterns below mix anchored ("^FOO") and unanchored
// ("FOO") spellings.  TableGen's instregex is understood to match from the
// start of the opcode name either way, so the difference should be cosmetic;
// confirm before normalising, since a change in what a pattern matches can
// produce overlapping InstRW definitions.
//===----------------------------------------------------------------------===//

// Pointer authentication
def : InstRW<[Ampere1BWrite_5cyc_1BS], (instregex "^AUT")>;
def : InstRW<[Ampere1BWrite_6cyc_1BS_1A],
      (instregex "BRA(A|AZ|B|BZ)", "RETA(A|B)", "ERETA(A|B)")>;
def : InstRW<[Ampere1BWrite_6cyc_1BS_2A],
      (instrs BLRAA, BLRAAZ, BLRAB, BLRABZ)>;
def : InstRW<[Ampere1BWrite_5cyc_1BS], (instregex "^PAC")>;
def : InstRW<[Ampere1BWrite_8cyc_1BS_1L], (instregex "^LDRA(A|B)")>;
def : InstRW<[Ampere1BWrite_1cyc_1B], (instrs XPACD, XPACI)>;

// Vector integer instructions
// -- absolute difference
def : InstRW<[Ampere1BWrite_2cyc_1XY],
      (instregex "^SABAv", "^SABALv", "^SABDv", "^SABDLv",
                 "^UABAv", "^UABALv", "^UABDv", "^UABDLv")>;
// -- arithmetic
def : InstRW<[Ampere1BWrite_2cyc_1XY],
      (instregex "^ABSv", "^(ADD|SUB)v", "^SADDLv", "^SADDW", "SHADD",
                 "SHSUB", "^SRHADD", "^URHADD", "SSUBL", "SSUBW",
                 "^UADDLv", "^UADDW", "UHADD", "UHSUB", "USUBL", "USUBW")>;
// -- arithmetic, horizontal, 16B
def : InstRW<[Ampere1BWrite_8cyc_4XY],
      (instregex "^ADDVv16i8v", "^SADDLVv16i8v", "^UADDLVv16i8v")>;
def : InstRW<[Ampere1BWrite_8cyc_4XY],
      (instregex "^[SU](MIN|MAX)Vv16i8v")>;
// -- arithmetic, horizontal, 4H/4S
// NOTE(review): the add-across pattern in this group selects the
// v8i8/v4i16/v2i32 forms while the min/max pattern selects v4i16/v4i32, so
// the "4H/4S" label only describes the min/max def -- confirm the intended
// element sizes for the add-across forms.
def : InstRW<[Ampere1BWrite_4cyc_2XY],
      (instregex "^[SU]?ADDL?V(v8i8|v4i16|v2i32)v")>;
def : InstRW<[Ampere1BWrite_4cyc_2XY],
      (instregex "^[SU](MIN|MAX)V(v4i16|v4i32)v")>;
// -- arithmetic, horizontal, 8B/8H
// NOTE(review): as above, the add-across pattern here selects v8i16/v4i32
// while the min/max pattern selects v8i8/v8i16 -- confirm.
def : InstRW<[Ampere1BWrite_6cyc_3XY],
      (instregex "^[SU]?ADDL?V(v8i16|v4i32)v")>;
def : InstRW<[Ampere1BWrite_6cyc_3XY],
      (instregex "^[SU](MIN|MAX)V(v8i8|v8i16)v")>;
// -- arithmetic, narrowing
def : InstRW<[Ampere1BWrite_6cyc_2XY], (instregex "(ADD|SUB)HNv.*")>;
def : InstRW<[Ampere1BWrite_6cyc_2XY], (instregex "(RADD|RSUB)HNv.*")>;
// -- arithmetic, pairwise
def : InstRW<[Ampere1BWrite_2cyc_1XY],
      (instregex "^ADDPv", "^SADALP", "^UADALP", "^SADDLPv", "^UADDLPv")>;
// -- arithmetic, saturating
def : InstRW<[Ampere1BWrite_2cyc_1XY],
      (instregex "^SQADD", "^SQSUB", "^SUQADD", "^UQADD", "^UQSUB", "^USQADD")>;
// -- bit count
def : InstRW<[Ampere1BWrite_2cyc_1XY],
      (instregex "^(CLS|CLZ|CNT)v")>;
// -- compare
def : InstRW<[Ampere1BWrite_2cyc_1XY],
      (instregex "^CMEQv", "^CMGEv", "^CMGTv", "^CMLEv", "^CMLTv",
                 "^CMHIv", "^CMHSv")>;
// -- compare non-zero
def : InstRW<[Ampere1BWrite_2cyc_1XY], (instregex "^CMTSTv")>;
// -- dot product
def : InstRW<[Ampere1BWrite_3cyc_1XY], (instregex "^(S|SU|U|US)DOTv")>;
// -- fp reciprocal estimate
def : InstRW<[Ampere1BWrite_6cyc_1X], (instregex "^FRECPEv", "^FRSQRTEv")>;
// -- integer reciprocal estimate
def : InstRW<[Ampere1BWrite_2cyc_1XY], (instregex "^URECPEv", "^URSQRTEv")>;
// -- logical
def : InstRW<[Ampere1BWrite_2cyc_1XY],
      (instregex "^ANDv", "^BICv", "^EORv", "^ORRv", "^ORNv", "^NOTv")>;
// -- logical, narrowing
// NOTE(review): the opcodes listed are shift-right-narrow forms plus UQXTN;
// the "logical" label is kept from the original -- confirm the wording.
def : InstRW<[Ampere1BWrite_6cyc_2XY],
      (instregex "RSHRNv",
                 "SHRNv", "SQSHRNv", "SQSHRUNv",
                 "UQXTNv")>;
// -- matrix multiply
def : InstRW<[Ampere1BWrite_3cyc_1XY],
      (instrs SMMLA, UMMLA, USMMLA)>;
// -- max/min
def : InstRW<[Ampere1BWrite_2cyc_1XY],
      (instregex "^SMAXv", "^SMINv", "^UMAXv", "^UMINv")>;
def : InstRW<[Ampere1BWrite_2cyc_1XY],
      (instregex "^SMAXPv", "^SMINPv", "^UMAXPv", "^UMINPv")>;
// -- move immediate
def : InstRW<[Ampere1BWrite_2cyc_1XY], (instregex "^MOVIv", "^MVNIv")>;
// -- multiply
def : InstRW<[Ampere1BWrite_3cyc_1XY],
      (instregex "MULv", "SMULLv", "UMULLv", "SQDMUL(H|L)v", "SQRDMULHv")>;
// -- multiply accumulate
def : InstRW<[Ampere1BWrite_3cyc_1XY],
      (instregex "MLAv", "MLSv", "(S|U|SQD)(MLAL|MLSL)v", "SQRDML(A|S)Hv")>;
// -- negation, saturating
def : InstRW<[Ampere1BWrite_2cyc_1XY], (instregex "^SQABS", "^SQNEG")>;
// -- reverse bits/bytes
def : InstRW<[Ampere1BWrite_2cyc_1XY],
      (instregex "^RBITv", "^REV16v", "^REV32v", "^REV64v")>;
// -- shift
def : InstRW<[Ampere1BWrite_2cyc_1XY], (instregex "^[SU]SHL(v16i8|v8i16|v4i32|v2i64)")>;
// -- shift and accumulate
def : InstRW<[Ampere1BWrite_2cyc_1XY],
      (instregex "SRSRAv", "SSRAv", "URSRAv", "USRAv")>;
// -- shift, saturating
def : InstRW<[Ampere1BWrite_2cyc_1XY],
      (instregex "^SQRSHLv", "^SQRSHRNv", "^SQRSHRUNv", "^SQSHL", "^SQSHLU",
                 "^SQXTNv", "^SQXTUNv", "^UQSHRNv", "UQRSHRNv", "^UQRSHL",
                 "^UQSHL")>;

// Vector miscellaneous instructions
// -- duplicate element
def : InstRW<[Ampere1BWrite_2cyc_1XY], (instregex "^DUPv.+lane")>;
// -- duplicate from GPR
def : InstRW<[Ampere1BWrite_5cyc_1BS], (instregex "^DUPv.+gpr")>;
// -- extract narrow
def : InstRW<[Ampere1BWrite_2cyc_1XY], (instregex "^XTNv")>;
// -- insert/extract element
def : InstRW<[Ampere1BWrite_2cyc_1XY], (instregex "^EXTv", "^INSv.+lane")>;
// -- move FP immediate
def : InstRW<[Ampere1BWrite_2cyc_1XY], (instregex "^FMOVv")>;
// -- move element to GPR
def : InstRW<[Ampere1BWrite_5cyc_1X], (instregex "(S|U)MOVv")>;
// -- move from GPR to any element
def : InstRW<[Ampere1BWrite_7cyc_1BS_1XY], (instregex "^INSv.+gpr")>;
// -- table lookup (One/Two/Three/Four = number of table registers; each
//    additional register uses the next-larger write class below)
def : InstRW<[Ampere1BWrite_2cyc_1XY],
      (instrs TBLv8i8One, TBLv16i8One, TBXv8i8One, TBXv16i8One)>;
def : InstRW<[Ampere1BWrite_4cyc_2XY],
      (instrs TBLv8i8Two, TBLv16i8Two, TBXv8i8Two, TBXv16i8Two)>;
def : InstRW<[Ampere1BWrite_6cyc_3XY],
      (instrs TBLv8i8Three, TBLv16i8Three, TBXv8i8Three, TBXv16i8Three)>;
def : InstRW<[Ampere1BWrite_8cyc_4XY],
      (instrs TBLv8i8Four, TBLv16i8Four, TBXv8i8Four, TBXv16i8Four)>;
// -- transpose and unzip (TRN1/TRN2, UZP1/UZP2)
def : InstRW<[Ampere1BWrite_2cyc_1XY],
      (instregex "^TRN1v", "^TRN2v", "^UZP1v", "^UZP2v")>;
// -- zip (ZIP1/ZIP2)
def : InstRW<[Ampere1BWrite_2cyc_1XY], (instregex "^ZIP1v", "^ZIP2v")>;

} // SchedModel = Ampere1BModel