//=- AArch64SchedNeoverseV1.td - NeoverseV1 Scheduling Model -*- tablegen -*-=//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the scheduling model for the Arm Neoverse V1 processors.
//
// References:
// - "Arm Neoverse V1 Software Optimization Guide"
// - "Arm Neoverse V1 Platform: Unleashing a new performance tier for Arm-based computing"
//   https://community.arm.com/arm-community-blogs/b/architectures-and-processors-blog/posts/neoverse-v1-platform-a-new-performance-tier-for-arm
// - "Neoverse V1"
//   https://en.wikichip.org/wiki/arm_holdings/microarchitectures/neoverse_v1

//
//===----------------------------------------------------------------------===//

// Top-level machine model record; every resource and write type in this file
// is attached to it.
def NeoverseV1Model : SchedMachineModel {
  // Maximum micro-ops dispatch rate.
  let IssueWidth = 15;
  // Micro-op re-order buffer.
  let MicroOpBufferSize = 256;
  // Optimistic load latency.
  let LoadLatency = 4;
  // Cycles cost of branch mispredicted.
  let MispredictPenalty = 11;
  // NOTE: Copied from Cortex-A57.
  let LoopMicroOpBufferSize = 16;
  let CompleteModel = 1;

  // Features with no scheduling information in this model.
  list<Predicate> UnsupportedFeatures =
      !listconcat(SVE2Unsupported.F,
                  SMEUnsupported.F,
                  [HasMTE, HasCPA, HasCSSC]);
}

//===----------------------------------------------------------------------===//
// Define each kind of processor resource and number available on Neoverse V1.
// Instructions are first fetched and then decoded into internal macro-ops
// (MOPs).  From there, the MOPs proceed through register renaming and dispatch
// stages.  A MOP can be split into one or more micro-ops further down the
// pipeline, after the decode stage.
// Once dispatched, micro-ops wait for their
// operands and issue out-of-order to one of the issue pipelines.  Each issue
// pipeline can accept one micro-op per cycle.

let SchedModel = NeoverseV1Model in {

// Define the issue ports.
// The template argument is the number of identical units behind each port.
def V1UnitB   : ProcResource<2>;  // Branch 0/1
def V1UnitS   : ProcResource<2>;  // Integer single cycle 0/1
def V1UnitM0  : ProcResource<1>;  // Integer multicycle 0
def V1UnitM1  : ProcResource<1>;  // Integer multicycle 1
def V1UnitL01 : ProcResource<2>;  // Load/Store 0/1
def V1UnitL2  : ProcResource<1>;  // Load 2
def V1UnitD   : ProcResource<2>;  // Store data 0/1
def V1UnitV0  : ProcResource<1>;  // FP/ASIMD 0
def V1UnitV1  : ProcResource<1>;  // FP/ASIMD 1
def V1UnitV2  : ProcResource<1>;  // FP/ASIMD 2
def V1UnitV3  : ProcResource<1>;  // FP/ASIMD 3
def V1UnitFlg : ProcResource<3>;  // Flags

// Groups: a micro-op assigned to a group may use any member listed.
// Integer units
def V1UnitI   : ProcResGroup<[V1UnitS, V1UnitM0, V1UnitM1]>;
// Integer multicycle units
def V1UnitM   : ProcResGroup<[V1UnitM0, V1UnitM1]>;
// Load units
def V1UnitL   : ProcResGroup<[V1UnitL01, V1UnitL2]>;
// FP/ASIMD units
def V1UnitV   : ProcResGroup<[V1UnitV0, V1UnitV1, V1UnitV2, V1UnitV3]>;
// FP/ASIMD 0/1 units
def V1UnitV01 : ProcResGroup<[V1UnitV0, V1UnitV1]>;
// FP/ASIMD 0/2 units
def V1UnitV02 : ProcResGroup<[V1UnitV0, V1UnitV2]>;
// FP/ASIMD 1/3 units
def V1UnitV13 : ProcResGroup<[V1UnitV1, V1UnitV3]>;

// Define commonly used read types.

// No generic forwarding is provided for these types.
// A read advance of 0 means the operand is needed at issue (no forwarding).
def : ReadAdvance<ReadI,       0>;
def : ReadAdvance<ReadISReg,   0>;
def : ReadAdvance<ReadIEReg,   0>;
def : ReadAdvance<ReadIM,      0>;
def : ReadAdvance<ReadIMA,     0>;
def : ReadAdvance<ReadID,      0>;
def : ReadAdvance<ReadExtrHi,  0>;
def : ReadAdvance<ReadAdrBase, 0>;
def : ReadAdvance<ReadST,      0>;
def : ReadAdvance<ReadVLD,     0>;

// Writes that consume no modelled issue port.
def : WriteRes<WriteAtomic,  []> { let Unsupported = 1; }
def : WriteRes<WriteBarrier, []> { let Latency = 1; }
def : WriteRes<WriteHint,    []> { let Latency = 1; }


//===----------------------------------------------------------------------===//
// Define generic 0 micro-op types

def V1Write_0c_0Z : SchedWriteRes<[]> {
  let Latency = 0;
  let NumMicroOps = 0;
}


//===----------------------------------------------------------------------===//
// Define generic 1 micro-op types

// Names in this list read V1Write_<latency>c_1<unit>: one micro-op issued to
// the named unit or unit group with the given latency in cycles.
def V1Write_1c_1B      : SchedWriteRes<[V1UnitB]>            { let Latency = 1; }
def V1Write_1c_1I      : SchedWriteRes<[V1UnitI]>            { let Latency = 1; }
def V1Write_1c_1I_1Flg : SchedWriteRes<[V1UnitI, V1UnitFlg]> { let Latency = 1; }
def V1Write_4c_1L      : SchedWriteRes<[V1UnitL]>            { let Latency = 4; }
def V1Write_6c_1L      : SchedWriteRes<[V1UnitL]>            { let Latency = 6; }
def V1Write_1c_1L01    : SchedWriteRes<[V1UnitL01]>          { let Latency = 1; }
def V1Write_4c_1L01    : SchedWriteRes<[V1UnitL01]>          { let Latency = 4; }
def V1Write_6c_1L01    : SchedWriteRes<[V1UnitL01]>          { let Latency = 6; }
def V1Write_2c_1M      : SchedWriteRes<[V1UnitM]>            { let Latency = 2; }
def V1Write_2c_1M_1Flg : SchedWriteRes<[V1UnitM, V1UnitFlg]> { let Latency = 2; }
def V1Write_3c_1M      : SchedWriteRes<[V1UnitM]>            { let Latency = 3; }
def V1Write_4c_1M      : SchedWriteRes<[V1UnitM]>            { let Latency = 4; }
def V1Write_1c_1M0     : SchedWriteRes<[V1UnitM0]>           { let Latency = 1; }
def V1Write_2c_1M0     : SchedWriteRes<[V1UnitM0]>           { let Latency = 2; }
def V1Write_3c_1M0     : SchedWriteRes<[V1UnitM0]>           { let Latency = 3; }
def V1Write_5c_1M0     : SchedWriteRes<[V1UnitM0]>           { let Latency = 5; }
// Naming convention used by every definition below, as visible from the
// values in this file:
//   V1Write_<latency>c[<release cycles>]_<count><unit>[_<count><unit>...]
// i.e. the number before 'c' is the Latency, an optional number right after
// 'c' is the ReleaseAtCycles value, and each <count><unit> pair names the
// issue ports used.
def V1Write_12c5_1M0   : SchedWriteRes<[V1UnitM0]>  { let Latency = 12;
                                                      let ReleaseAtCycles = [5]; }
def V1Write_20c5_1M0   : SchedWriteRes<[V1UnitM0]>  { let Latency = 20;
                                                      let ReleaseAtCycles = [5]; }
def V1Write_2c_1V      : SchedWriteRes<[V1UnitV]>   { let Latency = 2; }
def V1Write_3c_1V      : SchedWriteRes<[V1UnitV]>   { let Latency = 3; }
def V1Write_4c_1V      : SchedWriteRes<[V1UnitV]>   { let Latency = 4; }
def V1Write_5c_1V      : SchedWriteRes<[V1UnitV]>   { let Latency = 5; }
def V1Write_2c_1V0     : SchedWriteRes<[V1UnitV0]>  { let Latency = 2; }
def V1Write_3c_1V0     : SchedWriteRes<[V1UnitV0]>  { let Latency = 3; }
def V1Write_4c_1V0     : SchedWriteRes<[V1UnitV0]>  { let Latency = 4; }
def V1Write_6c_1V0     : SchedWriteRes<[V1UnitV0]>  { let Latency = 6; }
def V1Write_10c7_1V0   : SchedWriteRes<[V1UnitV0]>  { let Latency = 10;
                                                      let ReleaseAtCycles = [7]; }
def V1Write_12c7_1V0   : SchedWriteRes<[V1UnitV0]>  { let Latency = 12;
                                                      let ReleaseAtCycles = [7]; }
def V1Write_13c10_1V0  : SchedWriteRes<[V1UnitV0]>  { let Latency = 13;
                                                      let ReleaseAtCycles = [10]; }
def V1Write_15c7_1V0   : SchedWriteRes<[V1UnitV0]>  { let Latency = 15;
                                                      let ReleaseAtCycles = [7]; }
def V1Write_16c7_1V0   : SchedWriteRes<[V1UnitV0]>  { let Latency = 16;
                                                      let ReleaseAtCycles = [7]; }
def V1Write_20c7_1V0   : SchedWriteRes<[V1UnitV0]>  { let Latency = 20;
                                                      let ReleaseAtCycles = [7]; }
def V1Write_2c_1V01    : SchedWriteRes<[V1UnitV01]> { let Latency = 2; }
def V1Write_3c_1V01    : SchedWriteRes<[V1UnitV01]> { let Latency = 3; }
def V1Write_4c_1V01    : SchedWriteRes<[V1UnitV01]> { let Latency = 4; }
def V1Write_5c_1V01    : SchedWriteRes<[V1UnitV01]> { let Latency = 5; }
def V1Write_3c_1V02    : SchedWriteRes<[V1UnitV02]> { let Latency = 3; }
def V1Write_4c_1V02    : SchedWriteRes<[V1UnitV02]> { let Latency = 4; }
def V1Write_7c7_1V02   : SchedWriteRes<[V1UnitV02]> { let Latency = 7;
                                                      let ReleaseAtCycles = [7]; }
def V1Write_10c7_1V02  : SchedWriteRes<[V1UnitV02]> { let Latency = 10;
                                                      let ReleaseAtCycles = [7]; }
def V1Write_13c5_1V02  : SchedWriteRes<[V1UnitV02]> { let Latency = 13;
                                                      let ReleaseAtCycles = [5]; }
def V1Write_13c11_1V02 : SchedWriteRes<[V1UnitV02]> { let Latency = 13;
                                                      let ReleaseAtCycles = [11]; }
def V1Write_15c7_1V02  : SchedWriteRes<[V1UnitV02]> { let Latency = 15;
                                                      let ReleaseAtCycles = [7]; }
def V1Write_16c7_1V02  : SchedWriteRes<[V1UnitV02]> { let Latency = 16;
                                                      let ReleaseAtCycles = [7]; }
def V1Write_2c_1V1     : SchedWriteRes<[V1UnitV1]>  { let Latency = 2; }
def V1Write_3c_1V1     : SchedWriteRes<[V1UnitV1]>  { let Latency = 3; }
def V1Write_4c_1V1     : SchedWriteRes<[V1UnitV1]>  { let Latency = 4; }
def V1Write_2c_1V13    : SchedWriteRes<[V1UnitV13]> { let Latency = 2; }
def V1Write_4c_1V13    : SchedWriteRes<[V1UnitV13]> { let Latency = 4; }

//===----------------------------------------------------------------------===//
// Define generic 2 micro-op types

let Latency = 1, NumMicroOps = 2 in
def V1Write_1c_1B_1S     : SchedWriteRes<[V1UnitB, V1UnitS]>;
let Latency = 6, NumMicroOps = 2 in
def V1Write_6c_1B_1M0    : SchedWriteRes<[V1UnitB, V1UnitM0]>;
let Latency = 3, NumMicroOps = 2 in
def V1Write_3c_1I_1M     : SchedWriteRes<[V1UnitI, V1UnitM]>;
let Latency = 5, NumMicroOps = 2 in
def V1Write_5c_1I_1L     : SchedWriteRes<[V1UnitI, V1UnitL]>;
let Latency = 7, NumMicroOps = 2 in
def V1Write_7c_1I_1L     : SchedWriteRes<[V1UnitI, V1UnitL]>;
let Latency = 6, NumMicroOps = 2 in
def V1Write_6c_2L        : SchedWriteRes<[V1UnitL, V1UnitL]>;
let Latency = 6, NumMicroOps = 2 in
def V1Write_6c_1L_1M     : SchedWriteRes<[V1UnitL, V1UnitM]>;
let Latency = 8, NumMicroOps = 2 in
def V1Write_8c_1L_1V     : SchedWriteRes<[V1UnitL, V1UnitV]>;
let Latency = 9, NumMicroOps = 2 in
def V1Write_9c_1L_1V     : SchedWriteRes<[V1UnitL, V1UnitV]>;
let Latency = 11, NumMicroOps = 2 in
def V1Write_11c_1L_1V    : SchedWriteRes<[V1UnitL, V1UnitV]>;
let Latency = 1, NumMicroOps = 2 in
def V1Write_1c_1L01_1D   : SchedWriteRes<[V1UnitL01, V1UnitD]>;
let Latency = 6, NumMicroOps = 2 in
def V1Write_6c_1L01_1S   : SchedWriteRes<[V1UnitL01, V1UnitS]>;
let Latency = 7, NumMicroOps = 2 in
def V1Write_7c_1L01_1S   : SchedWriteRes<[V1UnitL01, V1UnitS]>;
let Latency = 2, NumMicroOps = 2 in
def V1Write_2c_1L01_1V   : SchedWriteRes<[V1UnitL01, V1UnitV]>;
let Latency = 4, NumMicroOps = 2 in
def V1Write_4c_1L01_1V   : SchedWriteRes<[V1UnitL01, V1UnitV]>;
let Latency = 6, NumMicroOps = 2 in
def V1Write_6c_1L01_1V   : SchedWriteRes<[V1UnitL01, V1UnitV]>;
let Latency = 2, NumMicroOps = 2 in
def V1Write_2c_1L01_1V01 : SchedWriteRes<[V1UnitL01, V1UnitV01]>;
let Latency = 4, NumMicroOps = 2 in
def V1Write_4c_1L01_1V01 : SchedWriteRes<[V1UnitL01, V1UnitV01]>;
let Latency = 2, NumMicroOps = 2 in
def V1Write_2c_2M0       : SchedWriteRes<[V1UnitM0, V1UnitM0]>;
let Latency = 3, NumMicroOps = 2 in
def V1Write_3c_2M0       : SchedWriteRes<[V1UnitM0, V1UnitM0]>;
let Latency = 9, NumMicroOps = 2 in
def V1Write_9c_1M0_1L    : SchedWriteRes<[V1UnitM0, V1UnitL]>;
let Latency = 5, NumMicroOps = 2 in
def V1Write_5c_1M0_1V    : SchedWriteRes<[V1UnitM0, V1UnitV]>;
let Latency = 4, NumMicroOps = 2 in
def V1Write_4c_1M0_1V0   : SchedWriteRes<[V1UnitM0, V1UnitV0]>;
// The second micro-op goes to V0, as the name states; the V1 variants are the
// separate *_1M0_1V1 definitions below.
let Latency = 7, NumMicroOps = 2 in
def V1Write_7c_1M0_1V0   : SchedWriteRes<[V1UnitM0, V1UnitV0]>;
let Latency = 5, NumMicroOps = 2 in
def V1Write_5c_1M0_1V01  : SchedWriteRes<[V1UnitM0, V1UnitV01]>;
let Latency = 6, NumMicroOps = 2 in
def V1Write_6c_1M0_1V1   : SchedWriteRes<[V1UnitM0, V1UnitV1]>;
let Latency = 9, NumMicroOps = 2 in
def V1Write_9c_1M0_1V1   : SchedWriteRes<[V1UnitM0, V1UnitV1]>;
let Latency = 4, NumMicroOps = 2 in
def V1Write_4c_2V        : SchedWriteRes<[V1UnitV, V1UnitV]>;
let Latency = 8, NumMicroOps = 2 in
def V1Write_8c_1V_1V01   : SchedWriteRes<[V1UnitV, V1UnitV01]>;
let Latency = 4, NumMicroOps = 2 in
def V1Write_4c_2V0       : SchedWriteRes<[V1UnitV0, V1UnitV0]>;
let Latency = 5, NumMicroOps = 2 in
def V1Write_5c_2V0       : SchedWriteRes<[V1UnitV0, V1UnitV0]>;
let Latency = 2, NumMicroOps = 2 in
def V1Write_2c_2V01      : SchedWriteRes<[V1UnitV01, V1UnitV01]>;
let Latency = 4, NumMicroOps = 2 in
def V1Write_4c_2V01      : SchedWriteRes<[V1UnitV01, V1UnitV01]>;
let Latency = 4, NumMicroOps = 2 in
def V1Write_4c_2V02      : SchedWriteRes<[V1UnitV02, V1UnitV02]>;
let Latency = 6, NumMicroOps = 2 in
def V1Write_6c_2V02      : SchedWriteRes<[V1UnitV02, V1UnitV02]>;
let Latency = 4, NumMicroOps = 2 in
def V1Write_4c_1V13_1V   : SchedWriteRes<[V1UnitV13, V1UnitV]>;
let Latency = 4, NumMicroOps = 2 in
def V1Write_4c_2V13      : SchedWriteRes<[V1UnitV13, V1UnitV13]>;

//===----------------------------------------------------------------------===//
// Define generic 3 micro-op types

let Latency = 2, NumMicroOps = 3 in
def V1Write_2c_1I_1L01_1V01 : SchedWriteRes<[V1UnitI, V1UnitL01, V1UnitV01]>;
let Latency = 7, NumMicroOps = 3 in
def V1Write_7c_2M0_1V01     : SchedWriteRes<[V1UnitM0, V1UnitM0, V1UnitV01]>;
let Latency = 8, NumMicroOps = 3 in
def V1Write_8c_1L_2V        : SchedWriteRes<[V1UnitL, V1UnitV, V1UnitV]>;
let Latency = 6, NumMicroOps = 3 in
def V1Write_6c_3L           : SchedWriteRes<[V1UnitL, V1UnitL, V1UnitL]>;
let Latency = 2, NumMicroOps = 3 in
def V1Write_2c_1L01_1S_1V   : SchedWriteRes<[V1UnitL01, V1UnitS, V1UnitV]>;
let Latency = 4, NumMicroOps = 3 in
def V1Write_4c_1L01_1S_1V   : SchedWriteRes<[V1UnitL01, V1UnitS, V1UnitV]>;
let Latency = 2, NumMicroOps = 3 in
def V1Write_2c_2L01_1V01    : SchedWriteRes<[V1UnitL01, V1UnitL01, V1UnitV01]>;
let Latency = 6, NumMicroOps = 3 in
def V1Write_6c_3V           : SchedWriteRes<[V1UnitV, V1UnitV, V1UnitV]>;
let Latency = 4, NumMicroOps = 3 in
def V1Write_4c_3V01         : SchedWriteRes<[V1UnitV01, V1UnitV01, V1UnitV01]>;
let Latency = 6, NumMicroOps = 3 in
def V1Write_6c_3V01         : SchedWriteRes<[V1UnitV01, V1UnitV01, V1UnitV01]>;
let Latency = 8, NumMicroOps = 3 in
def V1Write_8c_3V01         : SchedWriteRes<[V1UnitV01, V1UnitV01, V1UnitV01]>;

//===----------------------------------------------------------------------===//
// Define generic 4 micro-op types

let Latency = 8, NumMicroOps = 4 in
def V1Write_8c_2M0_2V0      : SchedWriteRes<[V1UnitM0, V1UnitM0,
                                             V1UnitV0, V1UnitV0]>;
let Latency = 7, NumMicroOps = 4 in
def V1Write_7c_4L           : SchedWriteRes<[V1UnitL, V1UnitL, V1UnitL, V1UnitL]>;
let Latency = 8, NumMicroOps = 4 in
def V1Write_8c_2L_2V        : SchedWriteRes<[V1UnitL, V1UnitL,
                                             V1UnitV, V1UnitV]>;
let Latency = 9, NumMicroOps = 4 in
def V1Write_9c_2L_2V        : SchedWriteRes<[V1UnitL, V1UnitL,
                                             V1UnitV, V1UnitV]>;
let Latency = 11, NumMicroOps = 4 in
def V1Write_11c_2L_2V       : SchedWriteRes<[V1UnitL, V1UnitL,
                                             V1UnitV, V1UnitV]>;
let Latency = 10, NumMicroOps = 4 in
def V1Write_10c_2L01_2V     : SchedWriteRes<[V1UnitL01, V1UnitL01,
                                             V1UnitV, V1UnitV]>;
let Latency = 2, NumMicroOps = 4 in
def V1Write_2c_2L01_2V01    : SchedWriteRes<[V1UnitL01, V1UnitL01,
                                             V1UnitV01, V1UnitV01]>;
let Latency = 4, NumMicroOps = 4 in
def V1Write_4c_2L01_2V01    : SchedWriteRes<[V1UnitL01, V1UnitL01,
                                             V1UnitV01, V1UnitV01]>;
let Latency = 8, NumMicroOps = 4 in
def V1Write_8c_2L01_2V01    : SchedWriteRes<[V1UnitL01, V1UnitL01,
                                             V1UnitV01, V1UnitV01]>;
let Latency = 9, NumMicroOps = 4 in
def V1Write_9c_2L01_2V01    : SchedWriteRes<[V1UnitL01, V1UnitL01,
                                             V1UnitV01, V1UnitV01]>;
let Latency = 10, NumMicroOps = 4 in
def V1Write_10c_2L01_2V01   : SchedWriteRes<[V1UnitL01, V1UnitL01,
                                             V1UnitV01, V1UnitV01]>;
let Latency = 10, NumMicroOps = 4 in
def V1Write_10c_1V_1V01_2V1 : SchedWriteRes<[V1UnitV, V1UnitV01,
                                             V1UnitV1, V1UnitV1]>;
let Latency = 12, NumMicroOps = 4 in
def V1Write_12c_1V_1V01_2V1 : SchedWriteRes<[V1UnitV, V1UnitV01,
                                             V1UnitV1, V1UnitV1]>;
let Latency = 6, NumMicroOps = 4 in
def V1Write_6c_4V0          : SchedWriteRes<[V1UnitV0, V1UnitV0,
                                             V1UnitV0, V1UnitV0]>;
let Latency = 12, NumMicroOps = 4 in
def V1Write_12c_4V01        : SchedWriteRes<[V1UnitV01, V1UnitV01,
                                             V1UnitV01, V1UnitV01]>;
// NOTE(review): the name and NumMicroOps say four V02 micro-ops but only two
// resources are listed - confirm whether this is intentional before changing.
let Latency = 6, NumMicroOps = 4 in
def V1Write_6c_4V02         : SchedWriteRes<[V1UnitV02, V1UnitV02]>;

//===----------------------------------------------------------------------===//
// Define generic 5 micro-op types

let Latency = 8, NumMicroOps = 5 in
def V1Write_8c_2L_3V            : SchedWriteRes<[V1UnitL, V1UnitL,
                                                 V1UnitV, V1UnitV, V1UnitV]>;
let Latency = 14, NumMicroOps = 5 in
def V1Write_14c_1V_1V0_2V1_1V13 : SchedWriteRes<[V1UnitV,
                                                 V1UnitV0,
                                                 V1UnitV1, V1UnitV1,
                                                 V1UnitV13]>;
let Latency = 9, NumMicroOps = 5 in
def V1Write_9c_1V_4V01          : SchedWriteRes<[V1UnitV,
                                                 V1UnitV01, V1UnitV01,
                                                 V1UnitV01, V1UnitV01]>;
let Latency = 6, NumMicroOps = 5 in
def V1Write_6c_5V01             : SchedWriteRes<[V1UnitV01, V1UnitV01,
                                                 V1UnitV01, V1UnitV01, V1UnitV01]>;

//===----------------------------------------------------------------------===//
// Define generic 6 micro-op types

let Latency = 6, NumMicroOps = 6 in
def V1Write_6c_3L_3V      : SchedWriteRes<[V1UnitL, V1UnitL, V1UnitL,
                                           V1UnitV, V1UnitV, V1UnitV]>;
let Latency = 8, NumMicroOps = 6 in
def V1Write_8c_3L_3V      : SchedWriteRes<[V1UnitL, V1UnitL, V1UnitL,
                                           V1UnitV, V1UnitV, V1UnitV]>;
let Latency = 2, NumMicroOps = 6 in
def V1Write_2c_3L01_3V01  : SchedWriteRes<[V1UnitL01, V1UnitL01, V1UnitL01,
                                           V1UnitV01, V1UnitV01, V1UnitV01]>;
let Latency = 5, NumMicroOps = 6 in
def V1Write_5c_3L01_3V01  : SchedWriteRes<[V1UnitL01, V1UnitL01, V1UnitL01,
                                           V1UnitV01, V1UnitV01, V1UnitV01]>;
let Latency = 6, NumMicroOps = 6 in
def V1Write_6c_3L01_3V01  : SchedWriteRes<[V1UnitL01, V1UnitL01, V1UnitL01,
                                           V1UnitV01, V1UnitV01, V1UnitV01]>;
let Latency = 11, NumMicroOps = 6 in
def V1Write_11c_3L01_3V01 : SchedWriteRes<[V1UnitL01, V1UnitL01, V1UnitL01,
                                           V1UnitV01, V1UnitV01, V1UnitV01]>;
let Latency = 11, NumMicroOps = 6 in
def V1Write_11c_1V_5V01   : SchedWriteRes<[V1UnitV,
                                           V1UnitV01, V1UnitV01,
                                           V1UnitV01, V1UnitV01, V1UnitV01]>;
let Latency = 13, NumMicroOps = 6 in
def V1Write_13c_6V01      : SchedWriteRes<[V1UnitV01, V1UnitV01, V1UnitV01,
                                           V1UnitV01, V1UnitV01, V1UnitV01]>;

//===----------------------------------------------------------------------===//
// Define generic 7 micro-op types

let Latency = 8, NumMicroOps = 7 in
def V1Write_8c_3L_4V : SchedWriteRes<[V1UnitL, V1UnitL, V1UnitL,
                                      V1UnitV, V1UnitV, V1UnitV, V1UnitV]>;
// Latency is 13 cycles, matching the name and the 10 micro-op sibling
// V1Write_13c_4L01_2S_4V01.
let Latency = 13, NumMicroOps = 7 in
def V1Write_13c_3L01_1S_3V01 : SchedWriteRes<[V1UnitL01, V1UnitL01, V1UnitL01,
                                              V1UnitS,
                                              V1UnitV01, V1UnitV01, V1UnitV01]>;

//===----------------------------------------------------------------------===//
// Define generic 8 micro-op types

let Latency = 9, NumMicroOps = 8 in
def V1Write_9c_4L_4V : SchedWriteRes<[V1UnitL, V1UnitL,
                                      V1UnitL, V1UnitL,
                                      V1UnitV, V1UnitV,
                                      V1UnitV, V1UnitV]>;
let Latency = 2, NumMicroOps = 8 in
def V1Write_2c_4L01_4V01 : SchedWriteRes<[V1UnitL01, V1UnitL01,
                                          V1UnitL01, V1UnitL01,
                                          V1UnitV01, V1UnitV01,
                                          V1UnitV01, V1UnitV01]>;
let Latency = 4, NumMicroOps = 8 in
def V1Write_4c_4L01_4V01 : SchedWriteRes<[V1UnitL01, V1UnitL01,
                                          V1UnitL01, V1UnitL01,
                                          V1UnitV01, V1UnitV01,
                                          V1UnitV01, V1UnitV01]>;
let Latency = 12, NumMicroOps = 8 in
def V1Write_12c_4L01_4V01 : SchedWriteRes<[V1UnitL01, V1UnitL01,
                                           V1UnitL01, V1UnitL01,
                                           V1UnitV01, V1UnitV01,
                                           V1UnitV01, V1UnitV01]>;

//===----------------------------------------------------------------------===//
// Define generic 10 micro-op types

let Latency = 13, NumMicroOps = 10 in
def V1Write_13c_4L01_2S_4V01 : SchedWriteRes<[V1UnitL01, V1UnitL01,
                                              V1UnitL01, V1UnitL01,
                                              V1UnitS, V1UnitS,
                                              V1UnitV01, V1UnitV01,
                                              V1UnitV01, V1UnitV01]>;
let Latency = 7, NumMicroOps = 10 in
def V1Write_7c_5L01_5V : SchedWriteRes<[V1UnitL01, V1UnitL01,
                                        V1UnitL01, V1UnitL01, V1UnitL01,
                                        V1UnitV, V1UnitV,
                                        V1UnitV, V1UnitV, V1UnitV]>;
let Latency = 11, NumMicroOps = 10 in
def V1Write_11c_10V0 : SchedWriteRes<[V1UnitV0,
                                      V1UnitV0, V1UnitV0, V1UnitV0,
                                      V1UnitV0, V1UnitV0, V1UnitV0,
                                      V1UnitV0, V1UnitV0, V1UnitV0]>;

//===----------------------------------------------------------------------===//
// Define generic 12 micro-op types

let Latency = 7, NumMicroOps = 12 in
def V1Write_7c_6L01_6V01 : SchedWriteRes<[V1UnitL01, V1UnitL01, V1UnitL01,
                                          V1UnitL01, V1UnitL01, V1UnitL01,
                                          V1UnitV01, V1UnitV01, V1UnitV01,
                                          V1UnitV01, V1UnitV01, V1UnitV01]>;

//===----------------------------------------------------------------------===//
// Define generic 15 micro-op types

let Latency = 7, NumMicroOps = 15 in
def V1Write_7c_5L01_5S_5V : SchedWriteRes<[V1UnitL01, V1UnitL01,
                                           V1UnitL01, V1UnitL01, V1UnitL01,
                                           V1UnitS, V1UnitS,
                                           V1UnitS, V1UnitS, V1UnitS,
                                           V1UnitV, V1UnitV,
                                           V1UnitV, V1UnitV, V1UnitV]>;


//===----------------------------------------------------------------------===//
// Define generic 18 micro-op types

// Latency is 11 cycles, matching the name and the 27 micro-op sibling
// V1Write_11c_9L01_9S_9V.
let Latency = 11, NumMicroOps = 18 in
def V1Write_11c_9L01_9V : SchedWriteRes<[V1UnitL01, V1UnitL01, V1UnitL01,
                                         V1UnitL01, V1UnitL01, V1UnitL01,
                                         V1UnitL01, V1UnitL01, V1UnitL01,
                                         V1UnitV, V1UnitV, V1UnitV,
                                         V1UnitV, V1UnitV, V1UnitV,
                                         V1UnitV, V1UnitV, V1UnitV]>;
let Latency = 19, NumMicroOps = 18 in
def V1Write_19c_18V0 : SchedWriteRes<[V1UnitV0, V1UnitV0, V1UnitV0,
                                      V1UnitV0, V1UnitV0, V1UnitV0,
                                      V1UnitV0, V1UnitV0, V1UnitV0,
                                      V1UnitV0, V1UnitV0, V1UnitV0,
                                      V1UnitV0, V1UnitV0, V1UnitV0,
                                      V1UnitV0, V1UnitV0, V1UnitV0]>;

//===----------------------------------------------------------------------===//
// Define
// generic 27 micro-op types

let Latency = 11, NumMicroOps = 27 in
def V1Write_11c_9L01_9S_9V : SchedWriteRes<[V1UnitL01, V1UnitL01, V1UnitL01,
                                            V1UnitL01, V1UnitL01, V1UnitL01,
                                            V1UnitL01, V1UnitL01, V1UnitL01,
                                            V1UnitS, V1UnitS, V1UnitS,
                                            V1UnitS, V1UnitS, V1UnitS,
                                            V1UnitS, V1UnitS, V1UnitS,
                                            V1UnitV, V1UnitV, V1UnitV,
                                            V1UnitV, V1UnitV, V1UnitV,
                                            V1UnitV, V1UnitV, V1UnitV]>;

//===----------------------------------------------------------------------===//
// Define forwarded types

// Each V1Rd_* below is a SchedReadAdvance whose list names the V1Wr_* (or
// generic) writes it applies to; the first argument is the advance in cycles.

// NOTE: SOG, p. 20, n. 2: Accumulator forwarding is not supported for
//       consumers of 64 bit multiply high operations?
def V1Wr_IM   : SchedWriteRes<[V1UnitM]>  { let Latency = 2; }
def V1Wr_IMA  : SchedWriteRes<[V1UnitM0]> { let Latency = 2; }
def V1WriteIM : SchedWriteVariant<
                  [SchedVar<NeoverseMULIdiomPred, [V1Wr_IM]>,
                   SchedVar<NoSchedPred,          [V1Wr_IMA]>]>;
def V1Rd_IMA  : SchedReadAdvance<1, [V1Wr_IMA]>;

def V1Wr_FMA : SchedWriteRes<[V1UnitV]> { let Latency = 4; }
def V1Rd_FMA : SchedReadAdvance<2, [WriteFMul, V1Wr_FMA]>;

def V1Wr_ADA : SchedWriteRes<[V1UnitV13]> { let Latency = 4; }
def V1Rd_ADA : SchedReadAdvance<3, [V1Wr_ADA]>;

def V1Wr_VDOT : SchedWriteRes<[V1UnitV]> { let Latency = 3; }
def V1Rd_VDOT : SchedReadAdvance<2, [V1Wr_VDOT]>;

def V1Wr_VMMA : SchedWriteRes<[V1UnitV]> { let Latency = 3; }
def V1Rd_VMMA : SchedReadAdvance<2, [V1Wr_VMMA]>;

def V1Wr_VMA : SchedWriteRes<[V1UnitV02]> { let Latency = 4; }
def V1Rd_VMA : SchedReadAdvance<3, [V1Wr_VMA]>;

def V1Wr_VMAL : SchedWriteRes<[V1UnitV02]> { let Latency = 4; }
def V1Rd_VMAL : SchedReadAdvance<3, [V1Wr_VMAL]>;

def V1Wr_VSA : SchedWriteRes<[V1UnitV13]> { let Latency = 4; }
def V1Rd_VSA : SchedReadAdvance<3, [V1Wr_VSA]>;

def V1Wr_FCMA : SchedWriteRes<[V1UnitV]> { let Latency = 4; }
def V1Rd_FCMA : SchedReadAdvance<2, [V1Wr_FCMA]>;

def V1Wr_FPM  : SchedWriteRes<[V1UnitV]> { let Latency = 3; }
def V1Wr_FPMA : SchedWriteRes<[V1UnitV]> { let Latency = 4; }
def V1Rd_FPMA : SchedReadAdvance<2, [V1Wr_FPM, V1Wr_FPMA]>;

def V1Wr_FPMAL : SchedWriteRes<[V1UnitV]> { let Latency = 5; }
def V1Rd_FPMAL : SchedReadAdvance<3, [V1Wr_FPMAL]>;

def V1Wr_BFD : SchedWriteRes<[V1UnitV]> { let Latency = 4; }
def V1Rd_BFD : SchedReadAdvance<2, [V1Wr_BFD]>;

def V1Wr_BFMMA : SchedWriteRes<[V1UnitV]> { let Latency = 5; }
def V1Rd_BFMMA : SchedReadAdvance<2, [V1Wr_BFMMA]>;

def V1Wr_BFMLA : SchedWriteRes<[V1UnitV]> { let Latency = 4; }
def V1Rd_BFMLA : SchedReadAdvance<2, [V1Wr_BFMLA]>;

def V1Wr_CRC : SchedWriteRes<[V1UnitM0]> { let Latency = 2; }
def V1Rd_CRC : SchedReadAdvance<1, [V1Wr_CRC]>;

def V1Wr_ZDOTB : SchedWriteRes<[V1UnitV01]> { let Latency = 3; }
def V1Rd_ZDOTB : SchedReadAdvance<2, [V1Wr_ZDOTB]>;

def V1Wr_ZUDOTB : SchedWriteRes<[V1UnitV]> { let Latency = 3; }
def V1Rd_ZUDOTB : SchedReadAdvance<2, [V1Wr_ZUDOTB]>;

def V1Wr_ZDOTH : SchedWriteRes<[V1UnitV0]> { let Latency = 4; }
def V1Rd_ZDOTH : SchedReadAdvance<3, [V1Wr_ZDOTH]>;

def V1Wr_ZMMA : SchedWriteRes<[V1UnitV01]> { let Latency = 3; }
def V1Rd_ZMMA : SchedReadAdvance<2, [V1Wr_ZMMA]>;

let Latency = 5, NumMicroOps = 2 in
def V1Wr_ZMAD : SchedWriteRes<[V1UnitV0, V1UnitV0]>;
def V1Rd_ZMAD : SchedReadAdvance<3, [V1Wr_ZMAD]>;

def V1Wr_ZFCMA : SchedWriteRes<[V1UnitV01]> { let Latency = 5; }
def V1Rd_ZFCMA : SchedReadAdvance<3, [V1Wr_ZFCMA]>;

def V1Wr_ZFMA : SchedWriteRes<[V1UnitV01]> { let Latency = 4; }
def V1Rd_ZFMA : SchedReadAdvance<2, [V1Wr_ZFMA]>;

def V1Wr_ZBFDOT : SchedWriteRes<[V1UnitV01]> { let Latency = 4; }
def V1Rd_ZBFDOT : SchedReadAdvance<2, [V1Wr_ZBFDOT]>;
def V1Wr_ZBFMMA : SchedWriteRes<[V1UnitV01]> { let Latency = 5; }
def V1Rd_ZBFMMA : SchedReadAdvance<2, [V1Wr_ZBFMMA]>;
def V1Wr_ZBFMAL : SchedWriteRes<[V1UnitV01]> { let Latency = 5; }
def V1Rd_ZBFMAL : SchedReadAdvance<3, [V1Wr_ZBFMAL]>;

// Miscellaneous Instructions
// -----------------------------------------------------------------------------

// COPY
def : InstRW<[V1Write_1c_1I], (instrs COPY)>;

// MSR
def : WriteRes<WriteSys, []> { let Latency = 1; }


// Branch Instructions
// -----------------------------------------------------------------------------

// Branch, immed
// Compare and branch
def : SchedAlias<WriteBr, V1Write_1c_1B>;

// Branch, register
def : SchedAlias<WriteBrReg, V1Write_1c_1B>;

// Branch and link, immed
// Branch and link, register
def : InstRW<[V1Write_1c_1B_1S], (instrs BL, BLR)>;

// Compare and branch
def : InstRW<[V1Write_1c_1B], (instregex "^[CT]BN?Z[XW]$")>;


// Arithmetic and Logical Instructions
// -----------------------------------------------------------------------------

// ALU, basic
// Conditional compare
// Conditional select
// Logical, basic
// Address generation
// Count leading
// Reverse bits/bytes
// Move immediate
def : SchedAlias<WriteI, V1Write_1c_1I>;

// ALU, basic, flagset
def : InstRW<[V1Write_1c_1I_1Flg],
             (instregex "^(ADD|SUB)S[WX]r[ir]$",
                        "^(ADC|SBC)S[WX]r$",
                        "^ANDS[WX]ri$",
                        "^(AND|BIC)S[WX]rr$")>;

// ALU, extend and shift
def : SchedAlias<WriteIEReg, V1Write_2c_1M>;

// Arithmetic, LSL shift, shift <= 4
// Arithmetic, LSR/ASR/ROR shift or LSL shift > 4
// The first variant whose predicate holds is selected; NoSchedPred is the
// fallback.
def V1WriteISReg : SchedWriteVariant<[
  SchedVar<IsCheapLSL,  [V1Write_1c_1I]>,
  SchedVar<NoSchedPred, [V1Write_2c_1M]>
]>;
def : SchedAlias<WriteISReg, V1WriteISReg>;

// Arithmetic, flagset, LSL shift, shift <= 4
// Arithmetic, flagset, LSR/ASR/ROR shift or LSL shift > 4
def V1WriteISRegS : SchedWriteVariant<[
  SchedVar<IsCheapLSL,  [V1Write_1c_1I_1Flg]>,
  SchedVar<NoSchedPred, [V1Write_2c_1M_1Flg]>
]>;
def : InstRW<[V1WriteISRegS],
             (instregex "^(ADD|SUB)S(([WX]r[sx])|Xrx64)$")>;

// Logical, shift, no flagset
def : InstRW<[V1Write_1c_1I], (instregex "^(AND|BIC|EON|EOR|ORN|ORR)[WX]rs$")>;

// Logical, shift, flagset
def : InstRW<[V1Write_2c_1M_1Flg], (instregex "^(AND|BIC)S[WX]rs$")>;

// Flag manipulation instructions
def : InstRW<[V1Write_1c_1I_1Flg], (instrs SETF8, SETF16, RMIF, CFINV)>;


// Divide and multiply instructions
// -----------------------------------------------------------------------------

// Divide
def : SchedAlias<WriteID32, V1Write_12c5_1M0>;
def : SchedAlias<WriteID64, V1Write_20c5_1M0>;

def : SchedAlias<WriteIM32, V1Write_2c_1M>;
def : SchedAlias<WriteIM64, V1Write_2c_1M>;

// Multiply
// Multiply accumulate, W-form
// Multiply accumulate, X-form
def : InstRW<[V1WriteIM, ReadIM, ReadIM, V1Rd_IMA],
             (instregex "^M(ADD|SUB)[WX]rrr$")>;

// Multiply accumulate long
// Multiply long
def : InstRW<[V1WriteIM, ReadIM, ReadIM, V1Rd_IMA],
             (instregex "^(S|U)M(ADD|SUB)Lrrr$")>;
// Multiply high
def : InstRW<[V1Write_3c_1M, ReadIM, ReadIM], (instrs SMULHrr, UMULHrr)>;


// Pointer Authentication Instructions (v8.3 PAC)
// -----------------------------------------------------------------------------

// Authenticate data address
// Authenticate instruction address
// Compute pointer authentication code for data address
// Compute pointer authentication code, using generic key
// Compute pointer authentication code for instruction address
def : InstRW<[V1Write_5c_1M0], (instregex "^AUT",
                                          "^PAC")>;

// Branch and link, register, with pointer authentication
// Branch, register, with pointer authentication
// Branch, return, with pointer authentication
def : InstRW<[V1Write_6c_1B_1M0], (instregex "^BL?RA[AB]Z?$",
                                             "^E?RETA[AB]$")>;

// Load register, with pointer authentication
def : InstRW<[V1Write_9c_1M0_1L], (instregex "^LDRA[AB](indexed|writeback)")>;

// Strip pointer authentication code
def : InstRW<[V1Write_2c_1M0], (instrs XPACD, XPACI, XPACLRI)>;


// Miscellaneous data-processing instructions
// -----------------------------------------------------------------------------

// Bitfield extract, one reg
// Bitfield extract, two regs
def V1WriteExtr : SchedWriteVariant<[
  SchedVar<IsRORImmIdiomPred, [V1Write_1c_1I]>,
  SchedVar<NoSchedPred,       [V1Write_3c_1I_1M]>
]>;
def : SchedAlias<WriteExtr, V1WriteExtr>;

// Bitfield move, basic
// Variable shift
def : SchedAlias<WriteIS, V1Write_1c_1I>;

// Bitfield move, insert
def : InstRW<[V1Write_2c_1M], (instregex "^BFM[WX]ri$")>;

// Move immediate
def : SchedAlias<WriteImm, V1Write_1c_1I>;


// Load instructions
// -----------------------------------------------------------------------------

// Load register, immed offset
def : SchedAlias<WriteLD, V1Write_4c_1L>;

// Load register, immed offset, index
def : SchedAlias<WriteLDIdx, V1Write_4c_1L>;
def : SchedAlias<WriteAdr,   V1Write_1c_1I>;

// Load pair, immed offset
def : SchedAlias<WriteLDHi, V1Write_4c_1L>;
def : InstRW<[V1Write_4c_1L, V1Write_0c_0Z], (instrs LDPWi, LDNPWi)>;
def : InstRW<[WriteAdr, V1Write_4c_1L, V1Write_0c_0Z],
             (instrs LDPWpost, LDPWpre)>;

// Load pair, signed immed offset, signed words
def : InstRW<[V1Write_5c_1I_1L, V1Write_0c_0Z], (instrs LDPSWi)>;

// Load pair, immed post or pre-index, signed words
def : InstRW<[WriteAdr, V1Write_5c_1I_1L, V1Write_0c_0Z],
             (instrs LDPSWpost, LDPSWpre)>;


// Store instructions
// -----------------------------------------------------------------------------

// Store register, immed offset
def : SchedAlias<WriteST, V1Write_1c_1L01_1D>;

// Store register, immed offset, index
def : SchedAlias<WriteSTIdx, V1Write_1c_1L01_1D>;

// Store
// pair, immed offset
def : SchedAlias<WriteSTP, V1Write_1c_1L01_1D>;


// FP data processing instructions
// -----------------------------------------------------------------------------

// FP absolute value
// FP arithmetic
// FP min/max
// FP negate
def : SchedAlias<WriteF, V1Write_2c_1V>;

// FP compare
def : SchedAlias<WriteFCmp, V1Write_2c_1V0>;

// FP divide
// FP square root
def : SchedAlias<WriteFDiv, V1Write_10c7_1V02>;

// FP divide, H-form
// FP square root, H-form
def : InstRW<[V1Write_7c7_1V02], (instrs FDIVHrr, FSQRTHr)>;

// FP divide, S-form
// FP square root, S-form
def : InstRW<[V1Write_10c7_1V02], (instrs FDIVSrr, FSQRTSr)>;

// FP divide, D-form
def : InstRW<[V1Write_15c7_1V02], (instrs FDIVDrr)>;

// FP square root, D-form
def : InstRW<[V1Write_16c7_1V02], (instrs FSQRTDr)>;

// FP multiply
def : WriteRes<WriteFMul, [V1UnitV]> { let Latency = 3; }

// FP multiply accumulate
// The last read is V1Rd_FMA, the forwarded (advanced) read.
def : InstRW<[V1Wr_FMA, ReadDefault, ReadDefault, V1Rd_FMA],
             (instregex "^FN?M(ADD|SUB)[HSD]rrr$")>;

// FP round to integral
def : InstRW<[V1Write_3c_1V02],
             (instregex "^FRINT[AIMNPXZ][HSD]r$",
                        "^FRINT(32|64)[XZ][SD]r$")>;

// FP select
def : InstRW<[V1Write_2c_1V01], (instregex "^FCSEL[HSD]rrr$")>;


// FP miscellaneous instructions
// -----------------------------------------------------------------------------

// FP convert, from gen to vec reg
def : InstRW<[V1Write_3c_1M0], (instregex "^[SU]CVTF[SU][WX][HSD]ri$")>;

// FP convert, from vec to gen reg
def : InstRW<[V1Write_3c_1V0], (instregex "^FCVT[AMNPZ][SU][SU][WX][HSD]r$")>;

// FP convert, Javascript from vec to gen reg
def : InstRW<[V1Write_3c_1V0], (instrs FJCVTZS)>;

// FP convert, from vec to vec reg
def : SchedAlias<WriteFCvt, V1Write_3c_1V02>;

// FP move, immed
def : SchedAlias<WriteFImm, V1Write_2c_1V>;

//
// FP move, register
def : InstRW<[V1Write_2c_1V],
             (instrs FMOVHr, FMOVSr, FMOVDr)>;

// FP transfer, from gen to low half of vec reg
def : InstRW<[V1Write_3c_1M0],
             (instrs FMOVWHr, FMOVXHr, FMOVWSr, FMOVXDr)>;

// FP transfer, from gen to high half of vec reg
def : InstRW<[V1Write_5c_1M0_1V],
             (instrs FMOVXDHighr)>;

// FP transfer, from vec to gen reg
def : SchedAlias<WriteFCopy, V1Write_2c_1V1>;


// FP load instructions
// -----------------------------------------------------------------------------

// Load vector reg, literal, S/D/Q forms
// Load vector reg, unscaled immed
// Load vector reg, unsigned immed
def : InstRW<[V1Write_6c_1L, ReadAdrBase],
             (instregex "^LDR[SDQ]l$",
                        "^LDUR[BHSDQ]i$",
                        "^LDR[BHSDQ]ui$")>;

// Load vector reg, immed post-index
// Load vector reg, immed pre-index
def : InstRW<[WriteAdr, V1Write_6c_1L],
             (instregex "^LDR[BHSDQ](post|pre)$")>;

// Load vector reg, register offset, basic
// Load vector reg, register offset, scale, S/D-form
// Load vector reg, register offset, extend
// Load vector reg, register offset, extend, scale, S/D-form
def : InstRW<[V1Write_6c_1L, ReadAdrBase],
             (instregex "^LDR[BSD]ro[WX]$")>;

// Load vector reg, register offset, scale, H/Q-form
// Load vector reg, register offset, extend, scale, H/Q-form
def : InstRW<[V1Write_7c_1I_1L, ReadAdrBase],
             (instregex "^LDR[HQ]ro[WX]$")>;

// Load vector pair, immed offset, S/D-form
def : InstRW<[V1Write_6c_1L, V1Write_0c_0Z],
             (instregex "^LDN?P[SD]i$")>;

// Load vector pair, immed offset, Q-form
def : InstRW<[V1Write_6c_1L, WriteLDHi],
             (instrs LDPQi, LDNPQi)>;

// Load vector pair, immed post-index, S/D-form
// Load vector pair, immed pre-index, S/D-form
def : InstRW<[WriteAdr, V1Write_6c_1L, V1Write_0c_0Z],
             (instregex "^LDP[SD](pre|post)$")>;

// Load vector pair, immed post-index, Q-form
// Load vector pair, immed
// pre-index, Q-form
def : InstRW<[WriteAdr, V1Write_6c_1L, WriteLDHi],
             (instrs LDPQpost, LDPQpre)>;


// FP store instructions
// -----------------------------------------------------------------------------

// Store vector reg, unscaled immed, B/H/S/D/Q-form
def : InstRW<[V1Write_2c_1L01_1V01],
             (instregex "^STUR[BHSDQ]i$")>;

// Store vector reg, immed post-index, B/H/S/D/Q-form
// Store vector reg, immed pre-index, B/H/S/D/Q-form
def : InstRW<[WriteAdr, V1Write_2c_1L01_1V01],
             (instregex "^STR[BHSDQ](pre|post)$")>;

// Store vector reg, unsigned immed, B/H/S/D/Q-form
def : InstRW<[V1Write_2c_1L01_1V01],
             (instregex "^STR[BHSDQ]ui$")>;

// Store vector reg, register offset, basic, B/S/D-form
// Store vector reg, register offset, scale, B/S/D-form
// Store vector reg, register offset, extend, B/S/D-form
// Store vector reg, register offset, extend, scale, B/S/D-form
def : InstRW<[V1Write_2c_1L01_1V01, ReadAdrBase],
             (instregex "^STR[BSD]ro[WX]$")>;

// Store vector reg, register offset, basic, H/Q-form
// Store vector reg, register offset, scale, H/Q-form
// Store vector reg, register offset, extend, H/Q-form
// Store vector reg, register offset, extend, scale, H/Q-form
def : InstRW<[V1Write_2c_1I_1L01_1V01, ReadAdrBase],
             (instregex "^STR[HQ]ro[WX]$")>;

// Store vector pair, immed offset, S/D/Q-form
def : InstRW<[V1Write_2c_1L01_1V01],
             (instregex "^STN?P[SDQ]i$")>;

// Store vector pair, immed post-index, S/D-form
// Store vector pair, immed pre-index, S/D-form
def : InstRW<[WriteAdr, V1Write_2c_1L01_1V01],
             (instregex "^STP[SD](pre|post)$")>;

// Store vector pair, immed post-index, Q-form
// Store vector pair, immed pre-index, Q-form
def : InstRW<[WriteAdr, V1Write_2c_2L01_1V01],
             (instrs STPQpre, STPQpost)>;


// ASIMD integer instructions
// -----------------------------------------------------------------------------

// ASIMD absolute diff
// ASIMD absolute diff long
// ASIMD arith, basic
// ASIMD arith, complex
// ASIMD arith, pair-wise
// ASIMD compare
// ASIMD logical
// ASIMD max/min, basic and pair-wise
def : SchedAlias<WriteVd, V1Write_2c_1V>;
def : SchedAlias<WriteVq, V1Write_2c_1V>;

// ASIMD absolute diff accum
// ASIMD absolute diff accum long
// ASIMD pairwise add and accumulate long
def : InstRW<[V1Wr_ADA, V1Rd_ADA], (instregex "^[SU]ABAL?v", "^[SU]ADALPv")>;

// ASIMD arith, reduce, 4H/4S
// ASIMD max/min, reduce, 4H/4S
def : InstRW<[V1Write_2c_1V13], (instregex "^(ADD|[SU]ADDL)Vv4(i16|i32)v$",
                                           "^[SU](MAX|MIN)Vv4(i16|i32)v$")>;

// ASIMD arith, reduce, 8B/8H
// ASIMD max/min, reduce, 8B/8H
def : InstRW<[V1Write_4c_1V13_1V], (instregex "^(ADD|[SU]ADDL)Vv8(i8|i16)v$",
                                              "^[SU](MAX|MIN)Vv8(i8|i16)v$")>;

// ASIMD arith, reduce, 16B
// ASIMD max/min, reduce, 16B
// The max/min pattern is written with an explicit "^" like every other
// pattern in this file (it used to be the only one without it).
// NOTE(review): instregex is expected to match from the start of the opcode
// name either way, so the matched set should not change - confirm with
// llvm-tblgen.
def : InstRW<[V1Write_4c_2V13], (instregex "^(ADD|[SU]ADDL)Vv16i8v$",
                                           "^[SU](MAX|MIN)Vv16i8v$")>;

// ASIMD dot product
// ASIMD dot product using signed and unsigned integers
def : InstRW<[V1Wr_VDOT, V1Rd_VDOT],
             (instregex "^([SU]|SU|US)DOT(lane)?v(8|16)i8$")>;

// ASIMD matrix multiply-accumulate
def : InstRW<[V1Wr_VMMA, V1Rd_VMMA], (instrs SMMLA, UMMLA, USMMLA)>;

// ASIMD multiply
def : InstRW<[V1Write_4c_1V02], (instregex "^MULv", "^SQ(R)?DMULHv")>;

// ASIMD multiply accumulate
def : InstRW<[V1Wr_VMA, V1Rd_VMA], (instregex "^MLAv", "^MLSv")>;

// ASIMD multiply accumulate long
def : InstRW<[V1Wr_VMAL, V1Rd_VMAL], (instregex "^[SU]MLALv", "^[SU]MLSLv")>;

// ASIMD multiply accumulate high
def : InstRW<[V1Write_4c_1V02], (instregex "^SQRDMLAHv", "^SQRDMLSHv")>;

// ASIMD multiply accumulate saturating long
def : InstRW<[V1Write_4c_1V02], (instregex "^SQDML[AS]L[iv]")>;

// ASIMD multiply/multiply long (8x8) polynomial
def : InstRW<[V1Write_3c_1V01], (instregex "^PMULL?v(8|16)i8$")>;

// ASIMD multiply long
def : InstRW<[V1Write_3c_1V02], (instregex "^([SU]|SQD)MULLv")>;

// ASIMD shift accumulate
def : InstRW<[V1Wr_VSA, V1Rd_VSA], (instregex "^[SU]SRAv", "^[SU]RSRAv")>;

// ASIMD shift by immed, complex
// ASIMD shift by register, complex
def : InstRW<[V1Write_4c_1V13],
             (instregex "^RSHRNv", "^SQRSHRU?Nv", "^(SQSHLU?|UQSHL)[bhsd]$",
                        "^(SQSHLU?|UQSHL)(v8i8|v16i8|v4i16|v8i16|v2i32|v4i32|v2i64)_shift$",
                        "^SQSHU?RNv", "^[SU]RSHRv", "^UQR?SHRNv",
                        "^[SU]Q?RSHLv", "^[SU]QSHLv")>;

// ASIMD shift by immed, basic
// ASIMD shift by immed and insert, basic
// ASIMD shift by register, basic
def : InstRW<[V1Write_2c_1V13], (instregex "^SHLL?v", "^SHRNv", "^[SU]SHLLv",
                                           "^[SU]SHRv", "^S[LR]Iv", "^[SU]SHLv")>;


// ASIMD FP instructions
// -----------------------------------------------------------------------------

// ASIMD FP absolute value/difference
// ASIMD FP arith, normal
// ASIMD FP compare
// ASIMD FP max/min, normal
// ASIMD FP max/min, pairwise
// ASIMD FP negate
// Covered by "SchedAlias (WriteV[dq]...)" above

// ASIMD FP complex add
def : InstRW<[V1Write_4c_1V], (instregex "^FCADD(v[48]f16|v[24]f32|v2f64)$")>;

// ASIMD FP complex multiply add
def : InstRW<[V1Wr_FCMA, V1Rd_FCMA], (instregex "^FCMLAv")>;

// ASIMD FP multiply
def : InstRW<[V1Wr_FPM], (instregex "^FMULX?v")>;

// ASIMD FP multiply accumulate
def : InstRW<[V1Wr_FPMA, V1Rd_FPMA], (instregex "^FML[AS]v")>;

// ASIMD FP multiply accumulate long
def : InstRW<[V1Wr_FPMAL, V1Rd_FPMAL], (instregex "^FML[AS]L2?v")>;

// ASIMD FP convert, long (F16 to F32)
def : InstRW<[V1Write_4c_2V02], (instregex "^FCVTLv[48]i16$")>;

// ASIMD FP convert, long (F32 to F64)
def : InstRW<[V1Write_3c_1V02], (instregex "^FCVTLv[24]i32$")>;

// ASIMD FP convert, narrow
// (F32 to F16)
def : InstRW<[V1Write_4c_2V02],
             (instregex "^FCVTNv[48]i16$")>;

// ASIMD FP convert, narrow (F64 to F32)
def : InstRW<[V1Write_3c_1V02],
             (instregex "^FCVTNv[24]i32$",
                        "^FCVTXN(v[24]f32|v1i64)$")>;

// ASIMD FP convert, other, D-form F32 and Q-form F64
def : InstRW<[V1Write_3c_1V02],
             (instregex "^FCVT[AMNPZ][SU]v2f(32|64)$",
                        "^FCVT[AMNPZ][SU]v2i(32|64)_shift$",
                        "^FCVT[AMNPZ][SU]v1i64$",
                        "^FCVTZ[SU]d$",
                        "^[SU]CVTFv2f(32|64)$",
                        "^[SU]CVTFv2i(32|64)_shift$",
                        "^[SU]CVTFv1i64$",
                        "^[SU]CVTFd$")>;

// ASIMD FP convert, other, D-form F16 and Q-form F32
def : InstRW<[V1Write_4c_2V02],
             (instregex "^FCVT[AMNPZ][SU]v4f(16|32)$",
                        "^FCVT[AMNPZ][SU]v4i(16|32)_shift$",
                        "^FCVT[AMNPZ][SU]v1i32$",
                        "^FCVTZ[SU]s$",
                        "^[SU]CVTFv4f(16|32)$",
                        "^[SU]CVTFv4i(16|32)_shift$",
                        "^[SU]CVTFv1i32$",
                        "^[SU]CVTFs$")>;

// ASIMD FP convert, other, Q-form F16
def : InstRW<[V1Write_6c_4V02],
             (instregex "^FCVT[AMNPZ][SU]v8f16$",
                        "^FCVT[AMNPZ][SU]v8i16_shift$",
                        "^FCVT[AMNPZ][SU]v1f16$",
                        "^FCVTZ[SU]h$",
                        "^[SU]CVTFv8f16$",
                        "^[SU]CVTFv8i16_shift$",
                        "^[SU]CVTFv1i16$",
                        "^[SU]CVTFh$")>;

// ASIMD FP divide, D-form, F16
// ASIMD FP square root, D-form, F16
def : InstRW<[V1Write_7c7_1V02],
             (instrs FDIVv4f16, FSQRTv4f16)>;

// ASIMD FP divide, F32
// ASIMD FP square root, F32
def : InstRW<[V1Write_10c7_1V02],
             (instrs FDIVv2f32, FDIVv4f32,
                     FSQRTv2f32, FSQRTv4f32)>;

// ASIMD FP divide, Q-form, F16
def : InstRW<[V1Write_13c5_1V02],
             (instrs FDIVv8f16)>;

// ASIMD FP divide, Q-form, F64
def : InstRW<[V1Write_15c7_1V02],
             (instrs FDIVv2f64)>;

// ASIMD FP square root, Q-form, F16
def : InstRW<[V1Write_13c11_1V02],
             (instrs FSQRTv8f16)>;

// ASIMD FP square root, Q-form, F64
def : InstRW<[V1Write_16c7_1V02],
             (instrs FSQRTv2f64)>;

// ASIMD FP max/min, reduce,
// F32 and D-form F16
def : InstRW<[V1Write_4c_2V],
             (instregex "^F(MAX|MIN)(NM)?Vv4(i16|i32)v$")>;

// ASIMD FP max/min, reduce, Q-form F16
def : InstRW<[V1Write_6c_3V],
             (instregex "^F(MAX|MIN)(NM)?Vv8i16v$")>;

// ASIMD FP round, D-form F32 and Q-form F64
def : InstRW<[V1Write_3c_1V02],
             (instregex "^FRINT[AIMNPXZ]v2f(32|64)$")>;

// ASIMD FP round, D-form F16 and Q-form F32
def : InstRW<[V1Write_4c_2V02],
             (instregex "^FRINT[AIMNPXZ]v4f(16|32)$")>;

// ASIMD FP round, Q-form F16
def : InstRW<[V1Write_6c_4V02],
             (instregex "^FRINT[AIMNPXZ]v8f16$")>;


// ASIMD BF instructions
// -----------------------------------------------------------------------------

// ASIMD convert, F32 to BF16
def : InstRW<[V1Write_4c_1V02],
             (instrs BFCVTN, BFCVTN2)>;

// ASIMD dot product
def : InstRW<[V1Wr_BFD, V1Rd_BFD],
             (instregex "^BF(DOT|16DOTlane)v[48]bf16$")>;

// ASIMD matrix multiply accumulate
def : InstRW<[V1Wr_BFMMA, V1Rd_BFMMA],
             (instrs BFMMLA)>;

// ASIMD multiply accumulate long
def : InstRW<[V1Wr_BFMLA, V1Rd_BFMLA],
             (instregex "^BFMLAL[BT](Idx)?$")>;

// Scalar convert, F32 to BF16
def : InstRW<[V1Write_3c_1V02],
             (instrs BFCVT)>;


// ASIMD miscellaneous instructions
// -----------------------------------------------------------------------------

// ASIMD bit reverse
// ASIMD bitwise insert
// ASIMD count
// ASIMD duplicate, element
// ASIMD extract
// ASIMD extract narrow
// ASIMD insert, element to element
// ASIMD move, FP immed
// ASIMD move, integer immed
// ASIMD reverse
// ASIMD table lookup, 1 or 2 table regs
// ASIMD table lookup extension, 1 table reg
// ASIMD transfer, element to gen reg
// ASIMD transpose
// ASIMD unzip/zip
// Covered by "SchedAlias (WriteV[dq]...)" above

// ASIMD duplicate, gen reg
def : InstRW<[V1Write_3c_1M0],
             (instregex "^DUP((v16|v8)i8|(v8|v4)i16|(v4|v2)i32|v2i64)gpr$")>;

// ASIMD extract narrow, saturating
def : InstRW<[V1Write_4c_1V13],
             (instregex "^[SU]QXTNv", "^SQXTUNv")>;

// ASIMD reciprocal and square root estimate, D-form U32
// ASIMD reciprocal and square root estimate, D-form F32 and F64
def : InstRW<[V1Write_3c_1V02],
             (instrs URECPEv2i32,
                     URSQRTEv2i32,
                     FRECPEv1i32, FRECPEv2f32, FRECPEv1i64,
                     FRSQRTEv1i32, FRSQRTEv2f32, FRSQRTEv1i64)>;

// ASIMD reciprocal and square root estimate, Q-form U32
// ASIMD reciprocal and square root estimate, D-form F16 and Q-form F32 and F64
def : InstRW<[V1Write_4c_1V02],
             (instrs URECPEv4i32,
                     URSQRTEv4i32,
                     FRECPEv1f16, FRECPEv4f16,
                     FRECPEv4f32, FRECPEv2f64,
                     FRSQRTEv1f16, FRSQRTEv4f16,
                     FRSQRTEv4f32, FRSQRTEv2f64)>;

// ASIMD reciprocal and square root estimate, Q-form F16
def : InstRW<[V1Write_6c_2V02],
             (instrs FRECPEv8f16,
                     FRSQRTEv8f16)>;

// ASIMD reciprocal exponent
def : InstRW<[V1Write_3c_1V02],
             (instrs FRECPXv1f16, FRECPXv1i32, FRECPXv1i64)>;

// ASIMD reciprocal step
def : InstRW<[V1Write_4c_1V],
             (instregex "^FRECPS(16|32|64)$", "^FRECPSv",
                        "^FRSQRTS(16|32|64)$", "^FRSQRTSv")>;

// ASIMD table lookup, 1 or 2 table regs
// ASIMD table lookup extension, 1 table reg
def : InstRW<[V1Write_2c_2V01],
             (instregex "^TBLv(8|16)i8(One|Two)$",
                        "^TBXv(8|16)i8One$")>;

// ASIMD table lookup, 3 table regs
// ASIMD table lookup extension, 2 table reg
def : InstRW<[V1Write_4c_2V01],
             (instrs TBLv8i8Three, TBLv16i8Three,
                     TBXv8i8Two, TBXv16i8Two)>;

// ASIMD table lookup, 4 table regs
def : InstRW<[V1Write_4c_3V01],
             (instrs TBLv8i8Four, TBLv16i8Four)>;

// ASIMD table lookup extension, 3 table reg
def : InstRW<[V1Write_6c_3V01],
             (instrs TBXv8i8Three, TBXv16i8Three)>;

// ASIMD table lookup extension, 4 table reg
def : InstRW<[V1Write_6c_5V01],
             (instrs TBXv8i8Four, TBXv16i8Four)>;

// ASIMD transfer, element to gen reg
def : InstRW<[V1Write_2c_1V],
             (instregex "^SMOVvi(((8|16)to(32|64))|32to64)$",
                        "^UMOVvi(8|16|32|64)$")>;

// ASIMD transfer, gen reg to element
def : InstRW<[V1Write_5c_1M0_1V],
             (instregex "^INSvi(8|16|32|64)gpr$")>;


// ASIMD load instructions
// -----------------------------------------------------------------------------
// Each group below pairs the plain form with its _POST form; the _POST entry
// uses the same write preceded by WriteAdr.

// ASIMD load, 1 element, multiple, 1 reg
def : InstRW<[V1Write_6c_1L],
             (instregex "^LD1Onev(8b|16b|4h|8h|2s|4s|1d|2d)$")>;
def : InstRW<[WriteAdr, V1Write_6c_1L],
             (instregex "^LD1Onev(8b|16b|4h|8h|2s|4s|1d|2d)_POST$")>;

// ASIMD load, 1 element, multiple, 2 reg
def : InstRW<[V1Write_6c_2L],
             (instregex "^LD1Twov(8b|16b|4h|8h|2s|4s|1d|2d)$")>;
def : InstRW<[WriteAdr, V1Write_6c_2L],
             (instregex "^LD1Twov(8b|16b|4h|8h|2s|4s|1d|2d)_POST$")>;

// ASIMD load, 1 element, multiple, 3 reg
def : InstRW<[V1Write_6c_3L],
             (instregex "^LD1Threev(8b|16b|4h|8h|2s|4s|1d|2d)$")>;
def : InstRW<[WriteAdr, V1Write_6c_3L],
             (instregex "^LD1Threev(8b|16b|4h|8h|2s|4s|1d|2d)_POST$")>;

// ASIMD load, 1 element, multiple, 4 reg, D-form
def : InstRW<[V1Write_6c_2L],
             (instregex "^LD1Fourv(8b|4h|2s|1d)$")>;
def : InstRW<[WriteAdr, V1Write_6c_2L],
             (instregex "^LD1Fourv(8b|4h|2s|1d)_POST$")>;

// ASIMD load, 1 element, multiple, 4 reg, Q-form
def : InstRW<[V1Write_7c_4L],
             (instregex "^LD1Fourv(16b|8h|4s|2d)$")>;
def : InstRW<[WriteAdr, V1Write_7c_4L],
             (instregex "^LD1Fourv(16b|8h|4s|2d)_POST$")>;

// ASIMD load, 1 element, one lane
// ASIMD load, 1 element, all lanes
// NOTE(review): the "Rv" alternative in the first pattern looks redundant
// with the second pattern - confirm before simplifying.
def : InstRW<[V1Write_8c_1L_1V],
             (instregex "^LD1(i|Rv)(8|16|32|64)$",
                        "^LD1Rv(8b|16b|4h|8h|2s|4s|1d|2d)$")>;
def : InstRW<[WriteAdr, V1Write_8c_1L_1V],
             (instregex "^LD1i(8|16|32|64)_POST$",
                        "^LD1Rv(8b|16b|4h|8h|2s|4s|1d|2d)_POST$")>;

// ASIMD load, 2 element, multiple, D-form
def : InstRW<[V1Write_8c_1L_2V],
             (instregex "^LD2Twov(8b|4h|2s)$")>;
def : InstRW<[WriteAdr, V1Write_8c_1L_2V],
             (instregex "^LD2Twov(8b|4h|2s)_POST$")>;

// ASIMD load, 2 element, multiple, Q-form
def : InstRW<[V1Write_8c_2L_2V],
             (instregex "^LD2Twov(16b|8h|4s|2d)$")>;
def : InstRW<[WriteAdr, V1Write_8c_2L_2V],
             (instregex "^LD2Twov(16b|8h|4s|2d)_POST$")>;

// ASIMD load, 2 element, one lane
// ASIMD load, 2 element, all lanes
def : InstRW<[V1Write_8c_1L_2V],
             (instregex "^LD2i(8|16|32|64)$",
                        "^LD2Rv(8b|16b|4h|8h|2s|4s|1d|2d)$")>;
def : InstRW<[WriteAdr, V1Write_8c_1L_2V],
             (instregex "^LD2i(8|16|32|64)_POST$",
                        "^LD2Rv(8b|16b|4h|8h|2s|4s|1d|2d)_POST$")>;

// ASIMD load, 3 element, multiple, D-form
// ASIMD load, 3 element, one lane
// ASIMD load, 3 element, all lanes
def : InstRW<[V1Write_8c_2L_3V],
             (instregex "^LD3Threev(8b|4h|2s)$",
                        "^LD3i(8|16|32|64)$",
                        "^LD3Rv(8b|16b|4h|8h|2s|4s|1d|2d)$")>;
def : InstRW<[WriteAdr, V1Write_8c_2L_3V],
             (instregex "^LD3Threev(8b|4h|2s)_POST$",
                        "^LD3i(8|16|32|64)_POST$",
                        "^LD3Rv(8b|16b|4h|8h|2s|4s|1d|2d)_POST$")>;

// ASIMD load, 3 element, multiple, Q-form
def : InstRW<[V1Write_8c_3L_3V],
             (instregex "^LD3Threev(16b|8h|4s|2d)$")>;
def : InstRW<[WriteAdr, V1Write_8c_3L_3V],
             (instregex "^LD3Threev(16b|8h|4s|2d)_POST$")>;

// ASIMD load, 4 element, multiple, D-form
// ASIMD load, 4 element, one lane
// ASIMD load, 4 element, all lanes
def : InstRW<[V1Write_8c_3L_4V],
             (instregex "^LD4Fourv(8b|4h|2s)$",
                        "^LD4i(8|16|32|64)$",
                        "^LD4Rv(8b|16b|4h|8h|2s|4s|1d|2d)$")>;
def : InstRW<[WriteAdr, V1Write_8c_3L_4V],
             (instregex "^LD4Fourv(8b|4h|2s)_POST$",
                        "^LD4i(8|16|32|64)_POST$",
                        "^LD4Rv(8b|16b|4h|8h|2s|4s|1d|2d)_POST$")>;

// ASIMD
// load, 4 element, multiple, Q-form
def : InstRW<[V1Write_9c_4L_4V],
             (instregex "^LD4Fourv(16b|8h|4s|2d)$")>;
def : InstRW<[WriteAdr, V1Write_9c_4L_4V],
             (instregex "^LD4Fourv(16b|8h|4s|2d)_POST$")>;


// ASIMD store instructions
// -----------------------------------------------------------------------------
// As with the loads, each _POST entry reuses the plain form's write and adds
// a leading WriteAdr.

// ASIMD store, 1 element, multiple, 1 reg
// ASIMD store, 1 element, multiple, 2 reg, D-form
def : InstRW<[V1Write_2c_1L01_1V01],
             (instregex "^ST1Onev(8b|16b|4h|8h|2s|4s|1d|2d)$",
                        "^ST1Twov(8b|4h|2s|1d)$")>;
def : InstRW<[WriteAdr, V1Write_2c_1L01_1V01],
             (instregex "^ST1Onev(8b|16b|4h|8h|2s|4s|1d|2d)_POST$",
                        "^ST1Twov(8b|4h|2s|1d)_POST$")>;

// ASIMD store, 1 element, multiple, 2 reg, Q-form
// ASIMD store, 1 element, multiple, 3 reg, D-form
// ASIMD store, 1 element, multiple, 4 reg, D-form
def : InstRW<[V1Write_2c_2L01_2V01],
             (instregex "^ST1Twov(16b|8h|4s|2d)$",
                        "^ST1Threev(8b|4h|2s|1d)$",
                        "^ST1Fourv(8b|4h|2s|1d)$")>;
def : InstRW<[WriteAdr, V1Write_2c_2L01_2V01],
             (instregex "^ST1Twov(16b|8h|4s|2d)_POST$",
                        "^ST1Threev(8b|4h|2s|1d)_POST$",
                        "^ST1Fourv(8b|4h|2s|1d)_POST$")>;

// ASIMD store, 1 element, multiple, 3 reg, Q-form
def : InstRW<[V1Write_2c_3L01_3V01],
             (instregex "^ST1Threev(16b|8h|4s|2d)$")>;
def : InstRW<[WriteAdr, V1Write_2c_3L01_3V01],
             (instregex "^ST1Threev(16b|8h|4s|2d)_POST$")>;

// ASIMD store, 1 element, multiple, 4 reg, Q-form
def : InstRW<[V1Write_2c_4L01_4V01],
             (instregex "^ST1Fourv(16b|8h|4s|2d)$")>;
def : InstRW<[WriteAdr, V1Write_2c_4L01_4V01],
             (instregex "^ST1Fourv(16b|8h|4s|2d)_POST$")>;

// ASIMD store, 1 element, one lane
// ASIMD store, 2 element, multiple, D-form
// ASIMD store, 2 element, one lane
def : InstRW<[V1Write_4c_1L01_1V01],
             (instregex "^ST1i(8|16|32|64)$",
                        "^ST2Twov(8b|4h|2s)$",
                        "^ST2i(8|16|32|64)$")>;
// _POST forms of ST1i, ST2Twov (8b/4h/2s) and ST2i: V1Write_4c_1L01_1V01 with
// a leading WriteAdr.
def : InstRW<[WriteAdr, V1Write_4c_1L01_1V01],
             (instregex "^ST1i(8|16|32|64)_POST$",
                        "^ST2Twov(8b|4h|2s)_POST$",
                        "^ST2i(8|16|32|64)_POST$")>;

// ASIMD store, 2 element, multiple, Q-form
// ASIMD store, 3 element, multiple, D-form
// ASIMD store, 3 element, one lane
// ASIMD store, 4 element, one lane, D
def : InstRW<[V1Write_4c_2L01_2V01],
             (instregex "^ST2Twov(16b|8h|4s|2d)$",
                        "^ST3Threev(8b|4h|2s)$",
                        "^ST3i(8|16|32|64)$",
                        "^ST4i64$")>;
def : InstRW<[WriteAdr, V1Write_4c_2L01_2V01],
             (instregex "^ST2Twov(16b|8h|4s|2d)_POST$",
                        "^ST3Threev(8b|4h|2s)_POST$",
                        "^ST3i(8|16|32|64)_POST$",
                        "^ST4i64_POST$")>;

// ASIMD store, 3 element, multiple, Q-form
def : InstRW<[V1Write_5c_3L01_3V01],
             (instregex "^ST3Threev(16b|8h|4s|2d)$")>;
def : InstRW<[WriteAdr, V1Write_5c_3L01_3V01],
             (instregex "^ST3Threev(16b|8h|4s|2d)_POST$")>;

// ASIMD store, 4 element, multiple, D-form
def : InstRW<[V1Write_6c_3L01_3V01],
             (instregex "^ST4Fourv(8b|4h|2s)$")>;
def : InstRW<[WriteAdr, V1Write_6c_3L01_3V01],
             (instregex "^ST4Fourv(8b|4h|2s)_POST$")>;

// ASIMD store, 4 element, multiple, Q-form, B/H/S
def : InstRW<[V1Write_7c_6L01_6V01],
             (instregex "^ST4Fourv(16b|8h|4s)$")>;
def : InstRW<[WriteAdr, V1Write_7c_6L01_6V01],
             (instregex "^ST4Fourv(16b|8h|4s)_POST$")>;

// ASIMD store, 4 element, multiple, Q-form, D
def : InstRW<[V1Write_4c_4L01_4V01],
             (instrs ST4Fourv2d)>;
def : InstRW<[WriteAdr, V1Write_4c_4L01_4V01],
             (instrs ST4Fourv2d_POST)>;

// ASIMD store, 4 element, one lane, B/H/S
def : InstRW<[V1Write_6c_3L_3V],
             (instregex "^ST4i(8|16|32)$")>;
def : InstRW<[WriteAdr, V1Write_6c_3L_3V],
             (instregex "^ST4i(8|16|32)_POST$")>;


// Cryptography extensions
// -----------------------------------------------------------------------------

// Crypto polynomial
// (64x64) multiply long
// Covered by "SchedAlias (WriteV[dq]...)" above

// Crypto AES ops
// V1ReadVC is a SchedReadAdvance of 2 keyed on V1WriteVC, so it applies to
// AESMC/AESIMC operands produced by the AESD/AESE write below.
def V1WriteVC : WriteSequence<[V1Write_2c_1V]>;
def V1ReadVC  : SchedReadAdvance<2, [V1WriteVC]>;
def : InstRW<[V1WriteVC], (instrs AESDrr, AESErr)>;
def : InstRW<[V1Write_2c_1V, V1ReadVC], (instrs AESMCrr, AESIMCrr)>;

// Crypto SHA1 hash acceleration op
// Crypto SHA1 schedule acceleration ops
// Crypto SHA256 schedule acceleration ops
// Crypto SHA512 hash acceleration ops
// Crypto SM3 ops
// The SM3 pattern lists SM3PARTW1, SM3PARTW2, SM3SS1 and SM3TT{1,2}{A,B}.
// The earlier alternation "PARTW(1|2SM3SS1)" had a misplaced parenthesis: it
// could only match SM3PARTW1 and the string "SM3PARTW2SM3SS1", so SM3PARTW2
// and SM3SS1 were not covered by this entry.
def : InstRW<[V1Write_2c_1V0], (instregex "^SHA1(H|SU[01])rr$",
                                          "^SHA256SU[01]rr$",
                                          "^SHA512(H2?|SU[01])$",
                                          "^SM3(PARTW[12]|SS1|TT[12][AB])$")>;

// Crypto SHA1 hash acceleration ops
// Crypto SHA256 hash acceleration ops
// Crypto SM4 ops
def : InstRW<[V1Write_4c_1V0], (instregex "^SHA1[CMP]rrr$",
                                          "^SHA256H2?rrr$",
                                          "^SM4E(KEY)?$")>;

// Crypto SHA3 ops
def : InstRW<[V1Write_2c_1V0], (instrs BCAX, EOR3, RAX1, XAR)>;


// CRC instruction
// -----------------------------------------------------------------------------

// CRC checksum ops
def : InstRW<[V1Wr_CRC, V1Rd_CRC], (instregex "^CRC32C?[BHWX]rr$")>;


// SVE Predicate instructions
// -----------------------------------------------------------------------------

// Loop control, based on predicate
def : InstRW<[V1Write_2c_1M0], (instregex "^BRK[AB]_PP[mz]P$")>;
def : InstRW<[V1Write_2c_1M0], (instrs BRKN_PPzP, BRKPA_PPzPP, BRKPB_PPzPP)>;

// Loop control, based on predicate and flag setting
def : InstRW<[V1Write_3c_2M0], (instrs BRKAS_PPzP, BRKBS_PPzP, BRKNS_PPzP,
                                       BRKPAS_PPzPP, BRKPBS_PPzPP)>;

// Loop control, based on GPR
def : InstRW<[V1Write_3c_2M0], (instregex "^WHILE(LE|LO|LS|LT)_P(WW|XX)_[BHSD]$")>;

// Loop terminate
def : InstRW<[V1Write_1c_1M0], (instregex "^CTERM(EQ|NE)_(WW|XX)$")>;

// Predicate
// counting scalar
// Predicate counting scalar, active predicate
def : InstRW<[V1Write_2c_1M0],
             (instrs ADDPL_XXI, ADDVL_XXI, RDVLI_XI)>;
def : InstRW<[V1Write_2c_1M0],
             (instregex "^(CNT|([SU]Q)?(DEC|INC))[BHWD]_XPiI$",
                        "^SQ(DEC|INC)[BHWD]_XPiWdI$",
                        "^UQ(DEC|INC)[BHWD]_WPiI$",
                        "^CNTP_XPP_[BHSD]$",
                        "^([SU]Q)?(DEC|INC)P_XP_[BHSD]$",
                        "^UQ(DEC|INC)P_WP_[BHSD]$",
                        "^[SU]Q(DEC|INC)P_XPWd_[BHSD]$")>;

// Predicate counting vector, active predicate
def : InstRW<[V1Write_7c_2M0_1V01],
             (instregex "^([SU]Q)?(DEC|INC)P_ZP_[HSD]$")>;

// Predicate logical
def : InstRW<[V1Write_1c_1M0],
             (instregex "^(AND|BIC|EOR|NAND|NOR|ORN|ORR)_PPzPP$")>;

// Predicate logical, flag setting
def : InstRW<[V1Write_2c_2M0],
             (instregex "^(AND|BIC|EOR|NAND|NOR|ORN|ORR)S_PPzPP$")>;

// Predicate reverse
// Predicate set/initialize/find next
// Predicate transpose
// Predicate unpack and widen
// Predicate zip/unzip
def : InstRW<[V1Write_2c_1M0],
             (instregex "^REV_PP_[BHSD]$",
                        "^PFALSE$", "^PFIRST_B$",
                        "^PNEXT_[BHSD]$", "^PTRUE_[BHSD]$",
                        "^TRN[12]_PPP_[BHSDQ]$",
                        "^(ZIP|UZP)[12]_PPP_[BHSDQ]$")>;

// Predicate set/initialize/find next
// Predicate unpack and widen
def : InstRW<[V1Write_2c_1M0],
             (instrs PTEST_PP,
                     PUNPKHI_PP, PUNPKLO_PP)>;

// Predicate select
def : InstRW<[V1Write_1c_1M0],
             (instrs SEL_PPPP)>;

// Predicate set/initialize, set flags
def : InstRW<[V1Write_3c_2M0],
             (instregex "^PTRUES_[BHSD]$")>;



// SVE integer instructions
// -----------------------------------------------------------------------------

// Arithmetic, basic
// Logical
def : InstRW<[V1Write_2c_1V01],
             (instregex "^(ABS|CNOT|NEG)_ZPmZ_[BHSD]",
                        "^(ADD|SUB)_Z(I|P[mZ]Z|ZZ)_[BHSD]",
                        "^ADR_[SU]XTW_ZZZ_D_[0123]$",
                        "^ADR_LSL_ZZZ_[SD]_[0123]$",
                        "^[SU]ABD_ZP[mZ]Z_[BHSD]",
                        "^[SU](MAX|MIN)_Z(I|P[mZ]Z)_[BHSD]",
                        "^[SU]Q(ADD|SUB)_Z(I|ZZ)_[BHSD]$",
                        "^SUBR_Z(I|P[mZ]Z)_[BHSD]",
                        "^(AND|EOR|ORR)_ZI$",
                        "^(AND|BIC|EOR|EOR(BT|TB)?|ORR)_ZP?ZZ",
                        "^EOR(BT|TB)_ZZZ_[BHSD]$",
                        "^(AND|BIC|EOR|NOT|ORR)_ZPmZ_[BHSD]")>;

// Arithmetic, shift
def : InstRW<[V1Write_2c_1V1],
             (instregex "^(ASR|LSL|LSR)_WIDE_Z(Pm|Z)Z_[BHS]",
                        "^(ASR|LSL|LSR)_ZPm[IZ]_[BHSD]",
                        "^(ASR|LSL|LSR)_ZZI_[BHSD]",
                        "^(ASR|LSL|LSR)_ZPZ[IZ]_[BHSD]",
                        "^(ASRR|LSLR|LSRR)_ZPmZ_[BHSD]")>;

// Arithmetic, shift right for divide
def : InstRW<[V1Write_4c_1V1],
             (instregex "^ASRD_(ZPmI|ZPZI)_[BHSD]")>;

// Count/reverse bits
def : InstRW<[V1Write_2c_1V01],
             (instregex "^(CLS|CLZ|CNT|RBIT)_ZPmZ_[BHSD]")>;

// Broadcast logical bitmask immediate to vector
def : InstRW<[V1Write_2c_1V01],
             (instrs DUPM_ZI)>;

// Compare and set flags
def : InstRW<[V1Write_4c_1M0_1V0],
             (instregex "^CMP(EQ|GE|GT|HI|HS|LE|LO|LS|LT|NE)_PPzZ[IZ]_[BHSD]$",
                        "^CMP(EQ|GE|GT|HI|HS|LE|LO|LS|LT|NE)_WIDE_PPzZZ_[BHS]$")>;

// Conditional extract operations, scalar form
def : InstRW<[V1Write_9c_1M0_1V1],
             (instregex "^CLAST[AB]_RPZ_[BHSD]$")>;

// Conditional extract operations, SIMD&FP scalar and vector forms
def : InstRW<[V1Write_3c_1V1],
             (instregex "^CLAST[AB]_[VZ]PZ_[BHSD]$",
                        "^COMPACT_ZPZ_[SD]$",
                        "^SPLICE_ZPZZ?_[BHSD]$")>;

// Convert to floating point, 64b to float or convert to double
def : InstRW<[V1Write_3c_1V0],
             (instregex "^[SU]CVTF_ZPmZ_Dto[HSD]",
                        "^[SU]CVTF_ZPmZ_StoD")>;

// Convert to floating point, 32b to single or half
def : InstRW<[V1Write_4c_2V0],
             (instregex "^[SU]CVTF_ZPmZ_Sto[HS]")>;

// Convert to floating point, 16b to half
def : InstRW<[V1Write_6c_4V0],
             (instregex "^[SU]CVTF_ZPmZ_HtoH")>;

// Copy, scalar
def : InstRW<[V1Write_5c_1M0_1V01],
             (instregex "^CPY_ZPmR_[BHSD]$")>;

// Copy, scalar SIMD&FP or
// imm
def : InstRW<[V1Write_2c_1V01],
             (instregex "^CPY_ZP([mz]I|mV)_[BHSD]$")>;

// Divides, 32 bit
def : InstRW<[V1Write_12c7_1V0],
             (instregex "^[SU]DIVR?_ZPmZ_S",
                        "^[SU]DIV_ZPZZ_S")>;

// Divides, 64 bit
def : InstRW<[V1Write_20c7_1V0],
             (instregex "^[SU]DIVR?_ZPmZ_D",
                        "^[SU]DIV_ZPZZ_D")>;

// Dot product, 8 bit
def : InstRW<[V1Wr_ZDOTB, V1Rd_ZDOTB],
             (instregex "^[SU]DOT_ZZZI?_S$")>;

// Dot product, 8 bit, using signed and unsigned integers
def : InstRW<[V1Wr_ZUDOTB, V1Rd_ZUDOTB],
             (instrs SUDOT_ZZZI, USDOT_ZZZ, USDOT_ZZZI)>;

// Dot product, 16 bit
def : InstRW<[V1Wr_ZDOTH, V1Rd_ZDOTH],
             (instregex "^[SU]DOT_ZZZI?_D$")>;

// Duplicate, immediate and indexed form
def : InstRW<[V1Write_2c_1V01],
             (instregex "^DUP_ZI_[BHSD]$",
                        "^DUP_ZZI_[BHSDQ]$")>;

// Duplicate, scalar form
def : InstRW<[V1Write_3c_1M0],
             (instregex "^DUP_ZR_[BHSD]$")>;

// Extend, sign or zero
def : InstRW<[V1Write_2c_1V1],
             (instregex "^[SU]XTB_ZPmZ_[HSD]",
                        "^[SU]XTH_ZPmZ_[SD]",
                        "^[SU]XTW_ZPmZ_[D]")>;

// Extract
def : InstRW<[V1Write_2c_1V01],
             (instrs EXT_ZZI)>;

// Extract/insert operation, SIMD and FP scalar form
def : InstRW<[V1Write_3c_1V1],
             (instregex "^LAST[AB]_VPZ_[BHSD]$",
                        "^INSR_ZV_[BHSD]$")>;

// Extract/insert operation, scalar
def : InstRW<[V1Write_6c_1M0_1V1],
             (instregex "^LAST[AB]_RPZ_[BHSD]$",
                        "^INSR_ZR_[BHSD]$")>;

// Horizontal operations, B, H, S form, imm, imm
def : InstRW<[V1Write_4c_1V0],
             (instregex "^INDEX_II_[BHS]$")>;

// Horizontal operations, B, H, S form, scalar, imm / scalar / imm, scalar
def : InstRW<[V1Write_7c_1M0_1V0],
             (instregex "^INDEX_(IR|RI|RR)_[BHS]$")>;

// Horizontal operations, D form, imm, imm
def : InstRW<[V1Write_5c_2V0],
             (instrs INDEX_II_D)>;

// Horizontal operations, D form, scalar, imm / scalar / imm, scalar
def : InstRW<[V1Write_8c_2M0_2V0],
             (instregex "^INDEX_(IR|RI|RR)_D$")>;

// Move prefix
def : InstRW<[V1Write_2c_1V01],
             (instregex "^MOVPRFX_ZP[mz]Z_[BHSD]$",
                        "^MOVPRFX_ZZ$")>;

// Matrix multiply-accumulate
def : InstRW<[V1Wr_ZMMA, V1Rd_ZMMA],
             (instrs SMMLA_ZZZ, UMMLA_ZZZ, USMMLA_ZZZ)>;

// Multiply, B, H, S element size
def : InstRW<[V1Write_4c_1V0],
             (instregex "^MUL_(ZI|ZPmZ|ZZZI|ZZZ)_[BHS]",
                        "^MUL_ZPZZ_[BHS]",
                        "^[SU]MULH_(ZPmZ|ZZZ)_[BHS]",
                        "^[SU]MULH_ZPZZ_[BHS]")>;

// Multiply, D element size
def : InstRW<[V1Write_5c_2V0],
             (instregex "^MUL_(ZI|ZPmZ|ZZZI|ZZZ)_D",
                        "^MUL_ZPZZ_D",
                        "^[SU]MULH_(ZPmZ|ZZZ)_D",
                        "^[SU]MULH_ZPZZ_D")>;

// Multiply accumulate, D element size
// The ZPmZZ forms carry an extra ReadDefault between the write and
// V1Rd_ZMAD; the ZPZZZ forms do not.
def : InstRW<[V1Wr_ZMAD, V1Rd_ZMAD],
             (instregex "^ML[AS]_ZPZZZ_D")>;
def : InstRW<[V1Wr_ZMAD, ReadDefault, V1Rd_ZMAD],
             (instregex "^(ML[AS]|MAD|MSB)_ZPmZZ_D")>;

// Multiply accumulate, B, H, S element size
// NOTE: This is not specified in the SOG.
// MLA/MLS/MAD/MSB with a B, H or S element suffix (ZPmZZ and ZPZZZ forms).
def : InstRW<[V1Write_4c_1V0],
             (instregex "^(ML[AS]|MAD|MSB)_(ZPmZZ|ZPZZZ)_[BHS]")>;

// Predicate counting vector
def : InstRW<[V1Write_2c_1V0],
             (instregex "^([SU]Q)?(DEC|INC)[HWD]_ZPiI$")>;

// Reduction, arithmetic, B form
def : InstRW<[V1Write_14c_1V_1V0_2V1_1V13],
             (instregex "^[SU](ADD|MAX|MIN)V_VPZ_B")>;

// Reduction, arithmetic, H form
def : InstRW<[V1Write_12c_1V_1V01_2V1],
             (instregex "^[SU](ADD|MAX|MIN)V_VPZ_H")>;

// Reduction, arithmetic, S form
def : InstRW<[V1Write_10c_1V_1V01_2V1],
             (instregex "^[SU](ADD|MAX|MIN)V_VPZ_S")>;

// Reduction, arithmetic, D form
def : InstRW<[V1Write_8c_1V_1V01],
             (instregex "^[SU](ADD|MAX|MIN)V_VPZ_D")>;

// Reduction, logical
def : InstRW<[V1Write_12c_4V01],
             (instregex "^(AND|EOR|OR)V_VPZ_[BHSD]$")>;

// Reverse, vector
def : InstRW<[V1Write_2c_1V01],
             (instregex "^REV_ZZ_[BHSD]$",
                        "^REVB_ZPmZ_[HSD]$",
                        "^REVH_ZPmZ_[SD]$",
                        "^REVW_ZPmZ_D$")>;

// Select, vector form
// Table lookup
// Table lookup extension
// Transpose, vector form
// Unpack and extend
// Zip/unzip
def : InstRW<[V1Write_2c_1V01],
             (instregex "^SEL_ZPZZ_[BHSD]$",
                        "^TB[LX]_ZZZ_[BHSD]$",
                        "^TRN[12]_ZZZ_[BHSDQ]$",
                        "^[SU]UNPK(HI|LO)_ZZ_[HSD]$",
                        "^(UZP|ZIP)[12]_ZZZ_[BHSDQ]$")>;


// SVE floating-point instructions
// -----------------------------------------------------------------------------

// Floating point absolute value/difference
def : InstRW<[V1Write_2c_1V01],
             (instregex "^FAB[SD]_ZPmZ_[HSD]",
                        "^FABD_ZPZZ_[HSD]",
                        "^FABS_ZPmZ_[HSD]")>;

// Floating point arithmetic
def : InstRW<[V1Write_2c_1V01],
             (instregex "^F(ADD|SUB)_(ZPm[IZ]|ZZZ)_[HSD]",
                        "^F(ADD|SUB)_ZPZ[IZ]_[HSD]",
                        "^FADDP_ZPmZZ_[HSD]",
                        "^FNEG_ZPmZ_[HSD]",
                        "^FSUBR_ZPm[IZ]_[HSD]",
                        "^FSUBR_(ZPZI|ZPZZ)_[HSD]")>;

// Floating point associative add, F16
def : InstRW<[V1Write_19c_18V0], (instrs FADDA_VPZ_H)>;

// Floating point associative add, F32
def : InstRW<[V1Write_11c_10V0], (instrs FADDA_VPZ_S)>;

// Floating point associative add, F64
def : InstRW<[V1Write_8c_3V01], (instrs FADDA_VPZ_D)>;

// Floating point compare
def : InstRW<[V1Write_2c_1V0],
             (instregex "^FAC(GE|GT)_PPzZZ_[HSD]$",
                        "^FCM(EQ|GE|GT|NE|UO)_PPzZZ_[HSD]$",
                        "^FCM(EQ|GE|GT|LE|LT|NE)_PPzZ0_[HSD]$")>;

// Floating point complex add
def : InstRW<[V1Write_3c_1V01], (instregex "^FCADD_ZPmZ_[HSD]$")>;

// Floating point complex multiply add
// The ZPmZZ forms list ReadDefault ahead of V1Rd_ZFCMA; the ZZZI forms list
// V1Rd_ZFCMA alone.
def : InstRW<[V1Wr_ZFCMA, ReadDefault, V1Rd_ZFCMA],
             (instregex "^FCMLA_ZPmZZ_[HSD]")>;
def : InstRW<[V1Wr_ZFCMA, V1Rd_ZFCMA],
             (instregex "^FCMLA_ZZZI_[HS]")>;

// Floating point convert, long or narrow (F16 to F32 or F32 to F16)
// Floating point convert to integer, F32
def : InstRW<[V1Write_4c_2V0],
             (instregex "^FCVT_ZPmZ_(HtoS|StoH)",
                        "^FCVTZ[SU]_ZPmZ_(HtoS|StoS)")>;

// Floating point convert, long or narrow (F16 to F64, F32 to F64, F64 to F32
// or F64 to F16)
// Floating point convert to integer, F64
def : InstRW<[V1Write_3c_1V0],
             (instregex "^FCVT_ZPmZ_(HtoD|StoD|DtoS|DtoH)",
                        "^FCVTZ[SU]_ZPmZ_(HtoD|StoD|DtoS|DtoD)")>;

// Floating point convert to integer, F16
def : InstRW<[V1Write_6c_4V0], (instregex "^FCVTZ[SU]_ZPmZ_HtoH")>;

// Floating point copy
def : InstRW<[V1Write_2c_1V01],
             (instregex "^FCPY_ZPmI_[HSD]$",
                        "^FDUP_ZI_[HSD]$")>;

// Floating point divide, F16
def : InstRW<[V1Write_13c10_1V0], (instregex "^FDIVR?_(ZPmZ|ZPZZ)_H")>;

// Floating point divide, F32
def : InstRW<[V1Write_10c7_1V0], (instregex "^FDIVR?_(ZPmZ|ZPZZ)_S")>;

// Floating point divide, F64
def : InstRW<[V1Write_15c7_1V0], (instregex "^FDIVR?_(ZPmZ|ZPZZ)_D")>;

// Floating point min/max
def : InstRW<[V1Write_2c_1V01],
             (instregex "^F(MAX|MIN)(NM)?_ZPm[IZ]_[HSD]",
                        "^F(MAX|MIN)(NM)?_ZPZ[IZ]_[HSD]")>;

// Floating point multiply
def : InstRW<[V1Write_3c_1V01],
             (instregex "^(FSCALE|FMULX)_ZPmZ_[HSD]",
                        "^FMULX_ZPZZ_[HSD]",
                        "^FMUL_(ZPm[IZ]|ZZZI?)_[HSD]",
                        "^FMUL_ZPZ[IZ]_[HSD]")>;

// Floating point multiply accumulate
// As with FCMLA above, only the ZPmZZ forms carry the extra ReadDefault.
def : InstRW<[V1Wr_ZFMA, ReadDefault, V1Rd_ZFMA],
             (instregex "^FN?ML[AS]_ZPmZZ_[HSD]",
                        "^FN?(MAD|MSB)_ZPmZZ_[HSD]")>;
def : InstRW<[V1Wr_ZFMA, V1Rd_ZFMA],
             (instregex "^FML[AS]_ZZZI_[HSD]",
                        "^FN?ML[AS]_ZPZZZ_[HSD]")>;

// Floating point reciprocal step
def : InstRW<[V1Write_4c_1V01], (instregex "^F(RECPS|RSQRTS)_ZZZ_[HSD]")>;

// Floating point reciprocal estimate, F16
def : InstRW<[V1Write_6c_4V0], (instrs FRECPE_ZZ_H, FRSQRTE_ZZ_H)>;

// Floating point reciprocal estimate, F32
def : InstRW<[V1Write_4c_2V0], (instrs FRECPE_ZZ_S, FRSQRTE_ZZ_S)>;

// Floating point reciprocal estimate, F64
def : InstRW<[V1Write_3c_1V0], (instrs FRECPE_ZZ_D, FRSQRTE_ZZ_D)>;

// Floating point reciprocal exponent
def : InstRW<[V1Write_3c_1V0], (instregex "^FRECPX_ZPmZ_[HSD]")>;

// Floating point reduction, F16
def : InstRW<[V1Write_13c_6V01],
             (instregex "^F(ADD|((MAX|MIN)(NM)?))V_VPZ_H$")>;

// Floating point reduction, F32
def : InstRW<[V1Write_11c_1V_5V01],
             (instregex "^F(ADD|((MAX|MIN)(NM)?))V_VPZ_S$")>;

// Floating point reduction, F64
def : InstRW<[V1Write_9c_1V_4V01],
             (instregex "^F(ADD|((MAX|MIN)(NM)?))V_VPZ_D$")>;

// Floating point round to integral, F16
def : InstRW<[V1Write_6c_1V0], (instregex "^FRINT[AIMNPXZ]_ZPmZ_H")>;

// Floating point round to integral, F32
def : InstRW<[V1Write_4c_1V0], (instregex "^FRINT[AIMNPXZ]_ZPmZ_S")>;

// Floating point round to integral, F64
def : InstRW<[V1Write_3c_1V0], (instregex "^FRINT[AIMNPXZ]_ZPmZ_D")>;

// Floating point square root, F16
def : InstRW<[V1Write_13c10_1V0], (instregex "^FSQRT_ZPmZ_H")>;

// Floating point square root, F32
def : InstRW<[V1Write_10c7_1V0], (instregex "^FSQRT_ZPmZ_S")>;

// Floating point square root, F64
def : InstRW<[V1Write_16c7_1V0], (instregex "^FSQRT_ZPmZ_D")>;

// Floating point trigonometric
def : InstRW<[V1Write_3c_1V01],
             (instregex "^FEXPA_ZZ_[HSD]$",
                        "^FTMAD_ZZI_[HSD]$",
                        "^FTS(MUL|SEL)_ZZZ_[HSD]$")>;


// SVE BFloat16 (BF16) instructions
// -----------------------------------------------------------------------------

// Convert, F32 to BF16
def : InstRW<[V1Write_4c_1V0], (instrs BFCVT_ZPmZ, BFCVTNT_ZPmZ)>;

// Dot product
def : InstRW<[V1Wr_ZBFDOT, V1Rd_ZBFDOT], (instrs BFDOT_ZZI, BFDOT_ZZZ)>;

// Matrix multiply accumulate
def : InstRW<[V1Wr_ZBFMMA, V1Rd_ZBFMMA], (instrs BFMMLA_ZZZ)>;

// Multiply accumulate long
def : InstRW<[V1Wr_ZBFMAL, V1Rd_ZBFMAL],
             (instregex "^BFMLAL[BT]_ZZZ(I)?$")>;


// SVE Load instructions
// -----------------------------------------------------------------------------

// Load vector
def : InstRW<[V1Write_6c_1L01], (instrs LDR_ZXI)>;

// Load predicate
def : InstRW<[V1Write_6c_1L_1M], (instrs LDR_PXI)>;

// Contiguous load, scalar + imm
// Contiguous load, scalar + scalar
// Contiguous load broadcast, scalar + imm
// Contiguous load broadcast, scalar + scalar
// The halfword scalar + scalar names (LD1H, LD1S?H_[SD], LD1RQ_H) are left out
// of the first def and mapped to a different write in the second.
def : InstRW<[V1Write_6c_1L01],
             (instregex "^LD1[BHWD]_IMM$",
                        "^LD1S?B_[HSD]_IMM$",
                        "^LD1S?H_[SD]_IMM$",
                        "^LD1S?W_D_IMM$",
                        "^LD1[BWD]$",
                        "^LD1S?B_[HSD]$",
                        "^LD1S?W_D$",
                        "^LD1R[BHWD]_IMM$",
                        "^LD1RSW_IMM$",
                        "^LD1RS?B_[HSD]_IMM$",
                        "^LD1RS?H_[SD]_IMM$",
                        "^LD1RS?W_D_IMM$",
                        "^LD1RQ_[BHWD]_IMM$",
                        "^LD1RQ_[BWD]$")>;
def : InstRW<[V1Write_7c_1L01_1S],
             (instregex "^LD1H$",
                        "^LD1S?H_[SD]$",
                        "^LD1RQ_H$")>;

// Non temporal load, scalar + imm
def : InstRW<[V1Write_6c_1L01], (instregex "^LDNT1[BHWD]_ZRI$")>;

// Non temporal load, scalar + scalar
def : InstRW<[V1Write_7c_1L01_1S], (instrs LDNT1H_ZRR)>;
def : InstRW<[V1Write_6c_1L01_1S], (instregex "^LDNT1[BWD]_ZRR$")>;

// Contiguous first faulting load, scalar + scalar
def : InstRW<[V1Write_7c_1L01_1S],
             (instregex "^LDFF1H$",
                        "^LDFF1S?H_[SD]$")>;
def : InstRW<[V1Write_6c_1L01_1S],
             (instregex "^LDFF1[BWD]$",
                        "^LDFF1S?B_[HSD]$",
                        "^LDFF1S?W_D$")>;

// Contiguous non faulting load, scalar + imm
def : InstRW<[V1Write_6c_1L01],
             (instregex "^LDNF1[BHWD]_IMM$",
                        "^LDNF1S?B_[HSD]_IMM$",
                        "^LDNF1S?H_[SD]_IMM$",
                        "^LDNF1S?W_D_IMM$")>;

// Contiguous Load two structures to two vectors, scalar + imm
def : InstRW<[V1Write_8c_2L01_2V01], (instregex "^LD2[BHWD]_IMM$")>;

// Contiguous Load two structures to two vectors, scalar + scalar
def : InstRW<[V1Write_10c_2L01_2V01], (instrs LD2H)>;
def : InstRW<[V1Write_9c_2L01_2V01], (instregex "^LD2[BWD]$")>;

// Contiguous Load three structures to three vectors, scalar + imm
def : InstRW<[V1Write_11c_3L01_3V01], (instregex "^LD3[BHWD]_IMM$")>;

// Contiguous Load three structures to three vectors, scalar + scalar
def : InstRW<[V1Write_13c_3L01_1S_3V01], (instregex "^LD3[BHWD]$")>;

// Contiguous Load four structures to four vectors, scalar + imm
def : InstRW<[V1Write_12c_4L01_4V01], (instregex "^LD4[BHWD]_IMM$")>;

// Contiguous Load four structures to four vectors, scalar + scalar
def : InstRW<[V1Write_13c_4L01_2S_4V01], (instregex "^LD4[BHWD]$")>;

// Gather load, vector + imm, 32-bit element size
def : InstRW<[V1Write_11c_1L_1V],
             (instregex "^GLD(FF)?1S?[BH]_S_IMM$",
                        "^GLD(FF)?1W_IMM$")>;

// Gather load, vector + imm, 64-bit element size
def : InstRW<[V1Write_9c_2L_2V],
             (instregex "^GLD(FF)?1S?[BHW]_D_IMM$",
                        "^GLD(FF)?1S?[BHW]_D(_[SU]XTW)?(_SCALED)?$",
                        "^GLD(FF)?1D_IMM$",
                        "^GLD(FF)?1D(_[SU]XTW)?(_SCALED)?$")>;

// Gather load, 32-bit scaled offset
def : InstRW<[V1Write_11c_2L_2V],
             (instregex "^GLD(FF)?1S?[HW]_S_[SU]XTW_SCALED$",
                        "^GLD(FF)?1W_[SU]XTW_SCALED")>;

// Gather load, 32-bit unpacked unscaled offset
def : InstRW<[V1Write_9c_1L_1V],
             (instregex "^GLD(FF)?1S?[BH]_S_[SU]XTW$",
                        "^GLD(FF)?1W_[SU]XTW$")>;

// Prefetch
// NOTE: This is not specified in the SOG.
def : InstRW<[V1Write_4c_1L01], (instregex "^PRF[BHWD]")>;


// SVE Store instructions
// -----------------------------------------------------------------------------

// Store from predicate reg
def : InstRW<[V1Write_1c_1L01], (instrs STR_PXI)>;

// Store from vector reg
def : InstRW<[V1Write_2c_1L01_1V], (instrs STR_ZXI)>;

// Contiguous store, scalar + imm
// Contiguous store, scalar + scalar
// The halfword scalar + scalar names (ST1H, ST1H_S, ST1H_D) are matched by the
// second def only and use a different write.
def : InstRW<[V1Write_2c_1L01_1V],
             (instregex "^ST1[BHWD]_IMM$",
                        "^ST1B_[HSD]_IMM$",
                        "^ST1H_[SD]_IMM$",
                        "^ST1W_D_IMM$",
                        "^ST1[BWD]$",
                        "^ST1B_[HSD]$",
                        "^ST1W_D$")>;
def : InstRW<[V1Write_2c_1L01_1S_1V], (instregex "^ST1H(_[SD])?$")>;

// Contiguous store two structures from two vectors, scalar + imm
// Contiguous store two structures from two vectors, scalar + scalar
def : InstRW<[V1Write_4c_1L01_1V],
             (instregex "^ST2[BHWD]_IMM$",
                        "^ST2[BWD]$")>;
def : InstRW<[V1Write_4c_1L01_1S_1V], (instrs ST2H)>;

// Contiguous store three structures from three vectors, scalar + imm
def : InstRW<[V1Write_7c_5L01_5V], (instregex "^ST3[BHWD]_IMM$")>;

// Contiguous store three structures from three vectors, scalar + scalar
def : InstRW<[V1Write_7c_5L01_5S_5V], (instregex "^ST3[BHWD]$")>;

// Contiguous store four structures from four vectors, scalar + imm
def : InstRW<[V1Write_11c_9L01_9V], (instregex "^ST4[BHWD]_IMM$")>;

// Contiguous store four structures from four vectors, scalar + scalar
def : InstRW<[V1Write_11c_9L01_9S_9V], (instregex "^ST4[BHWD]$")>;

// Non temporal store, scalar + imm
// Non temporal store, scalar + scalar
def : InstRW<[V1Write_2c_1L01_1V],
             (instregex "^STNT1[BHWD]_ZRI$",
                        "^STNT1[BWD]_ZRR$")>;
def : InstRW<[V1Write_2c_1L01_1S_1V], (instrs STNT1H_ZRR)>;

// Scatter store vector + imm 32-bit element size
// Scatter store, 32-bit scaled offset
// Scatter store, 32-bit unscaled offset
def : InstRW<[V1Write_10c_2L01_2V],
             (instregex "^SST1[BH]_S_IMM$",
                        "^SST1W_IMM$",
                        "^SST1(H_S|W)_[SU]XTW_SCALED$",
                        "^SST1[BH]_S_[SU]XTW$",
                        "^SST1W_[SU]XTW$")>;

// Scatter store, 32-bit unpacked unscaled offset
// Scatter store, 32-bit unpacked scaled offset
def : InstRW<[V1Write_6c_1L01_1V],
             (instregex "^SST1[BHW]_D_[SU]XTW$",
                        "^SST1D_[SU]XTW$",
                        "^SST1[HW]_D_[SU]XTW_SCALED$",
                        "^SST1D_[SU]XTW_SCALED$")>;

// Scatter store vector + imm 64-bit element size
// Scatter store, 64-bit scaled offset
// Scatter store, 64-bit unscaled offset
def : InstRW<[V1Write_6c_1L01_1V],
             (instregex "^SST1[BHW]_D_IMM$",
                        "^SST1D_IMM$",
                        "^SST1[HW]_D_SCALED$",
                        "^SST1D_SCALED$",
                        "^SST1[BHW]_D$",
                        "^SST1D$")>;


// SVE Miscellaneous instructions
// -----------------------------------------------------------------------------

// Read first fault register, unpredicated
// Set first fault register
// Write to first fault register
def : InstRW<[V1Write_2c_1M0], (instrs RDFFR_P, SETFFR, WRFFR)>;

// Read first fault register, predicated
def : InstRW<[V1Write_3c_2M0], (instrs RDFFR_PPz)>;

// Read first fault register and set flags
def : InstRW<[V1Write_4c_1M], (instrs RDFFRS_PPz)>;


} // Closes the enclosing `let SchedModel = NeoverseV1Model in {`.