1*7330f729Sjoerg//=- ARMScheduleA8.td - ARM Cortex-A8 Scheduling Definitions -*- tablegen -*-=// 2*7330f729Sjoerg// 3*7330f729Sjoerg// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4*7330f729Sjoerg// See https://llvm.org/LICENSE.txt for license information. 5*7330f729Sjoerg// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6*7330f729Sjoerg// 7*7330f729Sjoerg//===----------------------------------------------------------------------===// 8*7330f729Sjoerg// 9*7330f729Sjoerg// This file defines the itinerary class data for the ARM Cortex A8 processors. 10*7330f729Sjoerg// 11*7330f729Sjoerg//===----------------------------------------------------------------------===// 12*7330f729Sjoerg 13*7330f729Sjoerg// 14*7330f729Sjoerg// Scheduling information derived from "Cortex-A8 Technical Reference Manual". 15*7330f729Sjoerg// Functional Units. 16*7330f729Sjoergdef A8_Pipe0 : FuncUnit; // pipeline 0 17*7330f729Sjoergdef A8_Pipe1 : FuncUnit; // pipeline 1 18*7330f729Sjoergdef A8_LSPipe : FuncUnit; // Load / store pipeline 19*7330f729Sjoergdef A8_NPipe : FuncUnit; // NEON ALU/MUL pipe 20*7330f729Sjoergdef A8_NLSPipe : FuncUnit; // NEON LS pipe 21*7330f729Sjoerg// 22*7330f729Sjoerg// Dual issue pipeline represented by A8_Pipe0 | A8_Pipe1 23*7330f729Sjoerg// 24*7330f729Sjoergdef CortexA8Itineraries : ProcessorItineraries< 25*7330f729Sjoerg [A8_Pipe0, A8_Pipe1, A8_LSPipe, A8_NPipe, A8_NLSPipe], 26*7330f729Sjoerg [], [ 27*7330f729Sjoerg // Two fully-pipelined integer ALU pipelines 28*7330f729Sjoerg // 29*7330f729Sjoerg // No operand cycles 30*7330f729Sjoerg InstrItinData<IIC_iALUx , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>]>, 31*7330f729Sjoerg // 32*7330f729Sjoerg // Binary Instructions that produce a result 33*7330f729Sjoerg InstrItinData<IIC_iALUi ,[InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2]>, 34*7330f729Sjoerg InstrItinData<IIC_iALUr ,[InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2, 2]>, 35*7330f729Sjoerg InstrItinData<IIC_iALUsi,[InstrStage<1, 
[A8_Pipe0, A8_Pipe1]>], [2, 2, 1]>, 36*7330f729Sjoerg InstrItinData<IIC_iALUsir,[InstrStage<1,[A8_Pipe0, A8_Pipe1]>], [2, 1, 2]>, 37*7330f729Sjoerg InstrItinData<IIC_iALUsr,[InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2, 1, 1]>, 38*7330f729Sjoerg // 39*7330f729Sjoerg // Bitwise Instructions that produce a result 40*7330f729Sjoerg InstrItinData<IIC_iBITi ,[InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2]>, 41*7330f729Sjoerg InstrItinData<IIC_iBITr ,[InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2, 2]>, 42*7330f729Sjoerg InstrItinData<IIC_iBITsi,[InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2, 1]>, 43*7330f729Sjoerg InstrItinData<IIC_iBITsr,[InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2, 1, 1]>, 44*7330f729Sjoerg // 45*7330f729Sjoerg // Unary Instructions that produce a result 46*7330f729Sjoerg InstrItinData<IIC_iUNAr , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2]>, 47*7330f729Sjoerg InstrItinData<IIC_iUNAsi, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 1]>, 48*7330f729Sjoerg // 49*7330f729Sjoerg // Zero and sign extension instructions 50*7330f729Sjoerg InstrItinData<IIC_iEXTr , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [1, 1]>, 51*7330f729Sjoerg InstrItinData<IIC_iEXTAr, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2, 1]>, 52*7330f729Sjoerg InstrItinData<IIC_iEXTAsr,[InstrStage<1, [A8_Pipe0, A8_Pipe1]>],[2, 2, 1, 1]>, 53*7330f729Sjoerg // 54*7330f729Sjoerg // Compare instructions 55*7330f729Sjoerg InstrItinData<IIC_iCMPi , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2]>, 56*7330f729Sjoerg InstrItinData<IIC_iCMPr , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2]>, 57*7330f729Sjoerg InstrItinData<IIC_iCMPsi, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 1]>, 58*7330f729Sjoerg InstrItinData<IIC_iCMPsr, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 1, 1]>, 59*7330f729Sjoerg // 60*7330f729Sjoerg // Test instructions 61*7330f729Sjoerg InstrItinData<IIC_iTSTi , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2]>, 62*7330f729Sjoerg InstrItinData<IIC_iTSTr , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2]>, 
63*7330f729Sjoerg InstrItinData<IIC_iTSTsi, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 1]>, 64*7330f729Sjoerg InstrItinData<IIC_iTSTsr, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 1, 1]>, 65*7330f729Sjoerg // 66*7330f729Sjoerg // Move instructions, unconditional 67*7330f729Sjoerg InstrItinData<IIC_iMOVi , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [1]>, 68*7330f729Sjoerg InstrItinData<IIC_iMOVr , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [1, 1]>, 69*7330f729Sjoerg InstrItinData<IIC_iMOVsi, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [1, 1]>, 70*7330f729Sjoerg InstrItinData<IIC_iMOVsr, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [1, 1, 1]>, 71*7330f729Sjoerg InstrItinData<IIC_iMOVix2,[InstrStage<1, [A8_Pipe0, A8_Pipe1]>, 72*7330f729Sjoerg InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2]>, 73*7330f729Sjoerg InstrItinData<IIC_iMOVix2addpc,[InstrStage<1, [A8_Pipe0, A8_Pipe1]>, 74*7330f729Sjoerg InstrStage<1, [A8_Pipe0, A8_Pipe1]>, 75*7330f729Sjoerg InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [3]>, 76*7330f729Sjoerg InstrItinData<IIC_iMOVix2ld,[InstrStage<1, [A8_Pipe0, A8_Pipe1]>, 77*7330f729Sjoerg InstrStage<1, [A8_Pipe0, A8_Pipe1]>, 78*7330f729Sjoerg InstrStage<1, [A8_LSPipe]>], [5]>, 79*7330f729Sjoerg // 80*7330f729Sjoerg // Move instructions, conditional 81*7330f729Sjoerg InstrItinData<IIC_iCMOVi , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2]>, 82*7330f729Sjoerg InstrItinData<IIC_iCMOVr , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 1]>, 83*7330f729Sjoerg InstrItinData<IIC_iCMOVsi, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 1]>, 84*7330f729Sjoerg InstrItinData<IIC_iCMOVsr, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 1, 1]>, 85*7330f729Sjoerg InstrItinData<IIC_iCMOVix2,[InstrStage<1, [A8_Pipe0, A8_Pipe1]>, 86*7330f729Sjoerg InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [3, 1]>, 87*7330f729Sjoerg // 88*7330f729Sjoerg // MVN instructions 89*7330f729Sjoerg InstrItinData<IIC_iMVNi , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [1]>, 90*7330f729Sjoerg InstrItinData<IIC_iMVNr , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [1, 
1]>, 91*7330f729Sjoerg InstrItinData<IIC_iMVNsi, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [1, 1]>, 92*7330f729Sjoerg InstrItinData<IIC_iMVNsr, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [1, 1, 1]>, 93*7330f729Sjoerg 94*7330f729Sjoerg // Integer multiply pipeline 95*7330f729Sjoerg // Result written in E5, but that is relative to the last cycle of multicycle, 96*7330f729Sjoerg // so we use 6 for those cases 97*7330f729Sjoerg // 98*7330f729Sjoerg InstrItinData<IIC_iMUL16 , [InstrStage<1, [A8_Pipe0]>], [5, 1, 1]>, 99*7330f729Sjoerg InstrItinData<IIC_iMAC16 , [InstrStage<2, [A8_Pipe0]>], [6, 1, 1, 4]>, 100*7330f729Sjoerg InstrItinData<IIC_iMUL32 , [InstrStage<2, [A8_Pipe0]>], [6, 1, 1]>, 101*7330f729Sjoerg InstrItinData<IIC_iMAC32 , [InstrStage<2, [A8_Pipe0]>], [6, 1, 1, 4]>, 102*7330f729Sjoerg InstrItinData<IIC_iMUL64 , [InstrStage<3, [A8_Pipe0]>], [6, 6, 1, 1]>, 103*7330f729Sjoerg InstrItinData<IIC_iMAC64 , [InstrStage<3, [A8_Pipe0]>], [6, 6, 1, 1]>, 104*7330f729Sjoerg 105*7330f729Sjoerg // Integer load pipeline 106*7330f729Sjoerg // 107*7330f729Sjoerg // Immediate offset 108*7330f729Sjoerg InstrItinData<IIC_iLoad_i , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 109*7330f729Sjoerg InstrStage<1, [A8_LSPipe]>], [3, 1]>, 110*7330f729Sjoerg InstrItinData<IIC_iLoad_bh_i, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 111*7330f729Sjoerg InstrStage<1, [A8_LSPipe]>], [3, 1]>, 112*7330f729Sjoerg InstrItinData<IIC_iLoad_d_i, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 113*7330f729Sjoerg InstrStage<1, [A8_LSPipe]>], [3, 1]>, 114*7330f729Sjoerg // 115*7330f729Sjoerg // Register offset 116*7330f729Sjoerg InstrItinData<IIC_iLoad_r , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 117*7330f729Sjoerg InstrStage<1, [A8_LSPipe]>], [3, 1, 1]>, 118*7330f729Sjoerg InstrItinData<IIC_iLoad_bh_r, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 119*7330f729Sjoerg InstrStage<1, [A8_LSPipe]>], [3, 1, 1]>, 120*7330f729Sjoerg InstrItinData<IIC_iLoad_d_r , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 121*7330f729Sjoerg InstrStage<1, 
[A8_LSPipe]>], [3, 1, 1]>, 122*7330f729Sjoerg // 123*7330f729Sjoerg // Scaled register offset, issues over 2 cycles 124*7330f729Sjoerg // FIXME: lsl by 2 takes 1 cycle. 125*7330f729Sjoerg InstrItinData<IIC_iLoad_si , [InstrStage<2, [A8_Pipe0, A8_Pipe1], 0>, 126*7330f729Sjoerg InstrStage<1, [A8_LSPipe]>], [4, 1, 1]>, 127*7330f729Sjoerg InstrItinData<IIC_iLoad_bh_si,[InstrStage<2, [A8_Pipe0, A8_Pipe1], 0>, 128*7330f729Sjoerg InstrStage<1, [A8_LSPipe]>], [4, 1, 1]>, 129*7330f729Sjoerg // 130*7330f729Sjoerg // Immediate offset with update 131*7330f729Sjoerg InstrItinData<IIC_iLoad_iu , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 132*7330f729Sjoerg InstrStage<1, [A8_LSPipe]>], [3, 2, 1]>, 133*7330f729Sjoerg InstrItinData<IIC_iLoad_bh_iu,[InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 134*7330f729Sjoerg InstrStage<1, [A8_LSPipe]>], [3, 2, 1]>, 135*7330f729Sjoerg // 136*7330f729Sjoerg // Register offset with update 137*7330f729Sjoerg InstrItinData<IIC_iLoad_ru , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 138*7330f729Sjoerg InstrStage<1, [A8_LSPipe]>], [3, 2, 1, 1]>, 139*7330f729Sjoerg InstrItinData<IIC_iLoad_bh_ru,[InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 140*7330f729Sjoerg InstrStage<1, [A8_LSPipe]>], [3, 2, 1, 1]>, 141*7330f729Sjoerg InstrItinData<IIC_iLoad_d_ru, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 142*7330f729Sjoerg InstrStage<1, [A8_LSPipe]>], [3, 2, 1, 1]>, 143*7330f729Sjoerg // 144*7330f729Sjoerg // Scaled register offset with update, issues over 2 cycles 145*7330f729Sjoerg InstrItinData<IIC_iLoad_siu , [InstrStage<2, [A8_Pipe0, A8_Pipe1], 0>, 146*7330f729Sjoerg InstrStage<2, [A8_LSPipe]>], [4, 3, 1, 1]>, 147*7330f729Sjoerg InstrItinData<IIC_iLoad_bh_siu,[InstrStage<2, [A8_Pipe0, A8_Pipe1], 0>, 148*7330f729Sjoerg InstrStage<2, [A8_LSPipe]>], [4, 3, 1, 1]>, 149*7330f729Sjoerg // 150*7330f729Sjoerg // Load multiple, def is the 5th operand. Pipeline 0 only. 151*7330f729Sjoerg // FIXME: A8_LSPipe cycle time is dynamic, this assumes 3 to 4 registers. 
152*7330f729Sjoerg InstrItinData<IIC_iLoad_m , [InstrStage<2, [A8_Pipe0], 0>, 153*7330f729Sjoerg InstrStage<2, [A8_LSPipe]>], 154*7330f729Sjoerg [1, 1, 1, 1, 3], [], -1>, // dynamic uops 155*7330f729Sjoerg // 156*7330f729Sjoerg // Load multiple + update, defs are the 1st and 5th operands. 157*7330f729Sjoerg InstrItinData<IIC_iLoad_mu , [InstrStage<3, [A8_Pipe0], 0>, 158*7330f729Sjoerg InstrStage<3, [A8_LSPipe]>], 159*7330f729Sjoerg [2, 1, 1, 1, 3], [], -1>, // dynamic uops 160*7330f729Sjoerg // 161*7330f729Sjoerg // Load multiple plus branch 162*7330f729Sjoerg InstrItinData<IIC_iLoad_mBr, [InstrStage<3, [A8_Pipe0], 0>, 163*7330f729Sjoerg InstrStage<3, [A8_LSPipe]>, 164*7330f729Sjoerg InstrStage<1, [A8_Pipe0, A8_Pipe1]>], 165*7330f729Sjoerg [1, 2, 1, 1, 3], [], -1>, // dynamic uops 166*7330f729Sjoerg // 167*7330f729Sjoerg // Pop, def is the 3rd operand. 168*7330f729Sjoerg InstrItinData<IIC_iPop , [InstrStage<3, [A8_Pipe0], 0>, 169*7330f729Sjoerg InstrStage<3, [A8_LSPipe]>], 170*7330f729Sjoerg [1, 1, 3], [], -1>, // dynamic uops 171*7330f729Sjoerg // 172*7330f729Sjoerg // Pop and branch, def is the 3rd operand. 173*7330f729Sjoerg InstrItinData<IIC_iPop_Br, [InstrStage<3, [A8_Pipe0], 0>, 174*7330f729Sjoerg InstrStage<3, [A8_LSPipe]>, 175*7330f729Sjoerg InstrStage<1, [A8_Pipe0, A8_Pipe1]>], 176*7330f729Sjoerg [1, 1, 3], [], -1>, // dynamic uops 177*7330f729Sjoerg // 178*7330f729Sjoerg // iLoadi + iALUr for t2LDRpci_pic. 
179*7330f729Sjoerg InstrItinData<IIC_iLoadiALU, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 180*7330f729Sjoerg InstrStage<1, [A8_LSPipe]>, 181*7330f729Sjoerg InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [4, 1]>, 182*7330f729Sjoerg 183*7330f729Sjoerg 184*7330f729Sjoerg // Integer store pipeline 185*7330f729Sjoerg // 186*7330f729Sjoerg // Immediate offset 187*7330f729Sjoerg InstrItinData<IIC_iStore_i , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 188*7330f729Sjoerg InstrStage<1, [A8_LSPipe]>], [3, 1]>, 189*7330f729Sjoerg InstrItinData<IIC_iStore_bh_i,[InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 190*7330f729Sjoerg InstrStage<1, [A8_LSPipe]>], [3, 1]>, 191*7330f729Sjoerg InstrItinData<IIC_iStore_d_i, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 192*7330f729Sjoerg InstrStage<1, [A8_LSPipe]>], [3, 1]>, 193*7330f729Sjoerg // 194*7330f729Sjoerg // Register offset 195*7330f729Sjoerg InstrItinData<IIC_iStore_r , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 196*7330f729Sjoerg InstrStage<1, [A8_LSPipe]>], [3, 1, 1]>, 197*7330f729Sjoerg InstrItinData<IIC_iStore_bh_r,[InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 198*7330f729Sjoerg InstrStage<1, [A8_LSPipe]>], [3, 1, 1]>, 199*7330f729Sjoerg InstrItinData<IIC_iStore_d_r, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 200*7330f729Sjoerg InstrStage<1, [A8_LSPipe]>], [3, 1, 1]>, 201*7330f729Sjoerg // 202*7330f729Sjoerg // Scaled register offset, issues over 2 cycles 203*7330f729Sjoerg InstrItinData<IIC_iStore_si , [InstrStage<2, [A8_Pipe0, A8_Pipe1], 0>, 204*7330f729Sjoerg InstrStage<2, [A8_LSPipe]>], [3, 1, 1]>, 205*7330f729Sjoerg InstrItinData<IIC_iStore_bh_si,[InstrStage<2, [A8_Pipe0, A8_Pipe1], 0>, 206*7330f729Sjoerg InstrStage<2, [A8_LSPipe]>], [3, 1, 1]>, 207*7330f729Sjoerg // 208*7330f729Sjoerg // Immediate offset with update 209*7330f729Sjoerg InstrItinData<IIC_iStore_iu , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 210*7330f729Sjoerg InstrStage<1, [A8_LSPipe]>], [2, 3, 1]>, 211*7330f729Sjoerg InstrItinData<IIC_iStore_bh_iu,[InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 
212*7330f729Sjoerg InstrStage<1, [A8_LSPipe]>], [2, 3, 1]>, 213*7330f729Sjoerg // 214*7330f729Sjoerg // Register offset with update 215*7330f729Sjoerg InstrItinData<IIC_iStore_ru , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 216*7330f729Sjoerg InstrStage<1, [A8_LSPipe]>], [2, 3, 1, 1]>, 217*7330f729Sjoerg InstrItinData<IIC_iStore_bh_ru,[InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 218*7330f729Sjoerg InstrStage<1, [A8_LSPipe]>], [2, 3, 1, 1]>, 219*7330f729Sjoerg InstrItinData<IIC_iStore_d_ru, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 220*7330f729Sjoerg InstrStage<1, [A8_LSPipe]>], [2, 3, 1, 1]>, 221*7330f729Sjoerg // 222*7330f729Sjoerg // Scaled register offset with update, issues over 2 cycles 223*7330f729Sjoerg InstrItinData<IIC_iStore_siu, [InstrStage<2, [A8_Pipe0, A8_Pipe1], 0>, 224*7330f729Sjoerg InstrStage<2, [A8_LSPipe]>], [3, 3, 1, 1]>, 225*7330f729Sjoerg InstrItinData<IIC_iStore_bh_siu,[InstrStage<2, [A8_Pipe0, A8_Pipe1], 0>, 226*7330f729Sjoerg InstrStage<2, [A8_LSPipe]>], [3, 3, 1, 1]>, 227*7330f729Sjoerg // 228*7330f729Sjoerg // Store multiple. Pipeline 0 only. 229*7330f729Sjoerg // FIXME: A8_LSPipe cycle time is dynamic, this assumes 3 to 4 registers. 
230*7330f729Sjoerg InstrItinData<IIC_iStore_m , [InstrStage<2, [A8_Pipe0], 0>, 231*7330f729Sjoerg InstrStage<2, [A8_LSPipe]>], 232*7330f729Sjoerg [], [], -1>, // dynamic uops 233*7330f729Sjoerg // 234*7330f729Sjoerg // Store multiple + update 235*7330f729Sjoerg InstrItinData<IIC_iStore_mu, [InstrStage<2, [A8_Pipe0], 0>, 236*7330f729Sjoerg InstrStage<2, [A8_LSPipe]>], 237*7330f729Sjoerg [2], [], -1>, // dynamic uops 238*7330f729Sjoerg // 239*7330f729Sjoerg // Preload 240*7330f729Sjoerg InstrItinData<IIC_Preload, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2]>, 241*7330f729Sjoerg 242*7330f729Sjoerg // Branch 243*7330f729Sjoerg // 244*7330f729Sjoerg // no delay slots, so the latency of a branch is unimportant 245*7330f729Sjoerg InstrItinData<IIC_Br , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>]>, 246*7330f729Sjoerg 247*7330f729Sjoerg // VFP 248*7330f729Sjoerg // Issue through integer pipeline, and execute in NEON unit. We assume 249*7330f729Sjoerg // RunFast mode so that NFP pipeline is used for single-precision when 250*7330f729Sjoerg // possible. 
251*7330f729Sjoerg // 252*7330f729Sjoerg // FP Special Register to Integer Register File Move 253*7330f729Sjoerg InstrItinData<IIC_fpSTAT , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 254*7330f729Sjoerg InstrStage<1, [A8_NLSPipe]>], [20]>, 255*7330f729Sjoerg // 256*7330f729Sjoerg // Single-precision FP Unary 257*7330f729Sjoerg InstrItinData<IIC_fpUNA32 , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 258*7330f729Sjoerg InstrStage<1, [A8_NPipe]>], [7, 1]>, 259*7330f729Sjoerg // 260*7330f729Sjoerg // Double-precision FP Unary 261*7330f729Sjoerg InstrItinData<IIC_fpUNA64 , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 262*7330f729Sjoerg InstrStage<4, [A8_NPipe], 0>, 263*7330f729Sjoerg InstrStage<4, [A8_NLSPipe]>], [4, 1]>, 264*7330f729Sjoerg // 265*7330f729Sjoerg // Single-precision FP Compare 266*7330f729Sjoerg InstrItinData<IIC_fpCMP32 , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 267*7330f729Sjoerg InstrStage<1, [A8_NPipe]>], [1, 1]>, 268*7330f729Sjoerg // 269*7330f729Sjoerg // Double-precision FP Compare 270*7330f729Sjoerg InstrItinData<IIC_fpCMP64 , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 271*7330f729Sjoerg InstrStage<4, [A8_NPipe], 0>, 272*7330f729Sjoerg InstrStage<4, [A8_NLSPipe]>], [4, 1]>, 273*7330f729Sjoerg // 274*7330f729Sjoerg // Single to Double FP Convert 275*7330f729Sjoerg InstrItinData<IIC_fpCVTSD , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 276*7330f729Sjoerg InstrStage<7, [A8_NPipe], 0>, 277*7330f729Sjoerg InstrStage<7, [A8_NLSPipe]>], [7, 1]>, 278*7330f729Sjoerg // 279*7330f729Sjoerg // Double to Single FP Convert 280*7330f729Sjoerg InstrItinData<IIC_fpCVTDS , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 281*7330f729Sjoerg InstrStage<5, [A8_NPipe], 0>, 282*7330f729Sjoerg InstrStage<5, [A8_NLSPipe]>], [5, 1]>, 283*7330f729Sjoerg // 284*7330f729Sjoerg // Single-Precision FP to Integer Convert 285*7330f729Sjoerg InstrItinData<IIC_fpCVTSI , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 286*7330f729Sjoerg InstrStage<1, [A8_NPipe]>], [7, 1]>, 287*7330f729Sjoerg // 
288*7330f729Sjoerg // Double-Precision FP to Integer Convert 289*7330f729Sjoerg InstrItinData<IIC_fpCVTDI , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 290*7330f729Sjoerg InstrStage<8, [A8_NPipe], 0>, 291*7330f729Sjoerg InstrStage<8, [A8_NLSPipe]>], [8, 1]>, 292*7330f729Sjoerg // 293*7330f729Sjoerg // Integer to Single-Precision FP Convert 294*7330f729Sjoerg InstrItinData<IIC_fpCVTIS , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 295*7330f729Sjoerg InstrStage<1, [A8_NPipe]>], [7, 1]>, 296*7330f729Sjoerg // 297*7330f729Sjoerg // Integer to Double-Precision FP Convert 298*7330f729Sjoerg InstrItinData<IIC_fpCVTID , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 299*7330f729Sjoerg InstrStage<8, [A8_NPipe], 0>, 300*7330f729Sjoerg InstrStage<8, [A8_NLSPipe]>], [8, 1]>, 301*7330f729Sjoerg // 302*7330f729Sjoerg // Single-precision FP ALU 303*7330f729Sjoerg InstrItinData<IIC_fpALU32 , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 304*7330f729Sjoerg InstrStage<1, [A8_NPipe]>], [7, 1, 1]>, 305*7330f729Sjoerg // 306*7330f729Sjoerg // Double-precision FP ALU 307*7330f729Sjoerg InstrItinData<IIC_fpALU64 , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 308*7330f729Sjoerg InstrStage<9, [A8_NPipe], 0>, 309*7330f729Sjoerg InstrStage<9, [A8_NLSPipe]>], [9, 1, 1]>, 310*7330f729Sjoerg // 311*7330f729Sjoerg // Single-precision FP Multiply 312*7330f729Sjoerg InstrItinData<IIC_fpMUL32 , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 313*7330f729Sjoerg InstrStage<1, [A8_NPipe]>], [7, 1, 1]>, 314*7330f729Sjoerg // 315*7330f729Sjoerg // Double-precision FP Multiply 316*7330f729Sjoerg InstrItinData<IIC_fpMUL64 , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 317*7330f729Sjoerg InstrStage<11, [A8_NPipe], 0>, 318*7330f729Sjoerg InstrStage<11, [A8_NLSPipe]>], [11, 1, 1]>, 319*7330f729Sjoerg // 320*7330f729Sjoerg // Single-precision FP MAC 321*7330f729Sjoerg InstrItinData<IIC_fpMAC32 , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 322*7330f729Sjoerg InstrStage<1, [A8_NPipe]>], [7, 2, 1, 1]>, 323*7330f729Sjoerg // 324*7330f729Sjoerg // 
Double-precision FP MAC 325*7330f729Sjoerg InstrItinData<IIC_fpMAC64 , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 326*7330f729Sjoerg InstrStage<19, [A8_NPipe], 0>, 327*7330f729Sjoerg InstrStage<19, [A8_NLSPipe]>], [19, 2, 1, 1]>, 328*7330f729Sjoerg // 329*7330f729Sjoerg // Single-precision Fused FP MAC 330*7330f729Sjoerg InstrItinData<IIC_fpFMAC32, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 331*7330f729Sjoerg InstrStage<1, [A8_NPipe]>], [7, 2, 1, 1]>, 332*7330f729Sjoerg // 333*7330f729Sjoerg // Double-precision Fused FP MAC 334*7330f729Sjoerg InstrItinData<IIC_fpFMAC64, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 335*7330f729Sjoerg InstrStage<19, [A8_NPipe], 0>, 336*7330f729Sjoerg InstrStage<19, [A8_NLSPipe]>], [19, 2, 1, 1]>, 337*7330f729Sjoerg // 338*7330f729Sjoerg // Single-precision FP DIV 339*7330f729Sjoerg InstrItinData<IIC_fpDIV32 , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 340*7330f729Sjoerg InstrStage<20, [A8_NPipe], 0>, 341*7330f729Sjoerg InstrStage<20, [A8_NLSPipe]>], [20, 1, 1]>, 342*7330f729Sjoerg // 343*7330f729Sjoerg // Double-precision FP DIV 344*7330f729Sjoerg InstrItinData<IIC_fpDIV64 , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 345*7330f729Sjoerg InstrStage<29, [A8_NPipe], 0>, 346*7330f729Sjoerg InstrStage<29, [A8_NLSPipe]>], [29, 1, 1]>, 347*7330f729Sjoerg // 348*7330f729Sjoerg // Single-precision FP SQRT 349*7330f729Sjoerg InstrItinData<IIC_fpSQRT32, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 350*7330f729Sjoerg InstrStage<19, [A8_NPipe], 0>, 351*7330f729Sjoerg InstrStage<19, [A8_NLSPipe]>], [19, 1]>, 352*7330f729Sjoerg // 353*7330f729Sjoerg // Double-precision FP SQRT 354*7330f729Sjoerg InstrItinData<IIC_fpSQRT64, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 355*7330f729Sjoerg InstrStage<29, [A8_NPipe], 0>, 356*7330f729Sjoerg InstrStage<29, [A8_NLSPipe]>], [29, 1]>, 357*7330f729Sjoerg 358*7330f729Sjoerg // 359*7330f729Sjoerg // Integer to Single-precision Move 360*7330f729Sjoerg InstrItinData<IIC_fpMOVIS, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 
361*7330f729Sjoerg InstrStage<1, [A8_NPipe]>], 362*7330f729Sjoerg [2, 1]>, 363*7330f729Sjoerg // 364*7330f729Sjoerg // Integer to Double-precision Move 365*7330f729Sjoerg InstrItinData<IIC_fpMOVID, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 366*7330f729Sjoerg InstrStage<1, [A8_NPipe]>], 367*7330f729Sjoerg [2, 1, 1]>, 368*7330f729Sjoerg // 369*7330f729Sjoerg // Single-precision to Integer Move 370*7330f729Sjoerg InstrItinData<IIC_fpMOVSI, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 371*7330f729Sjoerg InstrStage<1, [A8_NPipe]>], 372*7330f729Sjoerg [20, 1]>, 373*7330f729Sjoerg // 374*7330f729Sjoerg // Double-precision to Integer Move 375*7330f729Sjoerg InstrItinData<IIC_fpMOVDI, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 376*7330f729Sjoerg InstrStage<1, [A8_NPipe]>], 377*7330f729Sjoerg [20, 20, 1]>, 378*7330f729Sjoerg 379*7330f729Sjoerg // 380*7330f729Sjoerg // Single-precision FP Load 381*7330f729Sjoerg InstrItinData<IIC_fpLoad32, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 382*7330f729Sjoerg InstrStage<1, [A8_NLSPipe], 0>, 383*7330f729Sjoerg InstrStage<1, [A8_LSPipe]>], 384*7330f729Sjoerg [2, 1]>, 385*7330f729Sjoerg // 386*7330f729Sjoerg // Double-precision FP Load 387*7330f729Sjoerg InstrItinData<IIC_fpLoad64, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 388*7330f729Sjoerg InstrStage<1, [A8_NLSPipe], 0>, 389*7330f729Sjoerg InstrStage<1, [A8_LSPipe]>], 390*7330f729Sjoerg [2, 1]>, 391*7330f729Sjoerg // 392*7330f729Sjoerg // FP Load Multiple 393*7330f729Sjoerg // FIXME: A8_LSPipe cycle time is dynamic, this assumes 3 to 4 registers. 
394*7330f729Sjoerg InstrItinData<IIC_fpLoad_m, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 395*7330f729Sjoerg InstrStage<1, [A8_NLSPipe], 0>, 396*7330f729Sjoerg InstrStage<1, [A8_LSPipe]>, 397*7330f729Sjoerg InstrStage<1, [A8_NLSPipe], 0>, 398*7330f729Sjoerg InstrStage<1, [A8_LSPipe]>], 399*7330f729Sjoerg [1, 1, 1, 2], [], -1>, // dynamic uops 400*7330f729Sjoerg // 401*7330f729Sjoerg // FP Load Multiple + update 402*7330f729Sjoerg InstrItinData<IIC_fpLoad_mu,[InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 403*7330f729Sjoerg InstrStage<1, [A8_NLSPipe], 0>, 404*7330f729Sjoerg InstrStage<1, [A8_LSPipe]>, 405*7330f729Sjoerg InstrStage<1, [A8_NLSPipe], 0>, 406*7330f729Sjoerg InstrStage<1, [A8_LSPipe]>], 407*7330f729Sjoerg [2, 1, 1, 1, 2], [], -1>, // dynamic uops 408*7330f729Sjoerg // 409*7330f729Sjoerg // Single-precision FP Store 410*7330f729Sjoerg InstrItinData<IIC_fpStore32,[InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 411*7330f729Sjoerg InstrStage<1, [A8_NLSPipe], 0>, 412*7330f729Sjoerg InstrStage<1, [A8_LSPipe]>], 413*7330f729Sjoerg [1, 1]>, 414*7330f729Sjoerg // 415*7330f729Sjoerg // Double-precision FP Store 416*7330f729Sjoerg InstrItinData<IIC_fpStore64,[InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 417*7330f729Sjoerg InstrStage<1, [A8_NLSPipe], 0>, 418*7330f729Sjoerg InstrStage<1, [A8_LSPipe]>], 419*7330f729Sjoerg [1, 1]>, 420*7330f729Sjoerg // 421*7330f729Sjoerg // FP Store Multiple 422*7330f729Sjoerg InstrItinData<IIC_fpStore_m,[InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 423*7330f729Sjoerg InstrStage<1, [A8_NLSPipe], 0>, 424*7330f729Sjoerg InstrStage<1, [A8_LSPipe]>, 425*7330f729Sjoerg InstrStage<1, [A8_NLSPipe], 0>, 426*7330f729Sjoerg InstrStage<1, [A8_LSPipe]>], 427*7330f729Sjoerg [1, 1, 1, 1], [], -1>, // dynamic uops 428*7330f729Sjoerg // 429*7330f729Sjoerg // FP Store Multiple + update 430*7330f729Sjoerg InstrItinData<IIC_fpStore_mu,[InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 431*7330f729Sjoerg InstrStage<1, [A8_NLSPipe], 0>, 432*7330f729Sjoerg InstrStage<1, [A8_LSPipe]>, 
433*7330f729Sjoerg InstrStage<1, [A8_NLSPipe], 0>, 434*7330f729Sjoerg InstrStage<1, [A8_LSPipe]>], 435*7330f729Sjoerg [2, 1, 1, 1, 1], [], -1>, // dynamic uops 436*7330f729Sjoerg // NEON 437*7330f729Sjoerg // Issue through integer pipeline, and execute in NEON unit. 438*7330f729Sjoerg // 439*7330f729Sjoerg // VLD1 440*7330f729Sjoerg InstrItinData<IIC_VLD1, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 441*7330f729Sjoerg InstrStage<2, [A8_NLSPipe], 0>, 442*7330f729Sjoerg InstrStage<2, [A8_LSPipe]>], 443*7330f729Sjoerg [2, 1]>, 444*7330f729Sjoerg // VLD1x2 445*7330f729Sjoerg InstrItinData<IIC_VLD1x2, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 446*7330f729Sjoerg InstrStage<2, [A8_NLSPipe], 0>, 447*7330f729Sjoerg InstrStage<2, [A8_LSPipe]>], 448*7330f729Sjoerg [2, 2, 1]>, 449*7330f729Sjoerg // 450*7330f729Sjoerg // VLD1x3 451*7330f729Sjoerg InstrItinData<IIC_VLD1x3, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 452*7330f729Sjoerg InstrStage<3, [A8_NLSPipe], 0>, 453*7330f729Sjoerg InstrStage<3, [A8_LSPipe]>], 454*7330f729Sjoerg [2, 2, 3, 1]>, 455*7330f729Sjoerg // 456*7330f729Sjoerg // VLD1x4 457*7330f729Sjoerg InstrItinData<IIC_VLD1x4, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 458*7330f729Sjoerg InstrStage<3, [A8_NLSPipe], 0>, 459*7330f729Sjoerg InstrStage<3, [A8_LSPipe]>], 460*7330f729Sjoerg [2, 2, 3, 3, 1]>, 461*7330f729Sjoerg // 462*7330f729Sjoerg // VLD1u 463*7330f729Sjoerg InstrItinData<IIC_VLD1u, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 464*7330f729Sjoerg InstrStage<2, [A8_NLSPipe], 0>, 465*7330f729Sjoerg InstrStage<2, [A8_LSPipe]>], 466*7330f729Sjoerg [2, 2, 1]>, 467*7330f729Sjoerg // 468*7330f729Sjoerg // VLD1x2u 469*7330f729Sjoerg InstrItinData<IIC_VLD1x2u, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 470*7330f729Sjoerg InstrStage<2, [A8_NLSPipe], 0>, 471*7330f729Sjoerg InstrStage<2, [A8_LSPipe]>], 472*7330f729Sjoerg [2, 2, 2, 1]>, 473*7330f729Sjoerg // 474*7330f729Sjoerg // VLD1x3u 475*7330f729Sjoerg InstrItinData<IIC_VLD1x3u, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 
476*7330f729Sjoerg InstrStage<3, [A8_NLSPipe], 0>, 477*7330f729Sjoerg InstrStage<3, [A8_LSPipe]>], 478*7330f729Sjoerg [2, 2, 3, 2, 1]>, 479*7330f729Sjoerg // 480*7330f729Sjoerg // VLD1x4u 481*7330f729Sjoerg InstrItinData<IIC_VLD1x4u, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 482*7330f729Sjoerg InstrStage<3, [A8_NLSPipe], 0>, 483*7330f729Sjoerg InstrStage<3, [A8_LSPipe]>], 484*7330f729Sjoerg [2, 2, 3, 3, 2, 1]>, 485*7330f729Sjoerg // 486*7330f729Sjoerg // VLD1ln 487*7330f729Sjoerg InstrItinData<IIC_VLD1ln, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, 488*7330f729Sjoerg InstrStage<3, [A8_NLSPipe], 0>, 489*7330f729Sjoerg InstrStage<3, [A8_LSPipe]>], 490*7330f729Sjoerg [3, 1, 1, 1]>, 491*7330f729Sjoerg // 492*7330f729Sjoerg // VLD1lnu 493*7330f729Sjoerg InstrItinData<IIC_VLD1lnu, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, 494*7330f729Sjoerg InstrStage<3, [A8_NLSPipe], 0>, 495*7330f729Sjoerg InstrStage<3, [A8_LSPipe]>], 496*7330f729Sjoerg [3, 2, 1, 1, 1, 1]>, 497*7330f729Sjoerg // 498*7330f729Sjoerg // VLD1dup 499*7330f729Sjoerg InstrItinData<IIC_VLD1dup, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, 500*7330f729Sjoerg InstrStage<2, [A8_NLSPipe], 0>, 501*7330f729Sjoerg InstrStage<2, [A8_LSPipe]>], 502*7330f729Sjoerg [2, 1]>, 503*7330f729Sjoerg // 504*7330f729Sjoerg // VLD1dupu 505*7330f729Sjoerg InstrItinData<IIC_VLD1dupu, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, 506*7330f729Sjoerg InstrStage<2, [A8_NLSPipe], 0>, 507*7330f729Sjoerg InstrStage<2, [A8_LSPipe]>], 508*7330f729Sjoerg [2, 2, 1, 1]>, 509*7330f729Sjoerg // 510*7330f729Sjoerg // VLD2 511*7330f729Sjoerg InstrItinData<IIC_VLD2, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 512*7330f729Sjoerg InstrStage<2, [A8_NLSPipe], 0>, 513*7330f729Sjoerg InstrStage<2, [A8_LSPipe]>], 514*7330f729Sjoerg [2, 2, 1]>, 515*7330f729Sjoerg // 516*7330f729Sjoerg // VLD2x2 517*7330f729Sjoerg InstrItinData<IIC_VLD2x2, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 518*7330f729Sjoerg InstrStage<3, [A8_NLSPipe], 0>, 519*7330f729Sjoerg InstrStage<3, [A8_LSPipe]>], 
520*7330f729Sjoerg [2, 2, 3, 3, 1]>, 521*7330f729Sjoerg // 522*7330f729Sjoerg // VLD2ln 523*7330f729Sjoerg InstrItinData<IIC_VLD2ln, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 524*7330f729Sjoerg InstrStage<3, [A8_NLSPipe], 0>, 525*7330f729Sjoerg InstrStage<3, [A8_LSPipe]>], 526*7330f729Sjoerg [3, 3, 1, 1, 1, 1]>, 527*7330f729Sjoerg // 528*7330f729Sjoerg // VLD2u 529*7330f729Sjoerg InstrItinData<IIC_VLD2u, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 530*7330f729Sjoerg InstrStage<2, [A8_NLSPipe], 0>, 531*7330f729Sjoerg InstrStage<2, [A8_LSPipe]>], 532*7330f729Sjoerg [2, 2, 2, 1, 1, 1]>, 533*7330f729Sjoerg // 534*7330f729Sjoerg // VLD2x2u 535*7330f729Sjoerg InstrItinData<IIC_VLD2x2u, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 536*7330f729Sjoerg InstrStage<3, [A8_NLSPipe], 0>, 537*7330f729Sjoerg InstrStage<3, [A8_LSPipe]>], 538*7330f729Sjoerg [2, 2, 3, 3, 2, 1]>, 539*7330f729Sjoerg // 540*7330f729Sjoerg // VLD2lnu 541*7330f729Sjoerg InstrItinData<IIC_VLD2lnu, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 542*7330f729Sjoerg InstrStage<3, [A8_NLSPipe], 0>, 543*7330f729Sjoerg InstrStage<3, [A8_LSPipe]>], 544*7330f729Sjoerg [3, 3, 2, 1, 1, 1, 1, 1]>, 545*7330f729Sjoerg // 546*7330f729Sjoerg // VLD2dup 547*7330f729Sjoerg InstrItinData<IIC_VLD2dup, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, 548*7330f729Sjoerg InstrStage<2, [A8_NLSPipe], 0>, 549*7330f729Sjoerg InstrStage<2, [A8_LSPipe]>], 550*7330f729Sjoerg [2, 2, 1]>, 551*7330f729Sjoerg // 552*7330f729Sjoerg // VLD2dupu 553*7330f729Sjoerg InstrItinData<IIC_VLD2dupu, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, 554*7330f729Sjoerg InstrStage<2, [A8_NLSPipe], 0>, 555*7330f729Sjoerg InstrStage<2, [A8_LSPipe]>], 556*7330f729Sjoerg [2, 2, 2, 1, 1]>, 557*7330f729Sjoerg // 558*7330f729Sjoerg // VLD3 559*7330f729Sjoerg InstrItinData<IIC_VLD3, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 560*7330f729Sjoerg InstrStage<4, [A8_NLSPipe], 0>, 561*7330f729Sjoerg InstrStage<4, [A8_LSPipe]>], 562*7330f729Sjoerg [3, 3, 4, 1]>, 563*7330f729Sjoerg // 564*7330f729Sjoerg // 
VLD3ln 565*7330f729Sjoerg InstrItinData<IIC_VLD3ln, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 566*7330f729Sjoerg InstrStage<5, [A8_NLSPipe], 0>, 567*7330f729Sjoerg InstrStage<5, [A8_LSPipe]>], 568*7330f729Sjoerg [4, 4, 5, 1, 1, 1, 1, 2]>, 569*7330f729Sjoerg // 570*7330f729Sjoerg // VLD3u 571*7330f729Sjoerg InstrItinData<IIC_VLD3u, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 572*7330f729Sjoerg InstrStage<4, [A8_NLSPipe], 0>, 573*7330f729Sjoerg InstrStage<4, [A8_LSPipe]>], 574*7330f729Sjoerg [3, 3, 4, 2, 1]>, 575*7330f729Sjoerg // 576*7330f729Sjoerg // VLD3lnu 577*7330f729Sjoerg InstrItinData<IIC_VLD3lnu, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 578*7330f729Sjoerg InstrStage<5, [A8_NLSPipe], 0>, 579*7330f729Sjoerg InstrStage<5, [A8_LSPipe]>], 580*7330f729Sjoerg [4, 4, 5, 2, 1, 1, 1, 1, 1, 2]>, 581*7330f729Sjoerg // 582*7330f729Sjoerg // VLD3dup 583*7330f729Sjoerg InstrItinData<IIC_VLD3dup, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, 584*7330f729Sjoerg InstrStage<3, [A8_NLSPipe], 0>, 585*7330f729Sjoerg InstrStage<3, [A8_LSPipe]>], 586*7330f729Sjoerg [2, 2, 3, 1]>, 587*7330f729Sjoerg // 588*7330f729Sjoerg // VLD3dupu 589*7330f729Sjoerg InstrItinData<IIC_VLD3dupu, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, 590*7330f729Sjoerg InstrStage<3, [A8_NLSPipe], 0>, 591*7330f729Sjoerg InstrStage<3, [A8_LSPipe]>], 592*7330f729Sjoerg [2, 2, 3, 2, 1, 1]>, 593*7330f729Sjoerg // 594*7330f729Sjoerg // VLD4 595*7330f729Sjoerg InstrItinData<IIC_VLD4, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 596*7330f729Sjoerg InstrStage<4, [A8_NLSPipe], 0>, 597*7330f729Sjoerg InstrStage<4, [A8_LSPipe]>], 598*7330f729Sjoerg [3, 3, 4, 4, 1]>, 599*7330f729Sjoerg // 600*7330f729Sjoerg // VLD4ln 601*7330f729Sjoerg InstrItinData<IIC_VLD4ln, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 602*7330f729Sjoerg InstrStage<5, [A8_NLSPipe], 0>, 603*7330f729Sjoerg InstrStage<5, [A8_LSPipe]>], 604*7330f729Sjoerg [4, 4, 5, 5, 1, 1, 1, 1, 2, 2]>, 605*7330f729Sjoerg // 606*7330f729Sjoerg // VLD4u 607*7330f729Sjoerg InstrItinData<IIC_VLD4u, 
[InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 608*7330f729Sjoerg InstrStage<4, [A8_NLSPipe], 0>, 609*7330f729Sjoerg InstrStage<4, [A8_LSPipe]>], 610*7330f729Sjoerg [3, 3, 4, 4, 2, 1]>, 611*7330f729Sjoerg // 612*7330f729Sjoerg // VLD4lnu 613*7330f729Sjoerg InstrItinData<IIC_VLD4lnu, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 614*7330f729Sjoerg InstrStage<5, [A8_NLSPipe], 0>, 615*7330f729Sjoerg InstrStage<5, [A8_LSPipe]>], 616*7330f729Sjoerg [4, 4, 5, 5, 2, 1, 1, 1, 1, 1, 2, 2]>, 617*7330f729Sjoerg // 618*7330f729Sjoerg // VLD4dup 619*7330f729Sjoerg InstrItinData<IIC_VLD4dup, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, 620*7330f729Sjoerg InstrStage<3, [A8_NLSPipe], 0>, 621*7330f729Sjoerg InstrStage<3, [A8_LSPipe]>], 622*7330f729Sjoerg [2, 2, 3, 3, 1]>, 623*7330f729Sjoerg // 624*7330f729Sjoerg // VLD4dupu 625*7330f729Sjoerg InstrItinData<IIC_VLD4dupu, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, 626*7330f729Sjoerg InstrStage<3, [A8_NLSPipe], 0>, 627*7330f729Sjoerg InstrStage<3, [A8_LSPipe]>], 628*7330f729Sjoerg [2, 2, 3, 3, 2, 1, 1]>, 629*7330f729Sjoerg // 630*7330f729Sjoerg // VST1 631*7330f729Sjoerg InstrItinData<IIC_VST1, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 632*7330f729Sjoerg InstrStage<2, [A8_NLSPipe], 0>, 633*7330f729Sjoerg InstrStage<2, [A8_LSPipe]>], 634*7330f729Sjoerg [1, 1, 1]>, 635*7330f729Sjoerg // 636*7330f729Sjoerg // VST1x2 637*7330f729Sjoerg InstrItinData<IIC_VST1x2, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 638*7330f729Sjoerg InstrStage<2, [A8_NLSPipe], 0>, 639*7330f729Sjoerg InstrStage<2, [A8_LSPipe]>], 640*7330f729Sjoerg [1, 1, 1, 1]>, 641*7330f729Sjoerg // 642*7330f729Sjoerg // VST1x3 643*7330f729Sjoerg InstrItinData<IIC_VST1x3, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 644*7330f729Sjoerg InstrStage<3, [A8_NLSPipe], 0>, 645*7330f729Sjoerg InstrStage<3, [A8_LSPipe]>], 646*7330f729Sjoerg [1, 1, 1, 1, 2]>, 647*7330f729Sjoerg // 648*7330f729Sjoerg // VST1x4 649*7330f729Sjoerg InstrItinData<IIC_VST1x4, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 650*7330f729Sjoerg 
InstrStage<3, [A8_NLSPipe], 0>, 651*7330f729Sjoerg InstrStage<3, [A8_LSPipe]>], 652*7330f729Sjoerg [1, 1, 1, 1, 2, 2]>, 653*7330f729Sjoerg // 654*7330f729Sjoerg // VST1u 655*7330f729Sjoerg InstrItinData<IIC_VST1u, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 656*7330f729Sjoerg InstrStage<2, [A8_NLSPipe], 0>, 657*7330f729Sjoerg InstrStage<2, [A8_LSPipe]>], 658*7330f729Sjoerg [2, 1, 1, 1, 1]>, 659*7330f729Sjoerg // 660*7330f729Sjoerg // VST1x2u 661*7330f729Sjoerg InstrItinData<IIC_VST1x2u, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 662*7330f729Sjoerg InstrStage<2, [A8_NLSPipe], 0>, 663*7330f729Sjoerg InstrStage<2, [A8_LSPipe]>], 664*7330f729Sjoerg [2, 1, 1, 1, 1, 1]>, 665*7330f729Sjoerg // 666*7330f729Sjoerg // VST1x3u 667*7330f729Sjoerg InstrItinData<IIC_VST1x3u, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 668*7330f729Sjoerg InstrStage<3, [A8_NLSPipe], 0>, 669*7330f729Sjoerg InstrStage<3, [A8_LSPipe]>], 670*7330f729Sjoerg [2, 1, 1, 1, 1, 1, 2]>, 671*7330f729Sjoerg // 672*7330f729Sjoerg // VST1x4u 673*7330f729Sjoerg InstrItinData<IIC_VST1x4u, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 674*7330f729Sjoerg InstrStage<3, [A8_NLSPipe], 0>, 675*7330f729Sjoerg InstrStage<3, [A8_LSPipe]>], 676*7330f729Sjoerg [2, 1, 1, 1, 1, 1, 2, 2]>, 677*7330f729Sjoerg // 678*7330f729Sjoerg // VST1ln 679*7330f729Sjoerg InstrItinData<IIC_VST1ln, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, 680*7330f729Sjoerg InstrStage<2, [A8_NLSPipe], 0>, 681*7330f729Sjoerg InstrStage<2, [A8_LSPipe]>], 682*7330f729Sjoerg [1, 1, 1]>, 683*7330f729Sjoerg // 684*7330f729Sjoerg // VST1lnu 685*7330f729Sjoerg InstrItinData<IIC_VST1lnu, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>, 686*7330f729Sjoerg InstrStage<2, [A8_NLSPipe], 0>, 687*7330f729Sjoerg InstrStage<2, [A8_LSPipe]>], 688*7330f729Sjoerg [2, 1, 1, 1, 1]>, 689*7330f729Sjoerg // 690*7330f729Sjoerg // VST2 691*7330f729Sjoerg InstrItinData<IIC_VST2, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 692*7330f729Sjoerg InstrStage<2, [A8_NLSPipe], 0>, 693*7330f729Sjoerg InstrStage<2, 
[A8_LSPipe]>], 694*7330f729Sjoerg [1, 1, 1, 1]>, 695*7330f729Sjoerg // 696*7330f729Sjoerg // VST2x2 697*7330f729Sjoerg InstrItinData<IIC_VST2x2, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 698*7330f729Sjoerg InstrStage<4, [A8_NLSPipe], 0>, 699*7330f729Sjoerg InstrStage<4, [A8_LSPipe]>], 700*7330f729Sjoerg [1, 1, 1, 1, 2, 2]>, 701*7330f729Sjoerg // 702*7330f729Sjoerg // VST2u 703*7330f729Sjoerg InstrItinData<IIC_VST2u, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 704*7330f729Sjoerg InstrStage<2, [A8_NLSPipe], 0>, 705*7330f729Sjoerg InstrStage<2, [A8_LSPipe]>], 706*7330f729Sjoerg [2, 1, 1, 1, 1, 1]>, 707*7330f729Sjoerg // 708*7330f729Sjoerg // VST2x2u 709*7330f729Sjoerg InstrItinData<IIC_VST2x2u, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 710*7330f729Sjoerg InstrStage<4, [A8_NLSPipe], 0>, 711*7330f729Sjoerg InstrStage<4, [A8_LSPipe]>], 712*7330f729Sjoerg [2, 1, 1, 1, 1, 1, 2, 2]>, 713*7330f729Sjoerg // 714*7330f729Sjoerg // VST2ln 715*7330f729Sjoerg InstrItinData<IIC_VST2ln, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 716*7330f729Sjoerg InstrStage<2, [A8_NLSPipe], 0>, 717*7330f729Sjoerg InstrStage<2, [A8_LSPipe]>], 718*7330f729Sjoerg [1, 1, 1, 1]>, 719*7330f729Sjoerg // 720*7330f729Sjoerg // VST2lnu 721*7330f729Sjoerg InstrItinData<IIC_VST2lnu, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 722*7330f729Sjoerg InstrStage<2, [A8_NLSPipe], 0>, 723*7330f729Sjoerg InstrStage<2, [A8_LSPipe]>], 724*7330f729Sjoerg [2, 1, 1, 1, 1, 1]>, 725*7330f729Sjoerg // 726*7330f729Sjoerg // VST3 727*7330f729Sjoerg InstrItinData<IIC_VST3, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 728*7330f729Sjoerg InstrStage<3, [A8_NLSPipe], 0>, 729*7330f729Sjoerg InstrStage<3, [A8_LSPipe]>], 730*7330f729Sjoerg [1, 1, 1, 1, 2]>, 731*7330f729Sjoerg // 732*7330f729Sjoerg // VST3u 733*7330f729Sjoerg InstrItinData<IIC_VST3u, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 734*7330f729Sjoerg InstrStage<3, [A8_NLSPipe], 0>, 735*7330f729Sjoerg InstrStage<3, [A8_LSPipe]>], 736*7330f729Sjoerg [2, 1, 1, 1, 1, 1, 2]>, 737*7330f729Sjoerg // 
738*7330f729Sjoerg // VST3ln 739*7330f729Sjoerg InstrItinData<IIC_VST3ln, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 740*7330f729Sjoerg InstrStage<3, [A8_NLSPipe], 0>, 741*7330f729Sjoerg InstrStage<3, [A8_LSPipe]>], 742*7330f729Sjoerg [1, 1, 1, 1, 2]>, 743*7330f729Sjoerg // 744*7330f729Sjoerg // VST3lnu 745*7330f729Sjoerg InstrItinData<IIC_VST3lnu, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 746*7330f729Sjoerg InstrStage<3, [A8_NLSPipe], 0>, 747*7330f729Sjoerg InstrStage<3, [A8_LSPipe]>], 748*7330f729Sjoerg [2, 1, 1, 1, 1, 1, 2]>, 749*7330f729Sjoerg // 750*7330f729Sjoerg // VST4 751*7330f729Sjoerg InstrItinData<IIC_VST4, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 752*7330f729Sjoerg InstrStage<4, [A8_NLSPipe], 0>, 753*7330f729Sjoerg InstrStage<4, [A8_LSPipe]>], 754*7330f729Sjoerg [1, 1, 1, 1, 2, 2]>, 755*7330f729Sjoerg // 756*7330f729Sjoerg // VST4u 757*7330f729Sjoerg InstrItinData<IIC_VST4u, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 758*7330f729Sjoerg InstrStage<4, [A8_NLSPipe], 0>, 759*7330f729Sjoerg InstrStage<4, [A8_LSPipe]>], 760*7330f729Sjoerg [2, 1, 1, 1, 1, 1, 2, 2]>, 761*7330f729Sjoerg // 762*7330f729Sjoerg // VST4ln 763*7330f729Sjoerg InstrItinData<IIC_VST4ln, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 764*7330f729Sjoerg InstrStage<4, [A8_NLSPipe], 0>, 765*7330f729Sjoerg InstrStage<4, [A8_LSPipe]>], 766*7330f729Sjoerg [1, 1, 1, 1, 2, 2]>, 767*7330f729Sjoerg // 768*7330f729Sjoerg // VST4lnu 769*7330f729Sjoerg InstrItinData<IIC_VST4lnu, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 770*7330f729Sjoerg InstrStage<4, [A8_NLSPipe], 0>, 771*7330f729Sjoerg InstrStage<4, [A8_LSPipe]>], 772*7330f729Sjoerg [2, 1, 1, 1, 1, 1, 2, 2]>, 773*7330f729Sjoerg // 774*7330f729Sjoerg // Double-register FP Unary 775*7330f729Sjoerg InstrItinData<IIC_VUNAD, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 776*7330f729Sjoerg InstrStage<1, [A8_NPipe]>], [5, 2]>, 777*7330f729Sjoerg // 778*7330f729Sjoerg // Quad-register FP Unary 779*7330f729Sjoerg // Result written in N5, but that is relative to the last 
cycle of multicycle, 780*7330f729Sjoerg // so we use 6 for those cases 781*7330f729Sjoerg InstrItinData<IIC_VUNAQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 782*7330f729Sjoerg InstrStage<2, [A8_NPipe]>], [6, 2]>, 783*7330f729Sjoerg // 784*7330f729Sjoerg // Double-register FP Binary 785*7330f729Sjoerg InstrItinData<IIC_VBIND, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 786*7330f729Sjoerg InstrStage<1, [A8_NPipe]>], [5, 2, 2]>, 787*7330f729Sjoerg // 788*7330f729Sjoerg // VPADD, etc. 789*7330f729Sjoerg InstrItinData<IIC_VPBIND, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 790*7330f729Sjoerg InstrStage<1, [A8_NPipe]>], [5, 2, 2]>, 791*7330f729Sjoerg // 792*7330f729Sjoerg // Double-register FP VMUL 793*7330f729Sjoerg InstrItinData<IIC_VFMULD, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 794*7330f729Sjoerg InstrStage<1, [A8_NPipe]>], [5, 2, 1]>, 795*7330f729Sjoerg 796*7330f729Sjoerg // 797*7330f729Sjoerg // Quad-register FP Binary 798*7330f729Sjoerg // Result written in N5, but that is relative to the last cycle of multicycle, 799*7330f729Sjoerg // so we use 6 for those cases 800*7330f729Sjoerg InstrItinData<IIC_VBINQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 801*7330f729Sjoerg InstrStage<2, [A8_NPipe]>], [6, 2, 2]>, 802*7330f729Sjoerg // 803*7330f729Sjoerg // Quad-register FP VMUL 804*7330f729Sjoerg InstrItinData<IIC_VFMULQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 805*7330f729Sjoerg InstrStage<1, [A8_NPipe]>], [6, 2, 1]>, 806*7330f729Sjoerg // 807*7330f729Sjoerg // Move 808*7330f729Sjoerg InstrItinData<IIC_VMOV, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 809*7330f729Sjoerg InstrStage<1, [A8_NPipe]>], [1, 1]>, 810*7330f729Sjoerg // 811*7330f729Sjoerg // Move Immediate 812*7330f729Sjoerg InstrItinData<IIC_VMOVImm, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 813*7330f729Sjoerg InstrStage<1, [A8_NPipe]>], [3]>, 814*7330f729Sjoerg // 815*7330f729Sjoerg // Double-register Permute Move 816*7330f729Sjoerg InstrItinData<IIC_VMOVD, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 817*7330f729Sjoerg 
InstrStage<1, [A8_NLSPipe]>], [2, 1]>, 818*7330f729Sjoerg // 819*7330f729Sjoerg // Quad-register Permute Move 820*7330f729Sjoerg // Result written in N2, but that is relative to the last cycle of multicycle, 821*7330f729Sjoerg // so we use 3 for those cases 822*7330f729Sjoerg InstrItinData<IIC_VMOVQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 823*7330f729Sjoerg InstrStage<2, [A8_NLSPipe]>], [3, 1]>, 824*7330f729Sjoerg // 825*7330f729Sjoerg // Integer to Single-precision Move 826*7330f729Sjoerg InstrItinData<IIC_VMOVIS , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 827*7330f729Sjoerg InstrStage<1, [A8_NLSPipe]>], [2, 1]>, 828*7330f729Sjoerg // 829*7330f729Sjoerg // Integer to Double-precision Move 830*7330f729Sjoerg InstrItinData<IIC_VMOVID , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 831*7330f729Sjoerg InstrStage<1, [A8_NLSPipe]>], [2, 1, 1]>, 832*7330f729Sjoerg // 833*7330f729Sjoerg // Single-precision to Integer Move 834*7330f729Sjoerg InstrItinData<IIC_VMOVSI , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 835*7330f729Sjoerg InstrStage<1, [A8_NLSPipe]>], [20, 1]>, 836*7330f729Sjoerg // 837*7330f729Sjoerg // Double-precision to Integer Move 838*7330f729Sjoerg InstrItinData<IIC_VMOVDI , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 839*7330f729Sjoerg InstrStage<1, [A8_NLSPipe]>], [20, 20, 1]>, 840*7330f729Sjoerg // 841*7330f729Sjoerg // Integer to Lane Move 842*7330f729Sjoerg InstrItinData<IIC_VMOVISL , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 843*7330f729Sjoerg InstrStage<2, [A8_NLSPipe]>], [3, 1, 1]>, 844*7330f729Sjoerg // 845*7330f729Sjoerg // Vector narrow move 846*7330f729Sjoerg InstrItinData<IIC_VMOVN , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 847*7330f729Sjoerg InstrStage<1, [A8_NPipe]>], [2, 1]>, 848*7330f729Sjoerg // 849*7330f729Sjoerg // Double-register Permute 850*7330f729Sjoerg InstrItinData<IIC_VPERMD, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 851*7330f729Sjoerg InstrStage<1, [A8_NLSPipe]>], [2, 2, 1, 1]>, 852*7330f729Sjoerg // 853*7330f729Sjoerg // Quad-register 
Permute 854*7330f729Sjoerg // Result written in N2, but that is relative to the last cycle of multicycle, 855*7330f729Sjoerg // so we use 3 for those cases 856*7330f729Sjoerg InstrItinData<IIC_VPERMQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 857*7330f729Sjoerg InstrStage<2, [A8_NLSPipe]>], [3, 3, 1, 1]>, 858*7330f729Sjoerg // 859*7330f729Sjoerg // Quad-register Permute (3 cycle issue) 860*7330f729Sjoerg // Result written in N2, but that is relative to the last cycle of multicycle, 861*7330f729Sjoerg // so we use 4 for those cases 862*7330f729Sjoerg InstrItinData<IIC_VPERMQ3, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 863*7330f729Sjoerg InstrStage<1, [A8_NLSPipe]>, 864*7330f729Sjoerg InstrStage<1, [A8_NPipe], 0>, 865*7330f729Sjoerg InstrStage<2, [A8_NLSPipe]>], [4, 4, 1, 1]>, 866*7330f729Sjoerg // 867*7330f729Sjoerg // Double-register FP Multiple-Accumulate 868*7330f729Sjoerg InstrItinData<IIC_VMACD, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 869*7330f729Sjoerg InstrStage<1, [A8_NPipe]>], [9, 3, 2, 2]>, 870*7330f729Sjoerg // 871*7330f729Sjoerg // Quad-register FP Multiple-Accumulate 872*7330f729Sjoerg // Result written in N9, but that is relative to the last cycle of multicycle, 873*7330f729Sjoerg // so we use 10 for those cases 874*7330f729Sjoerg InstrItinData<IIC_VMACQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 875*7330f729Sjoerg InstrStage<2, [A8_NPipe]>], [10, 3, 2, 2]>, 876*7330f729Sjoerg // 877*7330f729Sjoerg // Double-register Fused FP Multiple-Accumulate 878*7330f729Sjoerg InstrItinData<IIC_VFMACD, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 879*7330f729Sjoerg InstrStage<1, [A8_NPipe]>], [9, 3, 2, 2]>, 880*7330f729Sjoerg // 881*7330f729Sjoerg // Quad-register Fused FP Multiple-Accumulate 882*7330f729Sjoerg // Result written in N9, but that is relative to the last cycle of multicycle, 883*7330f729Sjoerg // so we use 10 for those cases 884*7330f729Sjoerg InstrItinData<IIC_VFMACQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 885*7330f729Sjoerg InstrStage<2, [A8_NPipe]>], 
[10, 3, 2, 2]>, 886*7330f729Sjoerg // 887*7330f729Sjoerg // Double-register Reciprical Step 888*7330f729Sjoerg InstrItinData<IIC_VRECSD, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 889*7330f729Sjoerg InstrStage<1, [A8_NPipe]>], [9, 2, 2]>, 890*7330f729Sjoerg // 891*7330f729Sjoerg // Quad-register Reciprical Step 892*7330f729Sjoerg InstrItinData<IIC_VRECSQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 893*7330f729Sjoerg InstrStage<2, [A8_NPipe]>], [10, 2, 2]>, 894*7330f729Sjoerg // 895*7330f729Sjoerg // Double-register Integer Count 896*7330f729Sjoerg InstrItinData<IIC_VCNTiD, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 897*7330f729Sjoerg InstrStage<1, [A8_NPipe]>], [3, 2, 2]>, 898*7330f729Sjoerg // 899*7330f729Sjoerg // Quad-register Integer Count 900*7330f729Sjoerg // Result written in N3, but that is relative to the last cycle of multicycle, 901*7330f729Sjoerg // so we use 4 for those cases 902*7330f729Sjoerg InstrItinData<IIC_VCNTiQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 903*7330f729Sjoerg InstrStage<2, [A8_NPipe]>], [4, 2, 2]>, 904*7330f729Sjoerg // 905*7330f729Sjoerg // Double-register Integer Unary 906*7330f729Sjoerg InstrItinData<IIC_VUNAiD, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 907*7330f729Sjoerg InstrStage<1, [A8_NPipe]>], [4, 2]>, 908*7330f729Sjoerg // 909*7330f729Sjoerg // Quad-register Integer Unary 910*7330f729Sjoerg InstrItinData<IIC_VUNAiQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 911*7330f729Sjoerg InstrStage<1, [A8_NPipe]>], [4, 2]>, 912*7330f729Sjoerg // 913*7330f729Sjoerg // Double-register Integer Q-Unary 914*7330f729Sjoerg InstrItinData<IIC_VQUNAiD, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 915*7330f729Sjoerg InstrStage<1, [A8_NPipe]>], [4, 1]>, 916*7330f729Sjoerg // 917*7330f729Sjoerg // Quad-register Integer CountQ-Unary 918*7330f729Sjoerg InstrItinData<IIC_VQUNAiQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 919*7330f729Sjoerg InstrStage<1, [A8_NPipe]>], [4, 1]>, 920*7330f729Sjoerg // 921*7330f729Sjoerg // Double-register Integer Binary 
922*7330f729Sjoerg InstrItinData<IIC_VBINiD, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 923*7330f729Sjoerg InstrStage<1, [A8_NPipe]>], [3, 2, 2]>, 924*7330f729Sjoerg // 925*7330f729Sjoerg // Quad-register Integer Binary 926*7330f729Sjoerg InstrItinData<IIC_VBINiQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 927*7330f729Sjoerg InstrStage<1, [A8_NPipe]>], [3, 2, 2]>, 928*7330f729Sjoerg // 929*7330f729Sjoerg // Double-register Integer Binary (4 cycle) 930*7330f729Sjoerg InstrItinData<IIC_VBINi4D, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 931*7330f729Sjoerg InstrStage<1, [A8_NPipe]>], [4, 2, 1]>, 932*7330f729Sjoerg // 933*7330f729Sjoerg // Quad-register Integer Binary (4 cycle) 934*7330f729Sjoerg InstrItinData<IIC_VBINi4Q, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 935*7330f729Sjoerg InstrStage<1, [A8_NPipe]>], [4, 2, 1]>, 936*7330f729Sjoerg 937*7330f729Sjoerg // 938*7330f729Sjoerg // Double-register Integer Subtract 939*7330f729Sjoerg InstrItinData<IIC_VSUBiD, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 940*7330f729Sjoerg InstrStage<1, [A8_NPipe]>], [3, 2, 1]>, 941*7330f729Sjoerg // 942*7330f729Sjoerg // Quad-register Integer Subtract 943*7330f729Sjoerg InstrItinData<IIC_VSUBiQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 944*7330f729Sjoerg InstrStage<1, [A8_NPipe]>], [3, 2, 1]>, 945*7330f729Sjoerg // 946*7330f729Sjoerg // Double-register Integer Subtract 947*7330f729Sjoerg InstrItinData<IIC_VSUBi4D, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 948*7330f729Sjoerg InstrStage<1, [A8_NPipe]>], [4, 2, 1]>, 949*7330f729Sjoerg // 950*7330f729Sjoerg // Quad-register Integer Subtract 951*7330f729Sjoerg InstrItinData<IIC_VSUBi4Q, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 952*7330f729Sjoerg InstrStage<1, [A8_NPipe]>], [4, 2, 1]>, 953*7330f729Sjoerg // 954*7330f729Sjoerg // Double-register Integer Shift 955*7330f729Sjoerg InstrItinData<IIC_VSHLiD, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 956*7330f729Sjoerg InstrStage<1, [A8_NPipe]>], [3, 1, 1]>, 957*7330f729Sjoerg // 958*7330f729Sjoerg // 
Quad-register Integer Shift 959*7330f729Sjoerg InstrItinData<IIC_VSHLiQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 960*7330f729Sjoerg InstrStage<2, [A8_NPipe]>], [4, 1, 1]>, 961*7330f729Sjoerg // 962*7330f729Sjoerg // Double-register Integer Shift (4 cycle) 963*7330f729Sjoerg InstrItinData<IIC_VSHLi4D, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 964*7330f729Sjoerg InstrStage<1, [A8_NPipe]>], [4, 1, 1]>, 965*7330f729Sjoerg // 966*7330f729Sjoerg // Quad-register Integer Shift (4 cycle) 967*7330f729Sjoerg InstrItinData<IIC_VSHLi4Q, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 968*7330f729Sjoerg InstrStage<2, [A8_NPipe]>], [5, 1, 1]>, 969*7330f729Sjoerg // 970*7330f729Sjoerg // Double-register Integer Pair Add Long 971*7330f729Sjoerg InstrItinData<IIC_VPALiD, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 972*7330f729Sjoerg InstrStage<1, [A8_NPipe]>], [6, 3, 1]>, 973*7330f729Sjoerg // 974*7330f729Sjoerg // Quad-register Integer Pair Add Long 975*7330f729Sjoerg InstrItinData<IIC_VPALiQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 976*7330f729Sjoerg InstrStage<2, [A8_NPipe]>], [7, 3, 1]>, 977*7330f729Sjoerg // 978*7330f729Sjoerg // Double-register Absolute Difference and Accumulate 979*7330f729Sjoerg InstrItinData<IIC_VABAD, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 980*7330f729Sjoerg InstrStage<1, [A8_NPipe]>], [6, 3, 2, 1]>, 981*7330f729Sjoerg // 982*7330f729Sjoerg // Quad-register Absolute Difference and Accumulate 983*7330f729Sjoerg InstrItinData<IIC_VABAQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 984*7330f729Sjoerg InstrStage<2, [A8_NPipe]>], [6, 3, 2, 1]>, 985*7330f729Sjoerg 986*7330f729Sjoerg // 987*7330f729Sjoerg // Double-register Integer Multiply (.8, .16) 988*7330f729Sjoerg InstrItinData<IIC_VMULi16D, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 989*7330f729Sjoerg InstrStage<1, [A8_NPipe]>], [6, 2, 2]>, 990*7330f729Sjoerg // 991*7330f729Sjoerg // Double-register Integer Multiply (.32) 992*7330f729Sjoerg InstrItinData<IIC_VMULi32D, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 
993*7330f729Sjoerg InstrStage<2, [A8_NPipe]>], [7, 2, 1]>, 994*7330f729Sjoerg // 995*7330f729Sjoerg // Quad-register Integer Multiply (.8, .16) 996*7330f729Sjoerg InstrItinData<IIC_VMULi16Q, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 997*7330f729Sjoerg InstrStage<2, [A8_NPipe]>], [7, 2, 2]>, 998*7330f729Sjoerg // 999*7330f729Sjoerg // Quad-register Integer Multiply (.32) 1000*7330f729Sjoerg InstrItinData<IIC_VMULi32Q, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 1001*7330f729Sjoerg InstrStage<1, [A8_NPipe]>, 1002*7330f729Sjoerg InstrStage<2, [A8_NLSPipe], 0>, 1003*7330f729Sjoerg InstrStage<3, [A8_NPipe]>], [9, 2, 1]>, 1004*7330f729Sjoerg // 1005*7330f729Sjoerg // Double-register Integer Multiply-Accumulate (.8, .16) 1006*7330f729Sjoerg InstrItinData<IIC_VMACi16D, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 1007*7330f729Sjoerg InstrStage<1, [A8_NPipe]>], [6, 3, 2, 2]>, 1008*7330f729Sjoerg // 1009*7330f729Sjoerg // Double-register Integer Multiply-Accumulate (.32) 1010*7330f729Sjoerg InstrItinData<IIC_VMACi32D, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 1011*7330f729Sjoerg InstrStage<2, [A8_NPipe]>], [7, 3, 2, 1]>, 1012*7330f729Sjoerg // 1013*7330f729Sjoerg // Quad-register Integer Multiply-Accumulate (.8, .16) 1014*7330f729Sjoerg InstrItinData<IIC_VMACi16Q, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 1015*7330f729Sjoerg InstrStage<2, [A8_NPipe]>], [7, 3, 2, 2]>, 1016*7330f729Sjoerg // 1017*7330f729Sjoerg // Quad-register Integer Multiply-Accumulate (.32) 1018*7330f729Sjoerg InstrItinData<IIC_VMACi32Q, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 1019*7330f729Sjoerg InstrStage<1, [A8_NPipe]>, 1020*7330f729Sjoerg InstrStage<2, [A8_NLSPipe], 0>, 1021*7330f729Sjoerg InstrStage<3, [A8_NPipe]>], [9, 3, 2, 1]>, 1022*7330f729Sjoerg // 1023*7330f729Sjoerg // Double-register VEXT 1024*7330f729Sjoerg InstrItinData<IIC_VEXTD, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 1025*7330f729Sjoerg InstrStage<1, [A8_NLSPipe]>], [2, 1, 1]>, 1026*7330f729Sjoerg // 1027*7330f729Sjoerg // Quad-register VEXT 
1028*7330f729Sjoerg InstrItinData<IIC_VEXTQ, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 1029*7330f729Sjoerg InstrStage<2, [A8_NLSPipe]>], [3, 1, 1]>, 1030*7330f729Sjoerg // 1031*7330f729Sjoerg // VTB 1032*7330f729Sjoerg InstrItinData<IIC_VTB1, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 1033*7330f729Sjoerg InstrStage<2, [A8_NLSPipe]>], [3, 2, 1]>, 1034*7330f729Sjoerg InstrItinData<IIC_VTB2, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 1035*7330f729Sjoerg InstrStage<2, [A8_NLSPipe]>], [3, 2, 2, 1]>, 1036*7330f729Sjoerg InstrItinData<IIC_VTB3, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 1037*7330f729Sjoerg InstrStage<1, [A8_NLSPipe]>, 1038*7330f729Sjoerg InstrStage<1, [A8_NPipe], 0>, 1039*7330f729Sjoerg InstrStage<2, [A8_NLSPipe]>], [4, 2, 2, 3, 1]>, 1040*7330f729Sjoerg InstrItinData<IIC_VTB4, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 1041*7330f729Sjoerg InstrStage<1, [A8_NLSPipe]>, 1042*7330f729Sjoerg InstrStage<1, [A8_NPipe], 0>, 1043*7330f729Sjoerg InstrStage<2, [A8_NLSPipe]>],[4, 2, 2, 3, 3, 1]>, 1044*7330f729Sjoerg // 1045*7330f729Sjoerg // VTBX 1046*7330f729Sjoerg InstrItinData<IIC_VTBX1, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 1047*7330f729Sjoerg InstrStage<2, [A8_NLSPipe]>], [3, 1, 2, 1]>, 1048*7330f729Sjoerg InstrItinData<IIC_VTBX2, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 1049*7330f729Sjoerg InstrStage<2, [A8_NLSPipe]>], [3, 1, 2, 2, 1]>, 1050*7330f729Sjoerg InstrItinData<IIC_VTBX3, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 1051*7330f729Sjoerg InstrStage<1, [A8_NLSPipe]>, 1052*7330f729Sjoerg InstrStage<1, [A8_NPipe], 0>, 1053*7330f729Sjoerg InstrStage<2, [A8_NLSPipe]>],[4, 1, 2, 2, 3, 1]>, 1054*7330f729Sjoerg InstrItinData<IIC_VTBX4, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>, 1055*7330f729Sjoerg InstrStage<1, [A8_NLSPipe]>, 1056*7330f729Sjoerg InstrStage<1, [A8_NPipe], 0>, 1057*7330f729Sjoerg InstrStage<2, [A8_NLSPipe]>], [4, 1, 2, 2, 3, 3, 1]> 1058*7330f729Sjoerg]>; 1059*7330f729Sjoerg 1060*7330f729Sjoerg// 
// ===---------------------------------------------------------------------===//
// The following definitions describe the simple machine model which
// will replace itineraries.

// Cortex-A8 machine model for scheduling and other instruction cost heuristics.
def CortexA8Model : SchedMachineModel {
  // Itinerary table defined earlier in this file.
  let Itineraries = CortexA8Itineraries;

  // 2 micro-ops are dispatched per cycle.
  let IssueWidth = 2;

  // Optimistic load latency assuming bypass. This is overridden by
  // OperandCycles if the Itineraries are queried instead.
  let LoadLatency = 2;

  // Based on estimate of pipeline depth.
  let MispredictPenalty = 13;

  // The model is explicitly flagged as not complete.
  let CompleteModel = 0;
}