xref: /netbsd-src/external/apache2/llvm/dist/llvm/lib/Target/ARM/ARMScheduleA8.td (revision 7330f729ccf0bd976a06f95fad452fe774fc7fd1)
1*7330f729Sjoerg//=- ARMScheduleA8.td - ARM Cortex-A8 Scheduling Definitions -*- tablegen -*-=//
2*7330f729Sjoerg//
3*7330f729Sjoerg// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4*7330f729Sjoerg// See https://llvm.org/LICENSE.txt for license information.
5*7330f729Sjoerg// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6*7330f729Sjoerg//
7*7330f729Sjoerg//===----------------------------------------------------------------------===//
8*7330f729Sjoerg//
9*7330f729Sjoerg// This file defines the itinerary class data for the ARM Cortex A8 processors.
10*7330f729Sjoerg//
11*7330f729Sjoerg//===----------------------------------------------------------------------===//
12*7330f729Sjoerg
13*7330f729Sjoerg//
14*7330f729Sjoerg// Scheduling information derived from "Cortex-A8 Technical Reference Manual".
15*7330f729Sjoerg// Functional Units.
16*7330f729Sjoergdef A8_Pipe0   : FuncUnit; // integer issue pipeline 0
17*7330f729Sjoergdef A8_Pipe1   : FuncUnit; // integer issue pipeline 1
18*7330f729Sjoergdef A8_LSPipe  : FuncUnit; // load/store pipeline
19*7330f729Sjoergdef A8_NPipe   : FuncUnit; // NEON/VFP ALU and multiply pipeline
20*7330f729Sjoergdef A8_NLSPipe : FuncUnit; // NEON/VFP load/store pipeline
21*7330f729Sjoerg//
22*7330f729Sjoerg// Dual issue pipeline represented by A8_Pipe0 | A8_Pipe1
23*7330f729Sjoerg//
24*7330f729Sjoergdef CortexA8Itineraries : ProcessorItineraries<
25*7330f729Sjoerg  [A8_Pipe0, A8_Pipe1, A8_LSPipe, A8_NPipe, A8_NLSPipe],
26*7330f729Sjoerg  [], [
27*7330f729Sjoerg  // Two fully-pipelined integer ALU pipelines
28*7330f729Sjoerg  //
29*7330f729Sjoerg  // No operand cycles
30*7330f729Sjoerg  InstrItinData<IIC_iALUx    , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>]>,
31*7330f729Sjoerg  //
32*7330f729Sjoerg  // Binary Instructions that produce a result
33*7330f729Sjoerg  InstrItinData<IIC_iALUi ,[InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2]>,
34*7330f729Sjoerg  InstrItinData<IIC_iALUr ,[InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2, 2]>,
35*7330f729Sjoerg  InstrItinData<IIC_iALUsi,[InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2, 1]>,
36*7330f729Sjoerg  InstrItinData<IIC_iALUsir,[InstrStage<1,[A8_Pipe0, A8_Pipe1]>], [2, 1, 2]>,
37*7330f729Sjoerg  InstrItinData<IIC_iALUsr,[InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2, 1, 1]>,
38*7330f729Sjoerg  //
39*7330f729Sjoerg  // Bitwise Instructions that produce a result
40*7330f729Sjoerg  InstrItinData<IIC_iBITi ,[InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2]>,
41*7330f729Sjoerg  InstrItinData<IIC_iBITr ,[InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2, 2]>,
42*7330f729Sjoerg  InstrItinData<IIC_iBITsi,[InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2, 1]>,
43*7330f729Sjoerg  InstrItinData<IIC_iBITsr,[InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2, 1, 1]>,
44*7330f729Sjoerg  //
45*7330f729Sjoerg  // Unary Instructions that produce a result
46*7330f729Sjoerg  InstrItinData<IIC_iUNAr , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2]>,
47*7330f729Sjoerg  InstrItinData<IIC_iUNAsi, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 1]>,
48*7330f729Sjoerg  //
49*7330f729Sjoerg  // Zero and sign extension instructions
50*7330f729Sjoerg  InstrItinData<IIC_iEXTr , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [1, 1]>,
51*7330f729Sjoerg  InstrItinData<IIC_iEXTAr, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2, 1]>,
52*7330f729Sjoerg  InstrItinData<IIC_iEXTAsr,[InstrStage<1, [A8_Pipe0, A8_Pipe1]>],[2, 2, 1, 1]>,
53*7330f729Sjoerg  //
54*7330f729Sjoerg  // Compare instructions
55*7330f729Sjoerg  InstrItinData<IIC_iCMPi , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2]>,
56*7330f729Sjoerg  InstrItinData<IIC_iCMPr , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2]>,
57*7330f729Sjoerg  InstrItinData<IIC_iCMPsi, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 1]>,
58*7330f729Sjoerg  InstrItinData<IIC_iCMPsr, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 1, 1]>,
59*7330f729Sjoerg  //
60*7330f729Sjoerg  // Test instructions
61*7330f729Sjoerg  InstrItinData<IIC_iTSTi , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2]>,
62*7330f729Sjoerg  InstrItinData<IIC_iTSTr , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2]>,
63*7330f729Sjoerg  InstrItinData<IIC_iTSTsi, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 1]>,
64*7330f729Sjoerg  InstrItinData<IIC_iTSTsr, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 1, 1]>,
65*7330f729Sjoerg  //
66*7330f729Sjoerg  // Move instructions, unconditional
67*7330f729Sjoerg  InstrItinData<IIC_iMOVi , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [1]>,
68*7330f729Sjoerg  InstrItinData<IIC_iMOVr , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [1, 1]>,
69*7330f729Sjoerg  InstrItinData<IIC_iMOVsi, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [1, 1]>,
70*7330f729Sjoerg  InstrItinData<IIC_iMOVsr, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [1, 1, 1]>,
71*7330f729Sjoerg  InstrItinData<IIC_iMOVix2,[InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
72*7330f729Sjoerg                             InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2]>,
73*7330f729Sjoerg  InstrItinData<IIC_iMOVix2addpc,[InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
74*7330f729Sjoerg                                  InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
75*7330f729Sjoerg                                  InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [3]>,
76*7330f729Sjoerg  InstrItinData<IIC_iMOVix2ld,[InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
77*7330f729Sjoerg                               InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
78*7330f729Sjoerg                               InstrStage<1, [A8_LSPipe]>], [5]>,
79*7330f729Sjoerg  //
80*7330f729Sjoerg  // Move instructions, conditional
81*7330f729Sjoerg  InstrItinData<IIC_iCMOVi , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2]>,
82*7330f729Sjoerg  InstrItinData<IIC_iCMOVr , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 1]>,
83*7330f729Sjoerg  InstrItinData<IIC_iCMOVsi, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 1]>,
84*7330f729Sjoerg  InstrItinData<IIC_iCMOVsr, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 1, 1]>,
85*7330f729Sjoerg  InstrItinData<IIC_iCMOVix2,[InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
86*7330f729Sjoerg                              InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [3, 1]>,
87*7330f729Sjoerg  //
88*7330f729Sjoerg  // MVN instructions
89*7330f729Sjoerg  InstrItinData<IIC_iMVNi , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [1]>,
90*7330f729Sjoerg  InstrItinData<IIC_iMVNr , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [1, 1]>,
91*7330f729Sjoerg  InstrItinData<IIC_iMVNsi, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [1, 1]>,
92*7330f729Sjoerg  InstrItinData<IIC_iMVNsr, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [1, 1, 1]>,
93*7330f729Sjoerg
94*7330f729Sjoerg  // Integer multiply pipeline
95*7330f729Sjoerg  // Result written in E5, but that is relative to the last cycle of a multicycle
96*7330f729Sjoerg  // instruction, so we use 6 for those cases.
97*7330f729Sjoerg  //
98*7330f729Sjoerg  InstrItinData<IIC_iMUL16   , [InstrStage<1, [A8_Pipe0]>], [5, 1, 1]>,
99*7330f729Sjoerg  InstrItinData<IIC_iMAC16   , [InstrStage<2, [A8_Pipe0]>], [6, 1, 1, 4]>,
100*7330f729Sjoerg  InstrItinData<IIC_iMUL32   , [InstrStage<2, [A8_Pipe0]>], [6, 1, 1]>,
101*7330f729Sjoerg  InstrItinData<IIC_iMAC32   , [InstrStage<2, [A8_Pipe0]>], [6, 1, 1, 4]>,
102*7330f729Sjoerg  InstrItinData<IIC_iMUL64   , [InstrStage<3, [A8_Pipe0]>], [6, 6, 1, 1]>,
103*7330f729Sjoerg  InstrItinData<IIC_iMAC64   , [InstrStage<3, [A8_Pipe0]>], [6, 6, 1, 1]>,
104*7330f729Sjoerg
105*7330f729Sjoerg  // Integer load pipeline
106*7330f729Sjoerg  //
107*7330f729Sjoerg  // Immediate offset
108*7330f729Sjoerg  InstrItinData<IIC_iLoad_i   , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
109*7330f729Sjoerg                                 InstrStage<1, [A8_LSPipe]>], [3, 1]>,
110*7330f729Sjoerg  InstrItinData<IIC_iLoad_bh_i, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
111*7330f729Sjoerg                                 InstrStage<1, [A8_LSPipe]>], [3, 1]>,
112*7330f729Sjoerg  InstrItinData<IIC_iLoad_d_i,  [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
113*7330f729Sjoerg                                 InstrStage<1, [A8_LSPipe]>], [3, 1]>,
114*7330f729Sjoerg  //
115*7330f729Sjoerg  // Register offset
116*7330f729Sjoerg  InstrItinData<IIC_iLoad_r   , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
117*7330f729Sjoerg                                 InstrStage<1, [A8_LSPipe]>], [3, 1, 1]>,
118*7330f729Sjoerg  InstrItinData<IIC_iLoad_bh_r, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
119*7330f729Sjoerg                                 InstrStage<1, [A8_LSPipe]>], [3, 1, 1]>,
120*7330f729Sjoerg  InstrItinData<IIC_iLoad_d_r , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
121*7330f729Sjoerg                                 InstrStage<1, [A8_LSPipe]>], [3, 1, 1]>,
122*7330f729Sjoerg  //
123*7330f729Sjoerg  // Scaled register offset, issues over 2 cycles
124*7330f729Sjoerg  // FIXME: lsl by 2 takes 1 cycle.
125*7330f729Sjoerg  InstrItinData<IIC_iLoad_si  , [InstrStage<2, [A8_Pipe0, A8_Pipe1], 0>,
126*7330f729Sjoerg                                 InstrStage<1, [A8_LSPipe]>], [4, 1, 1]>,
127*7330f729Sjoerg  InstrItinData<IIC_iLoad_bh_si,[InstrStage<2, [A8_Pipe0, A8_Pipe1], 0>,
128*7330f729Sjoerg                                 InstrStage<1, [A8_LSPipe]>], [4, 1, 1]>,
129*7330f729Sjoerg  //
130*7330f729Sjoerg  // Immediate offset with update
131*7330f729Sjoerg  InstrItinData<IIC_iLoad_iu  , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
132*7330f729Sjoerg                                 InstrStage<1, [A8_LSPipe]>], [3, 2, 1]>,
133*7330f729Sjoerg  InstrItinData<IIC_iLoad_bh_iu,[InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
134*7330f729Sjoerg                                 InstrStage<1, [A8_LSPipe]>], [3, 2, 1]>,
135*7330f729Sjoerg  //
136*7330f729Sjoerg  // Register offset with update
137*7330f729Sjoerg  InstrItinData<IIC_iLoad_ru  , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
138*7330f729Sjoerg                                 InstrStage<1, [A8_LSPipe]>], [3, 2, 1, 1]>,
139*7330f729Sjoerg  InstrItinData<IIC_iLoad_bh_ru,[InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
140*7330f729Sjoerg                                 InstrStage<1, [A8_LSPipe]>], [3, 2, 1, 1]>,
141*7330f729Sjoerg  InstrItinData<IIC_iLoad_d_ru, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
142*7330f729Sjoerg                                 InstrStage<1, [A8_LSPipe]>], [3, 2, 1, 1]>,
143*7330f729Sjoerg  //
144*7330f729Sjoerg  // Scaled register offset with update, issues over 2 cycles
145*7330f729Sjoerg  InstrItinData<IIC_iLoad_siu , [InstrStage<2, [A8_Pipe0, A8_Pipe1], 0>,
146*7330f729Sjoerg                                 InstrStage<2, [A8_LSPipe]>], [4, 3, 1, 1]>,
147*7330f729Sjoerg  InstrItinData<IIC_iLoad_bh_siu,[InstrStage<2, [A8_Pipe0, A8_Pipe1], 0>,
148*7330f729Sjoerg                                  InstrStage<2, [A8_LSPipe]>], [4, 3, 1, 1]>,
149*7330f729Sjoerg  //
150*7330f729Sjoerg  // Load multiple, def is the 5th operand. Pipeline 0 only.
151*7330f729Sjoerg  // FIXME: A8_LSPipe cycle time is dynamic, this assumes 3 to 4 registers.
152*7330f729Sjoerg  InstrItinData<IIC_iLoad_m  , [InstrStage<2, [A8_Pipe0], 0>,
153*7330f729Sjoerg                                InstrStage<2, [A8_LSPipe]>],
154*7330f729Sjoerg                [1, 1, 1, 1, 3], [], -1>, // dynamic uops
155*7330f729Sjoerg  //
156*7330f729Sjoerg  // Load multiple + update, defs are the 1st and 5th operands.
157*7330f729Sjoerg  InstrItinData<IIC_iLoad_mu , [InstrStage<3, [A8_Pipe0], 0>,
158*7330f729Sjoerg                                InstrStage<3, [A8_LSPipe]>],
159*7330f729Sjoerg                [2, 1, 1, 1, 3], [], -1>, // dynamic uops
160*7330f729Sjoerg  //
161*7330f729Sjoerg  // Load multiple plus branch
162*7330f729Sjoerg  InstrItinData<IIC_iLoad_mBr, [InstrStage<3, [A8_Pipe0], 0>,
163*7330f729Sjoerg                                InstrStage<3, [A8_LSPipe]>,
164*7330f729Sjoerg                                InstrStage<1, [A8_Pipe0, A8_Pipe1]>],
165*7330f729Sjoerg                              [1, 2, 1, 1, 3], [], -1>, // dynamic uops
166*7330f729Sjoerg  //
167*7330f729Sjoerg  // Pop, def is the 3rd operand.
168*7330f729Sjoerg  InstrItinData<IIC_iPop  ,    [InstrStage<3, [A8_Pipe0], 0>,
169*7330f729Sjoerg                                InstrStage<3, [A8_LSPipe]>],
170*7330f729Sjoerg                [1, 1, 3], [], -1>, // dynamic uops
171*7330f729Sjoerg  //
172*7330f729Sjoerg  // Pop plus branch, def is the 3rd operand.
173*7330f729Sjoerg  InstrItinData<IIC_iPop_Br,   [InstrStage<3, [A8_Pipe0], 0>,
174*7330f729Sjoerg                                InstrStage<3, [A8_LSPipe]>,
175*7330f729Sjoerg                                InstrStage<1, [A8_Pipe0, A8_Pipe1]>],
176*7330f729Sjoerg                               [1, 1, 3], [], -1>, // dynamic uops
177*7330f729Sjoerg  //
178*7330f729Sjoerg  // iLoadi + iALUr for t2LDRpci_pic.
179*7330f729Sjoerg  InstrItinData<IIC_iLoadiALU, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
180*7330f729Sjoerg                                InstrStage<1, [A8_LSPipe]>,
181*7330f729Sjoerg                                InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [4, 1]>,
182*7330f729Sjoerg
183*7330f729Sjoerg
184*7330f729Sjoerg  // Integer store pipeline
185*7330f729Sjoerg  //
186*7330f729Sjoerg  // Immediate offset
187*7330f729Sjoerg  InstrItinData<IIC_iStore_i  , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
188*7330f729Sjoerg                                 InstrStage<1, [A8_LSPipe]>], [3, 1]>,
189*7330f729Sjoerg  InstrItinData<IIC_iStore_bh_i,[InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
190*7330f729Sjoerg                                 InstrStage<1, [A8_LSPipe]>], [3, 1]>,
191*7330f729Sjoerg  InstrItinData<IIC_iStore_d_i, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
192*7330f729Sjoerg                                 InstrStage<1, [A8_LSPipe]>], [3, 1]>,
193*7330f729Sjoerg  //
194*7330f729Sjoerg  // Register offset
195*7330f729Sjoerg  InstrItinData<IIC_iStore_r  , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
196*7330f729Sjoerg                                 InstrStage<1, [A8_LSPipe]>], [3, 1, 1]>,
197*7330f729Sjoerg  InstrItinData<IIC_iStore_bh_r,[InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
198*7330f729Sjoerg                                 InstrStage<1, [A8_LSPipe]>], [3, 1, 1]>,
199*7330f729Sjoerg  InstrItinData<IIC_iStore_d_r, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
200*7330f729Sjoerg                                 InstrStage<1, [A8_LSPipe]>], [3, 1, 1]>,
201*7330f729Sjoerg  //
202*7330f729Sjoerg  // Scaled register offset, issues over 2 cycles
203*7330f729Sjoerg  InstrItinData<IIC_iStore_si , [InstrStage<2, [A8_Pipe0, A8_Pipe1], 0>,
204*7330f729Sjoerg                                 InstrStage<2, [A8_LSPipe]>], [3, 1, 1]>,
205*7330f729Sjoerg  InstrItinData<IIC_iStore_bh_si,[InstrStage<2, [A8_Pipe0, A8_Pipe1], 0>,
206*7330f729Sjoerg                                  InstrStage<2, [A8_LSPipe]>], [3, 1, 1]>,
207*7330f729Sjoerg  //
208*7330f729Sjoerg  // Immediate offset with update
209*7330f729Sjoerg  InstrItinData<IIC_iStore_iu , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
210*7330f729Sjoerg                                 InstrStage<1, [A8_LSPipe]>], [2, 3, 1]>,
211*7330f729Sjoerg  InstrItinData<IIC_iStore_bh_iu,[InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
212*7330f729Sjoerg                                 InstrStage<1, [A8_LSPipe]>], [2, 3, 1]>,
213*7330f729Sjoerg  //
214*7330f729Sjoerg  // Register offset with update
215*7330f729Sjoerg  InstrItinData<IIC_iStore_ru  , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
216*7330f729Sjoerg                                  InstrStage<1, [A8_LSPipe]>], [2, 3, 1, 1]>,
217*7330f729Sjoerg  InstrItinData<IIC_iStore_bh_ru,[InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
218*7330f729Sjoerg                                  InstrStage<1, [A8_LSPipe]>], [2, 3, 1, 1]>,
219*7330f729Sjoerg  InstrItinData<IIC_iStore_d_ru, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
220*7330f729Sjoerg                                  InstrStage<1, [A8_LSPipe]>], [2, 3, 1, 1]>,
221*7330f729Sjoerg  //
222*7330f729Sjoerg  // Scaled register offset with update, issues over 2 cycles
223*7330f729Sjoerg  InstrItinData<IIC_iStore_siu, [InstrStage<2, [A8_Pipe0, A8_Pipe1], 0>,
224*7330f729Sjoerg                                 InstrStage<2, [A8_LSPipe]>], [3, 3, 1, 1]>,
225*7330f729Sjoerg  InstrItinData<IIC_iStore_bh_siu,[InstrStage<2, [A8_Pipe0, A8_Pipe1], 0>,
226*7330f729Sjoerg                                   InstrStage<2, [A8_LSPipe]>], [3, 3, 1, 1]>,
227*7330f729Sjoerg  //
228*7330f729Sjoerg  // Store multiple. Pipeline 0 only.
229*7330f729Sjoerg  // FIXME: A8_LSPipe cycle time is dynamic, this assumes 3 to 4 registers.
230*7330f729Sjoerg  InstrItinData<IIC_iStore_m , [InstrStage<2, [A8_Pipe0], 0>,
231*7330f729Sjoerg                                InstrStage<2, [A8_LSPipe]>],
232*7330f729Sjoerg                [], [], -1>, // dynamic uops
233*7330f729Sjoerg  //
234*7330f729Sjoerg  // Store multiple + update
235*7330f729Sjoerg  InstrItinData<IIC_iStore_mu, [InstrStage<2, [A8_Pipe0], 0>,
236*7330f729Sjoerg                                InstrStage<2, [A8_LSPipe]>],
237*7330f729Sjoerg                [2], [], -1>, // dynamic uops
238*7330f729Sjoerg  //
239*7330f729Sjoerg  // Preload
240*7330f729Sjoerg  InstrItinData<IIC_Preload, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2]>,
241*7330f729Sjoerg
242*7330f729Sjoerg  // Branch
243*7330f729Sjoerg  //
244*7330f729Sjoerg  // no delay slots, so the latency of a branch is unimportant
245*7330f729Sjoerg  InstrItinData<IIC_Br      , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>]>,
246*7330f729Sjoerg
247*7330f729Sjoerg  // VFP
248*7330f729Sjoerg  // Issue through integer pipeline, and execute in NEON unit. We assume
249*7330f729Sjoerg  // RunFast mode so that NFP pipeline is used for single-precision when
250*7330f729Sjoerg  // possible.
251*7330f729Sjoerg  //
252*7330f729Sjoerg  // FP Special Register to Integer Register File Move
253*7330f729Sjoerg  InstrItinData<IIC_fpSTAT , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
254*7330f729Sjoerg                              InstrStage<1, [A8_NLSPipe]>], [20]>,
255*7330f729Sjoerg  //
256*7330f729Sjoerg  // Single-precision FP Unary
257*7330f729Sjoerg  InstrItinData<IIC_fpUNA32 , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
258*7330f729Sjoerg                               InstrStage<1, [A8_NPipe]>], [7, 1]>,
259*7330f729Sjoerg  //
260*7330f729Sjoerg  // Double-precision FP Unary
261*7330f729Sjoerg  InstrItinData<IIC_fpUNA64 , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
262*7330f729Sjoerg                               InstrStage<4, [A8_NPipe], 0>,
263*7330f729Sjoerg                               InstrStage<4, [A8_NLSPipe]>], [4, 1]>,
264*7330f729Sjoerg  //
265*7330f729Sjoerg  // Single-precision FP Compare
266*7330f729Sjoerg  InstrItinData<IIC_fpCMP32 , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
267*7330f729Sjoerg                               InstrStage<1, [A8_NPipe]>], [1, 1]>,
268*7330f729Sjoerg  //
269*7330f729Sjoerg  // Double-precision FP Compare
270*7330f729Sjoerg  InstrItinData<IIC_fpCMP64 , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
271*7330f729Sjoerg                               InstrStage<4, [A8_NPipe], 0>,
272*7330f729Sjoerg                               InstrStage<4, [A8_NLSPipe]>], [4, 1]>,
273*7330f729Sjoerg  //
274*7330f729Sjoerg  // Single to Double FP Convert
275*7330f729Sjoerg  InstrItinData<IIC_fpCVTSD , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
276*7330f729Sjoerg                               InstrStage<7, [A8_NPipe], 0>,
277*7330f729Sjoerg                               InstrStage<7, [A8_NLSPipe]>], [7, 1]>,
278*7330f729Sjoerg  //
279*7330f729Sjoerg  // Double to Single FP Convert
280*7330f729Sjoerg  InstrItinData<IIC_fpCVTDS , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
281*7330f729Sjoerg                               InstrStage<5, [A8_NPipe], 0>,
282*7330f729Sjoerg                               InstrStage<5, [A8_NLSPipe]>], [5, 1]>,
283*7330f729Sjoerg  //
284*7330f729Sjoerg  // Single-Precision FP to Integer Convert
285*7330f729Sjoerg  InstrItinData<IIC_fpCVTSI , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
286*7330f729Sjoerg                               InstrStage<1, [A8_NPipe]>], [7, 1]>,
287*7330f729Sjoerg  //
288*7330f729Sjoerg  // Double-Precision FP to Integer Convert
289*7330f729Sjoerg  InstrItinData<IIC_fpCVTDI , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
290*7330f729Sjoerg                               InstrStage<8, [A8_NPipe], 0>,
291*7330f729Sjoerg                               InstrStage<8, [A8_NLSPipe]>], [8, 1]>,
292*7330f729Sjoerg  //
293*7330f729Sjoerg  // Integer to Single-Precision FP Convert
294*7330f729Sjoerg  InstrItinData<IIC_fpCVTIS , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
295*7330f729Sjoerg                               InstrStage<1, [A8_NPipe]>], [7, 1]>,
296*7330f729Sjoerg  //
297*7330f729Sjoerg  // Integer to Double-Precision FP Convert
298*7330f729Sjoerg  InstrItinData<IIC_fpCVTID , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
299*7330f729Sjoerg                               InstrStage<8, [A8_NPipe], 0>,
300*7330f729Sjoerg                               InstrStage<8, [A8_NLSPipe]>], [8, 1]>,
301*7330f729Sjoerg  //
302*7330f729Sjoerg  // Single-precision FP ALU
303*7330f729Sjoerg  InstrItinData<IIC_fpALU32 , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
304*7330f729Sjoerg                               InstrStage<1, [A8_NPipe]>], [7, 1, 1]>,
305*7330f729Sjoerg  //
306*7330f729Sjoerg  // Double-precision FP ALU
307*7330f729Sjoerg  InstrItinData<IIC_fpALU64 , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
308*7330f729Sjoerg                               InstrStage<9, [A8_NPipe], 0>,
309*7330f729Sjoerg                               InstrStage<9, [A8_NLSPipe]>], [9, 1, 1]>,
310*7330f729Sjoerg  //
311*7330f729Sjoerg  // Single-precision FP Multiply
312*7330f729Sjoerg  InstrItinData<IIC_fpMUL32 , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
313*7330f729Sjoerg                               InstrStage<1, [A8_NPipe]>], [7, 1, 1]>,
314*7330f729Sjoerg  //
315*7330f729Sjoerg  // Double-precision FP Multiply
316*7330f729Sjoerg  InstrItinData<IIC_fpMUL64 , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
317*7330f729Sjoerg                               InstrStage<11, [A8_NPipe], 0>,
318*7330f729Sjoerg                               InstrStage<11, [A8_NLSPipe]>], [11, 1, 1]>,
319*7330f729Sjoerg  //
320*7330f729Sjoerg  // Single-precision FP MAC
321*7330f729Sjoerg  InstrItinData<IIC_fpMAC32 , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
322*7330f729Sjoerg                               InstrStage<1, [A8_NPipe]>], [7, 2, 1, 1]>,
323*7330f729Sjoerg  //
324*7330f729Sjoerg  // Double-precision FP MAC
325*7330f729Sjoerg  InstrItinData<IIC_fpMAC64 , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
326*7330f729Sjoerg                               InstrStage<19, [A8_NPipe], 0>,
327*7330f729Sjoerg                               InstrStage<19, [A8_NLSPipe]>], [19, 2, 1, 1]>,
328*7330f729Sjoerg  //
329*7330f729Sjoerg  // Single-precision Fused FP MAC
330*7330f729Sjoerg  InstrItinData<IIC_fpFMAC32, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
331*7330f729Sjoerg                               InstrStage<1, [A8_NPipe]>], [7, 2, 1, 1]>,
332*7330f729Sjoerg  //
333*7330f729Sjoerg  // Double-precision Fused FP MAC
334*7330f729Sjoerg  InstrItinData<IIC_fpFMAC64, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
335*7330f729Sjoerg                               InstrStage<19, [A8_NPipe], 0>,
336*7330f729Sjoerg                               InstrStage<19, [A8_NLSPipe]>], [19, 2, 1, 1]>,
337*7330f729Sjoerg  //
338*7330f729Sjoerg  // Single-precision FP DIV
339*7330f729Sjoerg  InstrItinData<IIC_fpDIV32 , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
340*7330f729Sjoerg                               InstrStage<20, [A8_NPipe], 0>,
341*7330f729Sjoerg                               InstrStage<20, [A8_NLSPipe]>], [20, 1, 1]>,
342*7330f729Sjoerg  //
343*7330f729Sjoerg  // Double-precision FP DIV
344*7330f729Sjoerg  InstrItinData<IIC_fpDIV64 , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
345*7330f729Sjoerg                               InstrStage<29, [A8_NPipe], 0>,
346*7330f729Sjoerg                               InstrStage<29, [A8_NLSPipe]>], [29, 1, 1]>,
347*7330f729Sjoerg  //
348*7330f729Sjoerg  // Single-precision FP SQRT
349*7330f729Sjoerg  InstrItinData<IIC_fpSQRT32, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
350*7330f729Sjoerg                               InstrStage<19, [A8_NPipe], 0>,
351*7330f729Sjoerg                               InstrStage<19, [A8_NLSPipe]>], [19, 1]>,
352*7330f729Sjoerg  //
353*7330f729Sjoerg  // Double-precision FP SQRT
354*7330f729Sjoerg  InstrItinData<IIC_fpSQRT64, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
355*7330f729Sjoerg                               InstrStage<29, [A8_NPipe], 0>,
356*7330f729Sjoerg                               InstrStage<29, [A8_NLSPipe]>], [29, 1]>,
357*7330f729Sjoerg
358*7330f729Sjoerg  //
359*7330f729Sjoerg  // Integer to Single-precision Move
360*7330f729Sjoerg  InstrItinData<IIC_fpMOVIS,  [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
361*7330f729Sjoerg                               InstrStage<1, [A8_NPipe]>],
362*7330f729Sjoerg                              [2, 1]>,
363*7330f729Sjoerg  //
364*7330f729Sjoerg  // Integer to Double-precision Move
365*7330f729Sjoerg  InstrItinData<IIC_fpMOVID,  [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
366*7330f729Sjoerg                               InstrStage<1, [A8_NPipe]>],
367*7330f729Sjoerg                              [2, 1, 1]>,
368*7330f729Sjoerg  //
369*7330f729Sjoerg  // Single-precision to Integer Move
370*7330f729Sjoerg  InstrItinData<IIC_fpMOVSI,  [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
371*7330f729Sjoerg                               InstrStage<1, [A8_NPipe]>],
372*7330f729Sjoerg                              [20, 1]>,
373*7330f729Sjoerg  //
374*7330f729Sjoerg  // Double-precision to Integer Move
375*7330f729Sjoerg  InstrItinData<IIC_fpMOVDI,  [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
376*7330f729Sjoerg                               InstrStage<1, [A8_NPipe]>],
377*7330f729Sjoerg                              [20, 20, 1]>,
378*7330f729Sjoerg
379*7330f729Sjoerg  //
380*7330f729Sjoerg  // Single-precision FP Load
381*7330f729Sjoerg  InstrItinData<IIC_fpLoad32, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
382*7330f729Sjoerg                               InstrStage<1, [A8_NLSPipe], 0>,
383*7330f729Sjoerg                               InstrStage<1, [A8_LSPipe]>],
384*7330f729Sjoerg                              [2, 1]>,
385*7330f729Sjoerg  //
386*7330f729Sjoerg  // Double-precision FP Load
387*7330f729Sjoerg  InstrItinData<IIC_fpLoad64, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
388*7330f729Sjoerg                               InstrStage<1, [A8_NLSPipe], 0>,
389*7330f729Sjoerg                               InstrStage<1, [A8_LSPipe]>],
390*7330f729Sjoerg                              [2, 1]>,
391*7330f729Sjoerg  //
392*7330f729Sjoerg  // FP Load Multiple
393*7330f729Sjoerg  // FIXME: A8_LSPipe cycle time is dynamic, this assumes 3 to 4 registers.
394*7330f729Sjoerg  InstrItinData<IIC_fpLoad_m, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
395*7330f729Sjoerg                               InstrStage<1, [A8_NLSPipe], 0>,
396*7330f729Sjoerg                               InstrStage<1, [A8_LSPipe]>,
397*7330f729Sjoerg                               InstrStage<1, [A8_NLSPipe], 0>,
398*7330f729Sjoerg                               InstrStage<1, [A8_LSPipe]>],
399*7330f729Sjoerg                [1, 1, 1, 2], [], -1>, // dynamic uops
400*7330f729Sjoerg  //
401*7330f729Sjoerg  // FP Load Multiple + update
402*7330f729Sjoerg  InstrItinData<IIC_fpLoad_mu,[InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
403*7330f729Sjoerg                               InstrStage<1, [A8_NLSPipe], 0>,
404*7330f729Sjoerg                               InstrStage<1, [A8_LSPipe]>,
405*7330f729Sjoerg                               InstrStage<1, [A8_NLSPipe], 0>,
406*7330f729Sjoerg                               InstrStage<1, [A8_LSPipe]>],
407*7330f729Sjoerg                [2, 1, 1, 1, 2], [], -1>, // dynamic uops
408*7330f729Sjoerg  //
409*7330f729Sjoerg  // Single-precision FP Store
410*7330f729Sjoerg  InstrItinData<IIC_fpStore32,[InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
411*7330f729Sjoerg                               InstrStage<1, [A8_NLSPipe], 0>,
412*7330f729Sjoerg                               InstrStage<1, [A8_LSPipe]>],
413*7330f729Sjoerg                              [1, 1]>,
414*7330f729Sjoerg  //
415*7330f729Sjoerg  // Double-precision FP Store
416*7330f729Sjoerg  InstrItinData<IIC_fpStore64,[InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
417*7330f729Sjoerg                               InstrStage<1, [A8_NLSPipe], 0>,
418*7330f729Sjoerg                               InstrStage<1, [A8_LSPipe]>],
419*7330f729Sjoerg                              [1, 1]>,
420*7330f729Sjoerg  //
421*7330f729Sjoerg  // FP Store Multiple
422*7330f729Sjoerg  InstrItinData<IIC_fpStore_m,[InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
423*7330f729Sjoerg                               InstrStage<1, [A8_NLSPipe], 0>,
424*7330f729Sjoerg                               InstrStage<1, [A8_LSPipe]>,
425*7330f729Sjoerg                               InstrStage<1, [A8_NLSPipe], 0>,
426*7330f729Sjoerg                               InstrStage<1, [A8_LSPipe]>],
427*7330f729Sjoerg                [1, 1, 1, 1], [], -1>, // dynamic uops
428*7330f729Sjoerg  //
429*7330f729Sjoerg  // FP Store Multiple + update
430*7330f729Sjoerg  InstrItinData<IIC_fpStore_mu,[InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
431*7330f729Sjoerg                                InstrStage<1, [A8_NLSPipe], 0>,
432*7330f729Sjoerg                                InstrStage<1, [A8_LSPipe]>,
433*7330f729Sjoerg                                InstrStage<1, [A8_NLSPipe], 0>,
434*7330f729Sjoerg                                InstrStage<1, [A8_LSPipe]>],
435*7330f729Sjoerg                [2, 1, 1, 1, 1], [], -1>, // dynamic uops
436*7330f729Sjoerg  // NEON
437*7330f729Sjoerg  // Issue through integer pipeline, and execute in NEON unit.
438*7330f729Sjoerg  //
439*7330f729Sjoerg  // VLD1
440*7330f729Sjoerg  InstrItinData<IIC_VLD1,     [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
441*7330f729Sjoerg                               InstrStage<2, [A8_NLSPipe], 0>,
442*7330f729Sjoerg                               InstrStage<2, [A8_LSPipe]>],
443*7330f729Sjoerg                              [2, 1]>,
444*7330f729Sjoerg  // VLD1x2
445*7330f729Sjoerg  InstrItinData<IIC_VLD1x2,   [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
446*7330f729Sjoerg                               InstrStage<2, [A8_NLSPipe], 0>,
447*7330f729Sjoerg                               InstrStage<2, [A8_LSPipe]>],
448*7330f729Sjoerg                              [2, 2, 1]>,
449*7330f729Sjoerg  //
450*7330f729Sjoerg  // VLD1x3
451*7330f729Sjoerg  InstrItinData<IIC_VLD1x3,   [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
452*7330f729Sjoerg                               InstrStage<3, [A8_NLSPipe], 0>,
453*7330f729Sjoerg                               InstrStage<3, [A8_LSPipe]>],
454*7330f729Sjoerg                              [2, 2, 3, 1]>,
455*7330f729Sjoerg  //
456*7330f729Sjoerg  // VLD1x4
457*7330f729Sjoerg  InstrItinData<IIC_VLD1x4,   [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
458*7330f729Sjoerg                               InstrStage<3, [A8_NLSPipe], 0>,
459*7330f729Sjoerg                               InstrStage<3, [A8_LSPipe]>],
460*7330f729Sjoerg                              [2, 2, 3, 3, 1]>,
461*7330f729Sjoerg  //
462*7330f729Sjoerg  // VLD1u
463*7330f729Sjoerg  InstrItinData<IIC_VLD1u,    [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
464*7330f729Sjoerg                               InstrStage<2, [A8_NLSPipe], 0>,
465*7330f729Sjoerg                               InstrStage<2, [A8_LSPipe]>],
466*7330f729Sjoerg                              [2, 2, 1]>,
467*7330f729Sjoerg  //
468*7330f729Sjoerg  // VLD1x2u
469*7330f729Sjoerg  InstrItinData<IIC_VLD1x2u,  [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
470*7330f729Sjoerg                               InstrStage<2, [A8_NLSPipe], 0>,
471*7330f729Sjoerg                               InstrStage<2, [A8_LSPipe]>],
472*7330f729Sjoerg                              [2, 2, 2, 1]>,
473*7330f729Sjoerg  //
474*7330f729Sjoerg  // VLD1x3u
475*7330f729Sjoerg  InstrItinData<IIC_VLD1x3u,  [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
476*7330f729Sjoerg                               InstrStage<3, [A8_NLSPipe], 0>,
477*7330f729Sjoerg                               InstrStage<3, [A8_LSPipe]>],
478*7330f729Sjoerg                              [2, 2, 3, 2, 1]>,
479*7330f729Sjoerg  //
480*7330f729Sjoerg  // VLD1x4u
481*7330f729Sjoerg  InstrItinData<IIC_VLD1x4u,  [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
482*7330f729Sjoerg                               InstrStage<3, [A8_NLSPipe], 0>,
483*7330f729Sjoerg                               InstrStage<3, [A8_LSPipe]>],
484*7330f729Sjoerg                              [2, 2, 3, 3, 2, 1]>,
485*7330f729Sjoerg  //
486*7330f729Sjoerg  // VLD1ln
487*7330f729Sjoerg  InstrItinData<IIC_VLD1ln,   [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
488*7330f729Sjoerg                               InstrStage<3, [A8_NLSPipe], 0>,
489*7330f729Sjoerg                               InstrStage<3, [A8_LSPipe]>],
490*7330f729Sjoerg                              [3, 1, 1, 1]>,
491*7330f729Sjoerg  //
492*7330f729Sjoerg  // VLD1lnu
493*7330f729Sjoerg  InstrItinData<IIC_VLD1lnu,  [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
494*7330f729Sjoerg                               InstrStage<3, [A8_NLSPipe], 0>,
495*7330f729Sjoerg                               InstrStage<3, [A8_LSPipe]>],
496*7330f729Sjoerg                              [3, 2, 1, 1, 1, 1]>,
497*7330f729Sjoerg  //
498*7330f729Sjoerg  // VLD1dup
499*7330f729Sjoerg  InstrItinData<IIC_VLD1dup,  [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
500*7330f729Sjoerg                               InstrStage<2, [A8_NLSPipe], 0>,
501*7330f729Sjoerg                               InstrStage<2, [A8_LSPipe]>],
502*7330f729Sjoerg                              [2, 1]>,
503*7330f729Sjoerg  //
504*7330f729Sjoerg  // VLD1dupu
505*7330f729Sjoerg  InstrItinData<IIC_VLD1dupu, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
506*7330f729Sjoerg                               InstrStage<2, [A8_NLSPipe], 0>,
507*7330f729Sjoerg                               InstrStage<2, [A8_LSPipe]>],
508*7330f729Sjoerg                              [2, 2, 1, 1]>,
509*7330f729Sjoerg  //
510*7330f729Sjoerg  // VLD2
511*7330f729Sjoerg  InstrItinData<IIC_VLD2,     [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
512*7330f729Sjoerg                               InstrStage<2, [A8_NLSPipe], 0>,
513*7330f729Sjoerg                               InstrStage<2, [A8_LSPipe]>],
514*7330f729Sjoerg                              [2, 2, 1]>,
515*7330f729Sjoerg  //
516*7330f729Sjoerg  // VLD2x2
517*7330f729Sjoerg  InstrItinData<IIC_VLD2x2,   [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
518*7330f729Sjoerg                               InstrStage<3, [A8_NLSPipe], 0>,
519*7330f729Sjoerg                               InstrStage<3, [A8_LSPipe]>],
520*7330f729Sjoerg                              [2, 2, 3, 3, 1]>,
521*7330f729Sjoerg  //
522*7330f729Sjoerg  // VLD2ln
523*7330f729Sjoerg  InstrItinData<IIC_VLD2ln,   [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
524*7330f729Sjoerg                               InstrStage<3, [A8_NLSPipe], 0>,
525*7330f729Sjoerg                               InstrStage<3, [A8_LSPipe]>],
526*7330f729Sjoerg                              [3, 3, 1, 1, 1, 1]>,
527*7330f729Sjoerg  //
528*7330f729Sjoerg  // VLD2u
529*7330f729Sjoerg  InstrItinData<IIC_VLD2u,    [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
530*7330f729Sjoerg                               InstrStage<2, [A8_NLSPipe], 0>,
531*7330f729Sjoerg                               InstrStage<2, [A8_LSPipe]>],
532*7330f729Sjoerg                              [2, 2, 2, 1, 1, 1]>,
533*7330f729Sjoerg  //
534*7330f729Sjoerg  // VLD2x2u
535*7330f729Sjoerg  InstrItinData<IIC_VLD2x2u,  [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
536*7330f729Sjoerg                               InstrStage<3, [A8_NLSPipe], 0>,
537*7330f729Sjoerg                               InstrStage<3, [A8_LSPipe]>],
538*7330f729Sjoerg                              [2, 2, 3, 3, 2, 1]>,
539*7330f729Sjoerg  //
540*7330f729Sjoerg  // VLD2lnu
541*7330f729Sjoerg  InstrItinData<IIC_VLD2lnu,  [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
542*7330f729Sjoerg                               InstrStage<3, [A8_NLSPipe], 0>,
543*7330f729Sjoerg                               InstrStage<3, [A8_LSPipe]>],
544*7330f729Sjoerg                              [3, 3, 2, 1, 1, 1, 1, 1]>,
545*7330f729Sjoerg  //
546*7330f729Sjoerg  // VLD2dup
547*7330f729Sjoerg  InstrItinData<IIC_VLD2dup,  [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
548*7330f729Sjoerg                               InstrStage<2, [A8_NLSPipe], 0>,
549*7330f729Sjoerg                               InstrStage<2, [A8_LSPipe]>],
550*7330f729Sjoerg                              [2, 2, 1]>,
551*7330f729Sjoerg  //
552*7330f729Sjoerg  // VLD2dupu
553*7330f729Sjoerg  InstrItinData<IIC_VLD2dupu, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
554*7330f729Sjoerg                               InstrStage<2, [A8_NLSPipe], 0>,
555*7330f729Sjoerg                               InstrStage<2, [A8_LSPipe]>],
556*7330f729Sjoerg                              [2, 2, 2, 1, 1]>,
557*7330f729Sjoerg  //
558*7330f729Sjoerg  // VLD3
559*7330f729Sjoerg  InstrItinData<IIC_VLD3,     [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
560*7330f729Sjoerg                               InstrStage<4, [A8_NLSPipe], 0>,
561*7330f729Sjoerg                               InstrStage<4, [A8_LSPipe]>],
562*7330f729Sjoerg                              [3, 3, 4, 1]>,
563*7330f729Sjoerg  //
564*7330f729Sjoerg  // VLD3ln
565*7330f729Sjoerg  InstrItinData<IIC_VLD3ln,   [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
566*7330f729Sjoerg                               InstrStage<5, [A8_NLSPipe], 0>,
567*7330f729Sjoerg                               InstrStage<5, [A8_LSPipe]>],
568*7330f729Sjoerg                              [4, 4, 5, 1, 1, 1, 1, 2]>,
569*7330f729Sjoerg  //
570*7330f729Sjoerg  // VLD3u
571*7330f729Sjoerg  InstrItinData<IIC_VLD3u,    [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
572*7330f729Sjoerg                               InstrStage<4, [A8_NLSPipe], 0>,
573*7330f729Sjoerg                               InstrStage<4, [A8_LSPipe]>],
574*7330f729Sjoerg                              [3, 3, 4, 2, 1]>,
575*7330f729Sjoerg  //
576*7330f729Sjoerg  // VLD3lnu
577*7330f729Sjoerg  InstrItinData<IIC_VLD3lnu,  [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
578*7330f729Sjoerg                               InstrStage<5, [A8_NLSPipe], 0>,
579*7330f729Sjoerg                               InstrStage<5, [A8_LSPipe]>],
580*7330f729Sjoerg                              [4, 4, 5, 2, 1, 1, 1, 1, 1, 2]>,
581*7330f729Sjoerg  //
582*7330f729Sjoerg  // VLD3dup
583*7330f729Sjoerg  InstrItinData<IIC_VLD3dup,  [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
584*7330f729Sjoerg                               InstrStage<3, [A8_NLSPipe], 0>,
585*7330f729Sjoerg                               InstrStage<3, [A8_LSPipe]>],
586*7330f729Sjoerg                              [2, 2, 3, 1]>,
587*7330f729Sjoerg  //
588*7330f729Sjoerg  // VLD3dupu
589*7330f729Sjoerg  InstrItinData<IIC_VLD3dupu, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
590*7330f729Sjoerg                               InstrStage<3, [A8_NLSPipe], 0>,
591*7330f729Sjoerg                               InstrStage<3, [A8_LSPipe]>],
592*7330f729Sjoerg                              [2, 2, 3, 2, 1, 1]>,
593*7330f729Sjoerg  //
594*7330f729Sjoerg  // VLD4
595*7330f729Sjoerg  InstrItinData<IIC_VLD4,     [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
596*7330f729Sjoerg                               InstrStage<4, [A8_NLSPipe], 0>,
597*7330f729Sjoerg                               InstrStage<4, [A8_LSPipe]>],
598*7330f729Sjoerg                              [3, 3, 4, 4, 1]>,
599*7330f729Sjoerg  //
600*7330f729Sjoerg  // VLD4ln
601*7330f729Sjoerg  InstrItinData<IIC_VLD4ln,   [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
602*7330f729Sjoerg                               InstrStage<5, [A8_NLSPipe], 0>,
603*7330f729Sjoerg                               InstrStage<5, [A8_LSPipe]>],
604*7330f729Sjoerg                              [4, 4, 5, 5, 1, 1, 1, 1, 2, 2]>,
605*7330f729Sjoerg  //
606*7330f729Sjoerg  // VLD4u
607*7330f729Sjoerg  InstrItinData<IIC_VLD4u,    [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
608*7330f729Sjoerg                               InstrStage<4, [A8_NLSPipe], 0>,
609*7330f729Sjoerg                               InstrStage<4, [A8_LSPipe]>],
610*7330f729Sjoerg                              [3, 3, 4, 4, 2, 1]>,
611*7330f729Sjoerg  //
612*7330f729Sjoerg  // VLD4lnu
613*7330f729Sjoerg  InstrItinData<IIC_VLD4lnu,  [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
614*7330f729Sjoerg                               InstrStage<5, [A8_NLSPipe], 0>,
615*7330f729Sjoerg                               InstrStage<5, [A8_LSPipe]>],
616*7330f729Sjoerg                              [4, 4, 5, 5, 2, 1, 1, 1, 1, 1, 2, 2]>,
617*7330f729Sjoerg  //
618*7330f729Sjoerg  // VLD4dup
619*7330f729Sjoerg  InstrItinData<IIC_VLD4dup,  [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
620*7330f729Sjoerg                               InstrStage<3, [A8_NLSPipe], 0>,
621*7330f729Sjoerg                               InstrStage<3, [A8_LSPipe]>],
622*7330f729Sjoerg                              [2, 2, 3, 3, 1]>,
623*7330f729Sjoerg  //
624*7330f729Sjoerg  // VLD4dupu
625*7330f729Sjoerg  InstrItinData<IIC_VLD4dupu, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
626*7330f729Sjoerg                               InstrStage<3, [A8_NLSPipe], 0>,
627*7330f729Sjoerg                               InstrStage<3, [A8_LSPipe]>],
628*7330f729Sjoerg                              [2, 2, 3, 3, 2, 1, 1]>,
629*7330f729Sjoerg  //
630*7330f729Sjoerg  // VST1
631*7330f729Sjoerg  InstrItinData<IIC_VST1,     [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
632*7330f729Sjoerg                               InstrStage<2, [A8_NLSPipe], 0>,
633*7330f729Sjoerg                               InstrStage<2, [A8_LSPipe]>],
634*7330f729Sjoerg                              [1, 1, 1]>,
635*7330f729Sjoerg  //
636*7330f729Sjoerg  // VST1x2
637*7330f729Sjoerg  InstrItinData<IIC_VST1x2,   [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
638*7330f729Sjoerg                               InstrStage<2, [A8_NLSPipe], 0>,
639*7330f729Sjoerg                               InstrStage<2, [A8_LSPipe]>],
640*7330f729Sjoerg                              [1, 1, 1, 1]>,
641*7330f729Sjoerg  //
642*7330f729Sjoerg  // VST1x3
643*7330f729Sjoerg  InstrItinData<IIC_VST1x3,   [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
644*7330f729Sjoerg                               InstrStage<3, [A8_NLSPipe], 0>,
645*7330f729Sjoerg                               InstrStage<3, [A8_LSPipe]>],
646*7330f729Sjoerg                              [1, 1, 1, 1, 2]>,
647*7330f729Sjoerg  //
648*7330f729Sjoerg  // VST1x4
649*7330f729Sjoerg  InstrItinData<IIC_VST1x4,   [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
650*7330f729Sjoerg                               InstrStage<3, [A8_NLSPipe], 0>,
651*7330f729Sjoerg                               InstrStage<3, [A8_LSPipe]>],
652*7330f729Sjoerg                              [1, 1, 1, 1, 2, 2]>,
653*7330f729Sjoerg  //
654*7330f729Sjoerg  // VST1u
655*7330f729Sjoerg  InstrItinData<IIC_VST1u,    [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
656*7330f729Sjoerg                               InstrStage<2, [A8_NLSPipe], 0>,
657*7330f729Sjoerg                               InstrStage<2, [A8_LSPipe]>],
658*7330f729Sjoerg                              [2, 1, 1, 1, 1]>,
659*7330f729Sjoerg  //
660*7330f729Sjoerg  // VST1x2u
661*7330f729Sjoerg  InstrItinData<IIC_VST1x2u,  [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
662*7330f729Sjoerg                               InstrStage<2, [A8_NLSPipe], 0>,
663*7330f729Sjoerg                               InstrStage<2, [A8_LSPipe]>],
664*7330f729Sjoerg                              [2, 1, 1, 1, 1, 1]>,
665*7330f729Sjoerg  //
666*7330f729Sjoerg  // VST1x3u
667*7330f729Sjoerg  InstrItinData<IIC_VST1x3u,  [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
668*7330f729Sjoerg                               InstrStage<3, [A8_NLSPipe], 0>,
669*7330f729Sjoerg                               InstrStage<3, [A8_LSPipe]>],
670*7330f729Sjoerg                              [2, 1, 1, 1, 1, 1, 2]>,
671*7330f729Sjoerg  //
672*7330f729Sjoerg  // VST1x4u
673*7330f729Sjoerg  InstrItinData<IIC_VST1x4u,  [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
674*7330f729Sjoerg                               InstrStage<3, [A8_NLSPipe], 0>,
675*7330f729Sjoerg                               InstrStage<3, [A8_LSPipe]>],
676*7330f729Sjoerg                              [2, 1, 1, 1, 1, 1, 2, 2]>,
677*7330f729Sjoerg  //
678*7330f729Sjoerg  // VST1ln
679*7330f729Sjoerg  InstrItinData<IIC_VST1ln,   [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
680*7330f729Sjoerg                               InstrStage<2, [A8_NLSPipe], 0>,
681*7330f729Sjoerg                               InstrStage<2, [A8_LSPipe]>],
682*7330f729Sjoerg                              [1, 1, 1]>,
683*7330f729Sjoerg  //
684*7330f729Sjoerg  // VST1lnu
685*7330f729Sjoerg  InstrItinData<IIC_VST1lnu,  [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
686*7330f729Sjoerg                               InstrStage<2, [A8_NLSPipe], 0>,
687*7330f729Sjoerg                               InstrStage<2, [A8_LSPipe]>],
688*7330f729Sjoerg                              [2, 1, 1, 1, 1]>,
689*7330f729Sjoerg  //
690*7330f729Sjoerg  // VST2
691*7330f729Sjoerg  InstrItinData<IIC_VST2,     [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
692*7330f729Sjoerg                               InstrStage<2, [A8_NLSPipe], 0>,
693*7330f729Sjoerg                               InstrStage<2, [A8_LSPipe]>],
694*7330f729Sjoerg                              [1, 1, 1, 1]>,
695*7330f729Sjoerg  //
696*7330f729Sjoerg  // VST2x2
697*7330f729Sjoerg  InstrItinData<IIC_VST2x2,   [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
698*7330f729Sjoerg                               InstrStage<4, [A8_NLSPipe], 0>,
699*7330f729Sjoerg                               InstrStage<4, [A8_LSPipe]>],
700*7330f729Sjoerg                              [1, 1, 1, 1, 2, 2]>,
701*7330f729Sjoerg  //
702*7330f729Sjoerg  // VST2u
703*7330f729Sjoerg  InstrItinData<IIC_VST2u,    [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
704*7330f729Sjoerg                               InstrStage<2, [A8_NLSPipe], 0>,
705*7330f729Sjoerg                               InstrStage<2, [A8_LSPipe]>],
706*7330f729Sjoerg                              [2, 1, 1, 1, 1, 1]>,
707*7330f729Sjoerg  //
708*7330f729Sjoerg  // VST2x2u
709*7330f729Sjoerg  InstrItinData<IIC_VST2x2u,  [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
710*7330f729Sjoerg                               InstrStage<4, [A8_NLSPipe], 0>,
711*7330f729Sjoerg                               InstrStage<4, [A8_LSPipe]>],
712*7330f729Sjoerg                              [2, 1, 1, 1, 1, 1, 2, 2]>,
713*7330f729Sjoerg  //
714*7330f729Sjoerg  // VST2ln
715*7330f729Sjoerg  InstrItinData<IIC_VST2ln,   [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
716*7330f729Sjoerg                               InstrStage<2, [A8_NLSPipe], 0>,
717*7330f729Sjoerg                               InstrStage<2, [A8_LSPipe]>],
718*7330f729Sjoerg                              [1, 1, 1, 1]>,
719*7330f729Sjoerg  //
720*7330f729Sjoerg  // VST2lnu
721*7330f729Sjoerg  InstrItinData<IIC_VST2lnu,  [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
722*7330f729Sjoerg                               InstrStage<2, [A8_NLSPipe], 0>,
723*7330f729Sjoerg                               InstrStage<2, [A8_LSPipe]>],
724*7330f729Sjoerg                              [2, 1, 1, 1, 1, 1]>,
725*7330f729Sjoerg  //
726*7330f729Sjoerg  // VST3
727*7330f729Sjoerg  InstrItinData<IIC_VST3,     [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
728*7330f729Sjoerg                               InstrStage<3, [A8_NLSPipe], 0>,
729*7330f729Sjoerg                               InstrStage<3, [A8_LSPipe]>],
730*7330f729Sjoerg                              [1, 1, 1, 1, 2]>,
731*7330f729Sjoerg  //
732*7330f729Sjoerg  // VST3u
733*7330f729Sjoerg  InstrItinData<IIC_VST3u,    [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
734*7330f729Sjoerg                               InstrStage<3, [A8_NLSPipe], 0>,
735*7330f729Sjoerg                               InstrStage<3, [A8_LSPipe]>],
736*7330f729Sjoerg                              [2, 1, 1, 1, 1, 1, 2]>,
737*7330f729Sjoerg  //
738*7330f729Sjoerg  // VST3ln
739*7330f729Sjoerg  InstrItinData<IIC_VST3ln,   [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
740*7330f729Sjoerg                               InstrStage<3, [A8_NLSPipe], 0>,
741*7330f729Sjoerg                               InstrStage<3, [A8_LSPipe]>],
742*7330f729Sjoerg                              [1, 1, 1, 1, 2]>,
743*7330f729Sjoerg  //
744*7330f729Sjoerg  // VST3lnu
745*7330f729Sjoerg  InstrItinData<IIC_VST3lnu,  [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
746*7330f729Sjoerg                               InstrStage<3, [A8_NLSPipe], 0>,
747*7330f729Sjoerg                               InstrStage<3, [A8_LSPipe]>],
748*7330f729Sjoerg                              [2, 1, 1, 1, 1, 1, 2]>,
749*7330f729Sjoerg  //
750*7330f729Sjoerg  // VST4
751*7330f729Sjoerg  InstrItinData<IIC_VST4,     [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
752*7330f729Sjoerg                               InstrStage<4, [A8_NLSPipe], 0>,
753*7330f729Sjoerg                               InstrStage<4, [A8_LSPipe]>],
754*7330f729Sjoerg                              [1, 1, 1, 1, 2, 2]>,
755*7330f729Sjoerg  //
756*7330f729Sjoerg  // VST4u
757*7330f729Sjoerg  InstrItinData<IIC_VST4u,    [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
758*7330f729Sjoerg                               InstrStage<4, [A8_NLSPipe], 0>,
759*7330f729Sjoerg                               InstrStage<4, [A8_LSPipe]>],
760*7330f729Sjoerg                              [2, 1, 1, 1, 1, 1, 2, 2]>,
761*7330f729Sjoerg  //
762*7330f729Sjoerg  // VST4ln
763*7330f729Sjoerg  InstrItinData<IIC_VST4ln,   [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
764*7330f729Sjoerg                               InstrStage<4, [A8_NLSPipe], 0>,
765*7330f729Sjoerg                               InstrStage<4, [A8_LSPipe]>],
766*7330f729Sjoerg                              [1, 1, 1, 1, 2, 2]>,
767*7330f729Sjoerg  //
768*7330f729Sjoerg  // VST4lnu
769*7330f729Sjoerg  InstrItinData<IIC_VST4lnu,  [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
770*7330f729Sjoerg                               InstrStage<4, [A8_NLSPipe], 0>,
771*7330f729Sjoerg                               InstrStage<4, [A8_LSPipe]>],
772*7330f729Sjoerg                              [2, 1, 1, 1, 1, 1, 2, 2]>,
773*7330f729Sjoerg  //
774*7330f729Sjoerg  // Double-register FP Unary
775*7330f729Sjoerg  InstrItinData<IIC_VUNAD,    [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
776*7330f729Sjoerg                               InstrStage<1, [A8_NPipe]>], [5, 2]>,
777*7330f729Sjoerg  //
778*7330f729Sjoerg  // Quad-register FP Unary
779*7330f729Sjoerg  // Result written in N5, but that is relative to the last cycle of multicycle,
780*7330f729Sjoerg  // so we use 6 for those cases
781*7330f729Sjoerg  InstrItinData<IIC_VUNAQ,    [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
782*7330f729Sjoerg                               InstrStage<2, [A8_NPipe]>], [6, 2]>,
783*7330f729Sjoerg  //
784*7330f729Sjoerg  // Double-register FP Binary
785*7330f729Sjoerg  InstrItinData<IIC_VBIND,    [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
786*7330f729Sjoerg                               InstrStage<1, [A8_NPipe]>], [5, 2, 2]>,
787*7330f729Sjoerg  //
788*7330f729Sjoerg  // VPADD, etc.
789*7330f729Sjoerg  InstrItinData<IIC_VPBIND,   [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
790*7330f729Sjoerg                               InstrStage<1, [A8_NPipe]>], [5, 2, 2]>,
791*7330f729Sjoerg  //
792*7330f729Sjoerg  // Double-register FP VMUL
793*7330f729Sjoerg  InstrItinData<IIC_VFMULD,   [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
794*7330f729Sjoerg                               InstrStage<1, [A8_NPipe]>], [5, 2, 1]>,
795*7330f729Sjoerg
796*7330f729Sjoerg  //
797*7330f729Sjoerg  // Quad-register FP Binary
798*7330f729Sjoerg  // Result written in N5, but that is relative to the last cycle of multicycle,
799*7330f729Sjoerg  // so we use 6 for those cases
800*7330f729Sjoerg  InstrItinData<IIC_VBINQ,    [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
801*7330f729Sjoerg                               InstrStage<2, [A8_NPipe]>], [6, 2, 2]>,
802*7330f729Sjoerg  //
803*7330f729Sjoerg  // Quad-register FP VMUL
804*7330f729Sjoerg  InstrItinData<IIC_VFMULQ,   [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
805*7330f729Sjoerg                               InstrStage<1, [A8_NPipe]>], [6, 2, 1]>,
806*7330f729Sjoerg  //
807*7330f729Sjoerg  // Move
808*7330f729Sjoerg  InstrItinData<IIC_VMOV,     [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
809*7330f729Sjoerg                               InstrStage<1, [A8_NPipe]>], [1, 1]>,
810*7330f729Sjoerg  //
811*7330f729Sjoerg  // Move Immediate
812*7330f729Sjoerg  InstrItinData<IIC_VMOVImm,  [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
813*7330f729Sjoerg                               InstrStage<1, [A8_NPipe]>], [3]>,
814*7330f729Sjoerg  //
815*7330f729Sjoerg  // Double-register Permute Move
816*7330f729Sjoerg  InstrItinData<IIC_VMOVD,    [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
817*7330f729Sjoerg                               InstrStage<1, [A8_NLSPipe]>], [2, 1]>,
818*7330f729Sjoerg  //
819*7330f729Sjoerg  // Quad-register Permute Move
820*7330f729Sjoerg  // Result written in N2, but that is relative to the last cycle of multicycle,
821*7330f729Sjoerg  // so we use 3 for those cases
822*7330f729Sjoerg  InstrItinData<IIC_VMOVQ,    [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
823*7330f729Sjoerg                               InstrStage<2, [A8_NLSPipe]>], [3, 1]>,
824*7330f729Sjoerg  //
825*7330f729Sjoerg  // Integer to Single-precision Move
826*7330f729Sjoerg  InstrItinData<IIC_VMOVIS ,  [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
827*7330f729Sjoerg                               InstrStage<1, [A8_NLSPipe]>], [2, 1]>,
828*7330f729Sjoerg  //
829*7330f729Sjoerg  // Integer to Double-precision Move
830*7330f729Sjoerg  InstrItinData<IIC_VMOVID ,  [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
831*7330f729Sjoerg                               InstrStage<1, [A8_NLSPipe]>], [2, 1, 1]>,
832*7330f729Sjoerg  //
833*7330f729Sjoerg  // Single-precision to Integer Move
834*7330f729Sjoerg  InstrItinData<IIC_VMOVSI ,  [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
835*7330f729Sjoerg                               InstrStage<1, [A8_NLSPipe]>], [20, 1]>,
836*7330f729Sjoerg  //
837*7330f729Sjoerg  // Double-precision to Integer Move
838*7330f729Sjoerg  InstrItinData<IIC_VMOVDI ,  [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
839*7330f729Sjoerg                               InstrStage<1, [A8_NLSPipe]>], [20, 20, 1]>,
840*7330f729Sjoerg  //
841*7330f729Sjoerg  // Integer to Lane Move
842*7330f729Sjoerg  InstrItinData<IIC_VMOVISL , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
843*7330f729Sjoerg                               InstrStage<2, [A8_NLSPipe]>], [3, 1, 1]>,
844*7330f729Sjoerg  //
845*7330f729Sjoerg  // Vector narrow move
846*7330f729Sjoerg  InstrItinData<IIC_VMOVN   , [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
847*7330f729Sjoerg                               InstrStage<1, [A8_NPipe]>], [2, 1]>,
848*7330f729Sjoerg  //
849*7330f729Sjoerg  // Double-register Permute
850*7330f729Sjoerg  InstrItinData<IIC_VPERMD,   [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
851*7330f729Sjoerg                               InstrStage<1, [A8_NLSPipe]>], [2, 2, 1, 1]>,
852*7330f729Sjoerg  //
853*7330f729Sjoerg  // Quad-register Permute
854*7330f729Sjoerg  // Result written in N2, but that is relative to the last cycle of multicycle,
855*7330f729Sjoerg  // so we use 3 for those cases
856*7330f729Sjoerg  InstrItinData<IIC_VPERMQ,   [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
857*7330f729Sjoerg                               InstrStage<2, [A8_NLSPipe]>], [3, 3, 1, 1]>,
858*7330f729Sjoerg  //
859*7330f729Sjoerg  // Quad-register Permute (3 cycle issue)
860*7330f729Sjoerg  // Result written in N2, but that is relative to the last cycle of multicycle,
861*7330f729Sjoerg  // so we use 4 for those cases
862*7330f729Sjoerg  InstrItinData<IIC_VPERMQ3,  [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
863*7330f729Sjoerg                               InstrStage<1, [A8_NLSPipe]>,
864*7330f729Sjoerg                               InstrStage<1, [A8_NPipe], 0>,
865*7330f729Sjoerg                               InstrStage<2, [A8_NLSPipe]>], [4, 4, 1, 1]>,
866*7330f729Sjoerg  //
867*7330f729Sjoerg  // Double-register FP Multiply-Accumulate
868*7330f729Sjoerg  InstrItinData<IIC_VMACD,    [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
869*7330f729Sjoerg                               InstrStage<1, [A8_NPipe]>], [9, 3, 2, 2]>,
870*7330f729Sjoerg  //
871*7330f729Sjoerg  // Quad-register FP Multiply-Accumulate
872*7330f729Sjoerg  // Result written in N9, but that is relative to the last cycle of multicycle,
873*7330f729Sjoerg  // so we use 10 for those cases
874*7330f729Sjoerg  InstrItinData<IIC_VMACQ,    [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
875*7330f729Sjoerg                               InstrStage<2, [A8_NPipe]>], [10, 3, 2, 2]>,
876*7330f729Sjoerg  //
877*7330f729Sjoerg  // Double-register Fused FP Multiply-Accumulate
878*7330f729Sjoerg  InstrItinData<IIC_VFMACD,   [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
879*7330f729Sjoerg                               InstrStage<1, [A8_NPipe]>], [9, 3, 2, 2]>,
880*7330f729Sjoerg  //
881*7330f729Sjoerg  // Quad-register Fused FP Multiply-Accumulate
882*7330f729Sjoerg  // Result written in N9, but that is relative to the last cycle of multicycle,
883*7330f729Sjoerg  // so we use 10 for those cases
884*7330f729Sjoerg  InstrItinData<IIC_VFMACQ,   [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
885*7330f729Sjoerg                               InstrStage<2, [A8_NPipe]>], [10, 3, 2, 2]>,
886*7330f729Sjoerg  //
887*7330f729Sjoerg  // Double-register Reciprocal Step
888*7330f729Sjoerg  InstrItinData<IIC_VRECSD,   [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
889*7330f729Sjoerg                               InstrStage<1, [A8_NPipe]>], [9, 2, 2]>,
890*7330f729Sjoerg  //
891*7330f729Sjoerg  // Quad-register Reciprocal Step
892*7330f729Sjoerg  InstrItinData<IIC_VRECSQ,   [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
893*7330f729Sjoerg                               InstrStage<2, [A8_NPipe]>], [10, 2, 2]>,
894*7330f729Sjoerg  //
895*7330f729Sjoerg  // Double-register Integer Count
896*7330f729Sjoerg  InstrItinData<IIC_VCNTiD,   [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
897*7330f729Sjoerg                               InstrStage<1, [A8_NPipe]>], [3, 2, 2]>,
898*7330f729Sjoerg  //
899*7330f729Sjoerg  // Quad-register Integer Count
900*7330f729Sjoerg  // Result written in N3, but that is relative to the last cycle of multicycle,
901*7330f729Sjoerg  // so we use 4 for those cases
902*7330f729Sjoerg  InstrItinData<IIC_VCNTiQ,   [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
903*7330f729Sjoerg                               InstrStage<2, [A8_NPipe]>], [4, 2, 2]>,
904*7330f729Sjoerg  //
905*7330f729Sjoerg  // Double-register Integer Unary
906*7330f729Sjoerg  InstrItinData<IIC_VUNAiD,   [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
907*7330f729Sjoerg                               InstrStage<1, [A8_NPipe]>], [4, 2]>,
908*7330f729Sjoerg  //
909*7330f729Sjoerg  // Quad-register Integer Unary
910*7330f729Sjoerg  InstrItinData<IIC_VUNAiQ,   [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
911*7330f729Sjoerg                               InstrStage<1, [A8_NPipe]>], [4, 2]>,
912*7330f729Sjoerg  //
913*7330f729Sjoerg  // Double-register Integer Q-Unary
914*7330f729Sjoerg  InstrItinData<IIC_VQUNAiD,  [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
915*7330f729Sjoerg                               InstrStage<1, [A8_NPipe]>], [4, 1]>,
916*7330f729Sjoerg  //
917*7330f729Sjoerg  // Quad-register Integer Q-Unary
918*7330f729Sjoerg  InstrItinData<IIC_VQUNAiQ,  [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
919*7330f729Sjoerg                               InstrStage<1, [A8_NPipe]>], [4, 1]>,
920*7330f729Sjoerg  //
921*7330f729Sjoerg  // Double-register Integer Binary
922*7330f729Sjoerg  InstrItinData<IIC_VBINiD,   [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
923*7330f729Sjoerg                               InstrStage<1, [A8_NPipe]>], [3, 2, 2]>,
924*7330f729Sjoerg  //
925*7330f729Sjoerg  // Quad-register Integer Binary
926*7330f729Sjoerg  InstrItinData<IIC_VBINiQ,   [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
927*7330f729Sjoerg                               InstrStage<1, [A8_NPipe]>], [3, 2, 2]>,
928*7330f729Sjoerg  //
929*7330f729Sjoerg  // Double-register Integer Binary (4 cycle)
930*7330f729Sjoerg  InstrItinData<IIC_VBINi4D,  [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
931*7330f729Sjoerg                               InstrStage<1, [A8_NPipe]>], [4, 2, 1]>,
932*7330f729Sjoerg  //
933*7330f729Sjoerg  // Quad-register Integer Binary (4 cycle)
934*7330f729Sjoerg  InstrItinData<IIC_VBINi4Q,  [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
935*7330f729Sjoerg                               InstrStage<1, [A8_NPipe]>], [4, 2, 1]>,
936*7330f729Sjoerg
937*7330f729Sjoerg  //
938*7330f729Sjoerg  // Double-register Integer Subtract
939*7330f729Sjoerg  InstrItinData<IIC_VSUBiD,   [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
940*7330f729Sjoerg                               InstrStage<1, [A8_NPipe]>], [3, 2, 1]>,
941*7330f729Sjoerg  //
942*7330f729Sjoerg  // Quad-register Integer Subtract
943*7330f729Sjoerg  InstrItinData<IIC_VSUBiQ,   [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
944*7330f729Sjoerg                               InstrStage<1, [A8_NPipe]>], [3, 2, 1]>,
945*7330f729Sjoerg  //
946*7330f729Sjoerg  // Double-register Integer Subtract (4 cycle)
947*7330f729Sjoerg  InstrItinData<IIC_VSUBi4D,  [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
948*7330f729Sjoerg                               InstrStage<1, [A8_NPipe]>], [4, 2, 1]>,
949*7330f729Sjoerg  //
950*7330f729Sjoerg  // Quad-register Integer Subtract (4 cycle)
951*7330f729Sjoerg  InstrItinData<IIC_VSUBi4Q,  [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
952*7330f729Sjoerg                               InstrStage<1, [A8_NPipe]>], [4, 2, 1]>,
953*7330f729Sjoerg  //
954*7330f729Sjoerg  // Double-register Integer Shift
955*7330f729Sjoerg  InstrItinData<IIC_VSHLiD,   [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
956*7330f729Sjoerg                               InstrStage<1, [A8_NPipe]>], [3, 1, 1]>,
957*7330f729Sjoerg  //
958*7330f729Sjoerg  // Quad-register Integer Shift
959*7330f729Sjoerg  InstrItinData<IIC_VSHLiQ,   [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
960*7330f729Sjoerg                               InstrStage<2, [A8_NPipe]>], [4, 1, 1]>,
961*7330f729Sjoerg  //
962*7330f729Sjoerg  // Double-register Integer Shift (4 cycle)
963*7330f729Sjoerg  InstrItinData<IIC_VSHLi4D,   [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
964*7330f729Sjoerg                               InstrStage<1, [A8_NPipe]>], [4, 1, 1]>,
965*7330f729Sjoerg  //
966*7330f729Sjoerg  // Quad-register Integer Shift (4 cycle)
967*7330f729Sjoerg  InstrItinData<IIC_VSHLi4Q,   [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
968*7330f729Sjoerg                               InstrStage<2, [A8_NPipe]>], [5, 1, 1]>,
969*7330f729Sjoerg  //
970*7330f729Sjoerg  // Double-register Integer Pair Add Long
971*7330f729Sjoerg  InstrItinData<IIC_VPALiD,   [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
972*7330f729Sjoerg                               InstrStage<1, [A8_NPipe]>], [6, 3, 1]>,
973*7330f729Sjoerg  //
974*7330f729Sjoerg  // Quad-register Integer Pair Add Long
975*7330f729Sjoerg  InstrItinData<IIC_VPALiQ,   [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
976*7330f729Sjoerg                               InstrStage<2, [A8_NPipe]>], [7, 3, 1]>,
977*7330f729Sjoerg  //
978*7330f729Sjoerg  // Double-register Absolute Difference and Accumulate
979*7330f729Sjoerg  InstrItinData<IIC_VABAD,    [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
980*7330f729Sjoerg                               InstrStage<1, [A8_NPipe]>], [6, 3, 2, 1]>,
981*7330f729Sjoerg  //
982*7330f729Sjoerg  // Quad-register Absolute Difference and Accumulate
983*7330f729Sjoerg  InstrItinData<IIC_VABAQ,    [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
984*7330f729Sjoerg                               InstrStage<2, [A8_NPipe]>], [6, 3, 2, 1]>,
985*7330f729Sjoerg
986*7330f729Sjoerg  //
987*7330f729Sjoerg  // Double-register Integer Multiply (.8, .16)
988*7330f729Sjoerg  InstrItinData<IIC_VMULi16D, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
989*7330f729Sjoerg                               InstrStage<1, [A8_NPipe]>], [6, 2, 2]>,
990*7330f729Sjoerg  //
991*7330f729Sjoerg  // Double-register Integer Multiply (.32)
992*7330f729Sjoerg  InstrItinData<IIC_VMULi32D, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
993*7330f729Sjoerg                               InstrStage<2, [A8_NPipe]>], [7, 2, 1]>,
994*7330f729Sjoerg  //
995*7330f729Sjoerg  // Quad-register Integer Multiply (.8, .16)
996*7330f729Sjoerg  InstrItinData<IIC_VMULi16Q, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
997*7330f729Sjoerg                               InstrStage<2, [A8_NPipe]>], [7, 2, 2]>,
998*7330f729Sjoerg  //
999*7330f729Sjoerg  // Quad-register Integer Multiply (.32)
1000*7330f729Sjoerg  InstrItinData<IIC_VMULi32Q, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
1001*7330f729Sjoerg                               InstrStage<1, [A8_NPipe]>,
1002*7330f729Sjoerg                               InstrStage<2, [A8_NLSPipe], 0>,
1003*7330f729Sjoerg                               InstrStage<3, [A8_NPipe]>], [9, 2, 1]>,
1004*7330f729Sjoerg  //
1005*7330f729Sjoerg  // Double-register Integer Multiply-Accumulate (.8, .16)
1006*7330f729Sjoerg  InstrItinData<IIC_VMACi16D, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
1007*7330f729Sjoerg                               InstrStage<1, [A8_NPipe]>], [6, 3, 2, 2]>,
1008*7330f729Sjoerg  //
1009*7330f729Sjoerg  // Double-register Integer Multiply-Accumulate (.32)
1010*7330f729Sjoerg  InstrItinData<IIC_VMACi32D, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
1011*7330f729Sjoerg                               InstrStage<2, [A8_NPipe]>], [7, 3, 2, 1]>,
1012*7330f729Sjoerg  //
1013*7330f729Sjoerg  // Quad-register Integer Multiply-Accumulate (.8, .16)
1014*7330f729Sjoerg  InstrItinData<IIC_VMACi16Q, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
1015*7330f729Sjoerg                               InstrStage<2, [A8_NPipe]>], [7, 3, 2, 2]>,
1016*7330f729Sjoerg  //
1017*7330f729Sjoerg  // Quad-register Integer Multiply-Accumulate (.32)
1018*7330f729Sjoerg  InstrItinData<IIC_VMACi32Q, [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
1019*7330f729Sjoerg                               InstrStage<1, [A8_NPipe]>,
1020*7330f729Sjoerg                               InstrStage<2, [A8_NLSPipe], 0>,
1021*7330f729Sjoerg                               InstrStage<3, [A8_NPipe]>], [9, 3, 2, 1]>,
1022*7330f729Sjoerg  //
1023*7330f729Sjoerg  // Double-register VEXT
1024*7330f729Sjoerg  InstrItinData<IIC_VEXTD,    [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
1025*7330f729Sjoerg                               InstrStage<1, [A8_NLSPipe]>], [2, 1, 1]>,
1026*7330f729Sjoerg  //
1027*7330f729Sjoerg  // Quad-register VEXT
1028*7330f729Sjoerg  InstrItinData<IIC_VEXTQ,    [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
1029*7330f729Sjoerg                               InstrStage<2, [A8_NLSPipe]>], [3, 1, 1]>,
1030*7330f729Sjoerg  //
1031*7330f729Sjoerg  // VTB
1032*7330f729Sjoerg  InstrItinData<IIC_VTB1,     [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
1033*7330f729Sjoerg                               InstrStage<2, [A8_NLSPipe]>], [3, 2, 1]>,
1034*7330f729Sjoerg  InstrItinData<IIC_VTB2,     [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
1035*7330f729Sjoerg                               InstrStage<2, [A8_NLSPipe]>], [3, 2, 2, 1]>,
1036*7330f729Sjoerg  InstrItinData<IIC_VTB3,     [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
1037*7330f729Sjoerg                               InstrStage<1, [A8_NLSPipe]>,
1038*7330f729Sjoerg                               InstrStage<1, [A8_NPipe], 0>,
1039*7330f729Sjoerg                               InstrStage<2, [A8_NLSPipe]>], [4, 2, 2, 3, 1]>,
1040*7330f729Sjoerg  InstrItinData<IIC_VTB4,     [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
1041*7330f729Sjoerg                               InstrStage<1, [A8_NLSPipe]>,
1042*7330f729Sjoerg                               InstrStage<1, [A8_NPipe], 0>,
1043*7330f729Sjoerg                               InstrStage<2, [A8_NLSPipe]>],[4, 2, 2, 3, 3, 1]>,
1044*7330f729Sjoerg  //
1045*7330f729Sjoerg  // VTBX
1046*7330f729Sjoerg  InstrItinData<IIC_VTBX1,    [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
1047*7330f729Sjoerg                               InstrStage<2, [A8_NLSPipe]>], [3, 1, 2, 1]>,
1048*7330f729Sjoerg  InstrItinData<IIC_VTBX2,    [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
1049*7330f729Sjoerg                               InstrStage<2, [A8_NLSPipe]>], [3, 1, 2, 2, 1]>,
1050*7330f729Sjoerg  InstrItinData<IIC_VTBX3,    [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
1051*7330f729Sjoerg                               InstrStage<1, [A8_NLSPipe]>,
1052*7330f729Sjoerg                               InstrStage<1, [A8_NPipe], 0>,
1053*7330f729Sjoerg                               InstrStage<2, [A8_NLSPipe]>],[4, 1, 2, 2, 3, 1]>,
1054*7330f729Sjoerg  InstrItinData<IIC_VTBX4,    [InstrStage<1, [A8_Pipe0, A8_Pipe1], 0>,
1055*7330f729Sjoerg                               InstrStage<1, [A8_NLSPipe]>,
1056*7330f729Sjoerg                               InstrStage<1, [A8_NPipe], 0>,
1057*7330f729Sjoerg                            InstrStage<2, [A8_NLSPipe]>], [4, 1, 2, 2, 3, 3, 1]>
1058*7330f729Sjoerg]>;
1059*7330f729Sjoerg
1060*7330f729Sjoerg// ===---------------------------------------------------------------------===//
1061*7330f729Sjoerg// The following definitions describe the simple machine model which
1062*7330f729Sjoerg// will replace itineraries.
1063*7330f729Sjoerg
1064*7330f729Sjoerg// Cortex-A8 machine model for scheduling and other instruction cost heuristics.
1065*7330f729Sjoergdef CortexA8Model : SchedMachineModel {
1066*7330f729Sjoerg  let IssueWidth = 2; // 2 micro-ops are dispatched per cycle.
1067*7330f729Sjoerg  let LoadLatency = 2; // Optimistic load latency assuming bypass.
1068*7330f729Sjoerg                       // This is overridden by OperandCycles if the
1069*7330f729Sjoerg                       // Itineraries are queried instead.
1070*7330f729Sjoerg  let MispredictPenalty = 13; // Based on estimate of pipeline depth.
1071*7330f729Sjoerg  let CompleteModel = 0; // NOTE(review): presumably the model is partial; confirm.
1072*7330f729Sjoerg
1073*7330f729Sjoerg  let Itineraries = CortexA8Itineraries; // Defined earlier in this file.
1074*7330f729Sjoerg}
1075