xref: /netbsd-src/external/apache2/llvm/dist/llvm/lib/Target/AArch64/AArch64SchedA55.td (revision a04395531661c5e8d314125d5ae77d4cbedd5d73)
1//==- AArch64SchedCortexA55.td - ARM Cortex-A55 Scheduling Definitions -*- tablegen -*-=//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines the machine model for the ARM Cortex-A55 processors.
10//
11//===----------------------------------------------------------------------===//
12
13// ===---------------------------------------------------------------------===//
14// The following definitions describe the per-operand machine model.
15// This works with MachineScheduler. See MCSchedModel.h for details.
16
17// Cortex-A55 machine model for scheduling and other instruction cost heuristics.
18def CortexA55Model : SchedMachineModel {
19  let MicroOpBufferSize = 0;  // 0 because the Cortex-A55 is an in-order processor
20  let IssueWidth = 2;         // It dual-issues under most circumstances
21  let LoadLatency = 4;        // Cycles for loads to access the cache. The
22                              // optimisation guide shows that most loads have
23                              // a latency of 3, but some have a latency of 4
24                              // or 5. Setting it to 4 looked to be a good trade-off.
25  let MispredictPenalty = 8;  // A branch direction mispredict.
26  let PostRAScheduler = 1;    // Enable PostRA scheduler pass.
27  let CompleteModel = 0;      // Covers instructions applicable to Cortex-A55.
28
29  list<Predicate> UnsupportedFeatures = [HasSVE];  // SVE is listed as unsupported by this model.
30
31  // FIXME: Remove when all errors have been fixed.
32  let FullInstRWOverlapCheck = 0;  // InstRW overlap checking is switched off until then.
33}
34
35//===----------------------------------------------------------------------===//
36// Define each kind of processor resource and number available.
37
38// Each pipeline is modeled as a ProcResource with BufferSize = 0 because the
39// Cortex-A55 is in-order.
40
41def CortexA55UnitALU    : ProcResource<2> { let BufferSize = 0; } // Int ALU
42def CortexA55UnitMAC    : ProcResource<1> { let BufferSize = 0; } // Int MAC, 64-bit wide
43def CortexA55UnitDiv    : ProcResource<1> { let BufferSize = 0; } // Int Division, not pipelined
44def CortexA55UnitLd     : ProcResource<1> { let BufferSize = 0; } // Load pipe
45def CortexA55UnitSt     : ProcResource<1> { let BufferSize = 0; } // Store pipe
46def CortexA55UnitB      : ProcResource<1> { let BufferSize = 0; } // Branch
47
48// The FP DIV/SQRT instructions execute totally differently from the FP ALU
49// instructions, which can mostly be dual-issued; that's why for now we model
50// them with 2 resources.
51def CortexA55UnitFPALU  : ProcResource<2> { let BufferSize = 0; } // FP ALU
52def CortexA55UnitFPMAC  : ProcResource<2> { let BufferSize = 0; } // FP MAC
53def CortexA55UnitFPDIV  : ProcResource<1> { let BufferSize = 0; } // FP Div/SQRT, 64/128; single unit
54
55//===----------------------------------------------------------------------===//
56// Subtarget-specific SchedWrite types
57
58let SchedModel = CortexA55Model in {
59
60// These latencies are modeled without taking into account forwarding paths
61// (the software optimisation guide lists latencies that already include
62// typical forwarding paths). Forwarding is modeled via ReadAdvance entries.
63def : WriteRes<WriteImm, [CortexA55UnitALU]> { let Latency = 3; }    // MOVN, MOVZ
64def : WriteRes<WriteI, [CortexA55UnitALU]> { let Latency = 3; }      // ALU
65def : WriteRes<WriteISReg, [CortexA55UnitALU]> { let Latency = 3; }  // ALU of Shifted-Reg
66def : WriteRes<WriteIEReg, [CortexA55UnitALU]> { let Latency = 3; }  // ALU of Extended-Reg
67def : WriteRes<WriteExtr, [CortexA55UnitALU]> { let Latency = 3; }   // EXTR from a reg pair
68def : WriteRes<WriteIS, [CortexA55UnitALU]> { let Latency = 3; }     // Shift/Scale
69
70// MAC - 32- and 64-bit multiplies share the single MAC unit, latency 4
71def : WriteRes<WriteIM32, [CortexA55UnitMAC]> { let Latency = 4; }   // 32-bit Multiply
72def : WriteRes<WriteIM64, [CortexA55UnitMAC]> { let Latency = 4; }   // 64-bit Multiply
73
74// Div - latency 8; the divider is held for all 8 cycles (not pipelined)
75def : WriteRes<WriteID32, [CortexA55UnitDiv]> {
76  let Latency = 8; let ResourceCycles = [8];
77}
78def : WriteRes<WriteID64, [CortexA55UnitDiv]> {
79  let Latency = 8; let ResourceCycles = [8];
80}
81
82// Load - all scalar load writes go through the single load pipe
83def : WriteRes<WriteLD, [CortexA55UnitLd]> { let Latency = 3; }
84def : WriteRes<WriteLDIdx, [CortexA55UnitLd]> { let Latency = 4; }
85def : WriteRes<WriteLDHi, [CortexA55UnitLd]> { let Latency = 5; }
86
87// Vector Load - Vector loads take 1-5 cycles to issue. For the generic
88//               WriteVLD below, the median of 3 is chosen, making the latency 6.
89// An extra cycle is needed to get the swizzling right.
90def : WriteRes<WriteVLD, [CortexA55UnitLd]> { let Latency = 6;
91                                           let ResourceCycles = [3]; }
92def CortexA55WriteVLD1 : SchedWriteRes<[CortexA55UnitLd]> { let Latency = 4; } // VLD<N>: latency N + 3; ResourceCycles = [N] for N >= 2
93def CortexA55WriteVLD2 : SchedWriteRes<[CortexA55UnitLd]> { let Latency = 5;
94                                                  let ResourceCycles = [2]; }
95def CortexA55WriteVLD3 : SchedWriteRes<[CortexA55UnitLd]> { let Latency = 6;
96                                                  let ResourceCycles = [3]; }
97def CortexA55WriteVLD4 : SchedWriteRes<[CortexA55UnitLd]> { let Latency = 7;
98                                                  let ResourceCycles = [4]; }
99def CortexA55WriteVLD5 : SchedWriteRes<[CortexA55UnitLd]> { let Latency = 8; // NOTE(review): no InstRW visible in this file uses VLD5
100                                                  let ResourceCycles = [5]; }
101def CortexA55WriteVLD6 : SchedWriteRes<[CortexA55UnitLd]> { let Latency = 9;
102                                                  let ResourceCycles = [6]; }
103def CortexA55WriteVLD7 : SchedWriteRes<[CortexA55UnitLd]> { let Latency = 10; // NOTE(review): no InstRW visible in this file uses VLD7
104                                                  let ResourceCycles = [7]; }
105def CortexA55WriteVLD8 : SchedWriteRes<[CortexA55UnitLd]> { let Latency = 11;
106                                                  let ResourceCycles = [8]; }
107
108// Pre/Post Indexing - Performed as part of address generation (no resource, zero latency)
109def : WriteRes<WriteAdr, []> { let Latency = 0; }
110
111// Store - every scalar store write uses the store pipe with latency 4
112def : WriteRes<WriteST, [CortexA55UnitSt]> { let Latency = 4; }
113def : WriteRes<WriteSTP, [CortexA55UnitSt]> { let Latency = 4; }
114def : WriteRes<WriteSTIdx, [CortexA55UnitSt]> { let Latency = 4; }
115def : WriteRes<WriteSTX, [CortexA55UnitSt]> { let Latency = 4; }
116
117// Vector Store - Similar to vector loads, can take 1-3 cycles to issue.
118def : WriteRes<WriteVST, [CortexA55UnitSt]> { let Latency = 5;
119                                          let ResourceCycles = [2];}
120def CortexA55WriteVST1 : SchedWriteRes<[CortexA55UnitSt]> { let Latency = 4; }
121def CortexA55WriteVST2 : SchedWriteRes<[CortexA55UnitSt]> { let Latency = 5;
122                                                  let ResourceCycles = [2]; }
123def CortexA55WriteVST3 : SchedWriteRes<[CortexA55UnitSt]> { let Latency = 6; // NOTE(review): no InstRW visible in this file uses VST3
124                                                  let ResourceCycles = [3]; }
125def CortexA55WriteVST4 : SchedWriteRes<[CortexA55UnitSt]> { let Latency = 5; // NOTE(review): breaks the 4/5/6 progression of VST1-3 - confirm against the optimisation guide
126                                                  let ResourceCycles = [4]; }
127
128def : WriteRes<WriteAtomic, []> { let Unsupported = 1; } // Atomics are flagged Unsupported in this model
129
130// Branch - branch, system, barrier and hint writes all use the branch unit; no WriteRes fields are overridden
131def : WriteRes<WriteBr, [CortexA55UnitB]>;
132def : WriteRes<WriteBrReg, [CortexA55UnitB]>;
133def : WriteRes<WriteSys, [CortexA55UnitB]>;
134def : WriteRes<WriteBarrier, [CortexA55UnitB]>;
135def : WriteRes<WriteHint, [CortexA55UnitB]>;
136
137// FP ALU
138//   As the WriteF result is produced in F5 and can mostly be forwarded
139//   to the consumer at F1, the effective latency is set to 4.
140def : WriteRes<WriteF, [CortexA55UnitFPALU]> { let Latency = 4; }
141def : WriteRes<WriteFCmp, [CortexA55UnitFPALU]> { let Latency = 3; }
142def : WriteRes<WriteFCvt, [CortexA55UnitFPALU]> { let Latency = 4; }
143def : WriteRes<WriteFCopy, [CortexA55UnitFPALU]> { let Latency = 3; }
144def : WriteRes<WriteFImm, [CortexA55UnitFPALU]> { let Latency = 3; }
145def : WriteRes<WriteV, [CortexA55UnitFPALU]> { let Latency = 4; }
146
147// FP ALU specific new schedwrite definitions; the _F<N> suffix matches the latency N
148def CortexA55WriteFPALU_F3 : SchedWriteRes<[CortexA55UnitFPALU]> { let Latency = 3;}
149def CortexA55WriteFPALU_F4 : SchedWriteRes<[CortexA55UnitFPALU]> { let Latency = 4;}
150def CortexA55WriteFPALU_F5 : SchedWriteRes<[CortexA55UnitFPALU]> { let Latency = 5;} // NOTE(review): no InstRW visible in this file uses _F5
151
152// FP Mul, Div, Sqrt. Div/Sqrt are not pipelined
153def : WriteRes<WriteFMul, [CortexA55UnitFPMAC]> { let Latency = 4; }
154
155let RetireOOO = 1 in { // RetireOOO = 1 applies to every def up to the closing brace
156def : WriteRes<WriteFDiv, [CortexA55UnitFPDIV]> { let Latency = 22;
157                                            let ResourceCycles = [29]; } // NOTE(review): 29 cycles exceeds every precision-specific entry below (max 19) - confirm
158def CortexA55WriteFMAC : SchedWriteRes<[CortexA55UnitFPMAC]> { let Latency = 4; } // FP multiply-accumulate, on the FP MAC unit
159def CortexA55WriteFDivHP : SchedWriteRes<[CortexA55UnitFPDIV]> { let Latency = 8;
160                                                     let ResourceCycles = [5]; }
161def CortexA55WriteFDivSP : SchedWriteRes<[CortexA55UnitFPDIV]> { let Latency = 13;
162                                                     let ResourceCycles = [10]; }
163def CortexA55WriteFDivDP : SchedWriteRes<[CortexA55UnitFPDIV]> { let Latency = 22;
164                                                     let ResourceCycles = [19]; }
165def CortexA55WriteFSqrtHP : SchedWriteRes<[CortexA55UnitFPDIV]> { let Latency = 8;
166                                                      let ResourceCycles = [5]; }
167def CortexA55WriteFSqrtSP : SchedWriteRes<[CortexA55UnitFPDIV]> { let Latency = 12;
168                                                      let ResourceCycles = [9]; }
169def CortexA55WriteFSqrtDP : SchedWriteRes<[CortexA55UnitFPDIV]> { let Latency = 22;
170                                                      let ResourceCycles = [19]; }
171} // end let RetireOOO = 1
172//===----------------------------------------------------------------------===//
173// Subtarget-specific SchedRead types.
174
175def : ReadAdvance<ReadVLD, 0>; // no advance, no producer list
176def : ReadAdvance<ReadExtrHi, 1>;
177def : ReadAdvance<ReadAdrBase, 1>;
178
179// ALU - ALU input operands are generally needed in EX1. An operand produced
180//       in, say, EX2 can be forwarded for consumption to ALU in EX1, thereby
181//       allowing back-to-back ALU operations such as add. If an operand requires
182//       a shift, it will, however, be required in the ISS stage.
183def : ReadAdvance<ReadI, 2, [WriteImm,WriteI,
184                             WriteISReg, WriteIEReg,WriteIS,
185                             WriteID32,WriteID64,
186                             WriteIM32,WriteIM64]>;
187// Shifted operand - advance is 1 when the shift/extend predicate matches, else 2
188def CortexA55ReadShifted : SchedReadAdvance<1, [WriteImm,WriteI,
189                                          WriteISReg, WriteIEReg,WriteIS,
190                                          WriteID32,WriteID64,
191                                          WriteIM32,WriteIM64]>;
192def CortexA55ReadNotShifted : SchedReadAdvance<2, [WriteImm,WriteI,
193                                             WriteISReg, WriteIEReg,WriteIS,
194                                             WriteID32,WriteID64,
195                                             WriteIM32,WriteIM64]>;
196def CortexA55ReadISReg : SchedReadVariant<[
197        SchedVar<RegShiftedPred, [CortexA55ReadShifted]>,
198        SchedVar<NoSchedPred, [CortexA55ReadNotShifted]>]>;
199def : SchedAlias<ReadISReg, CortexA55ReadISReg>; // ReadISReg resolves to the variant above
200
201def CortexA55ReadIEReg : SchedReadVariant<[
202        SchedVar<RegExtendedPred, [CortexA55ReadShifted]>,
203        SchedVar<NoSchedPred, [CortexA55ReadNotShifted]>]>;
204def : SchedAlias<ReadIEReg, CortexA55ReadIEReg>; // ReadIEReg resolves to the variant above
205
206// MUL - ReadIM advances by 1 cycle and ReadIMA by 2, for the same producer list as ReadI
207def : ReadAdvance<ReadIM, 1, [WriteImm,WriteI,
208                              WriteISReg, WriteIEReg,WriteIS,
209                              WriteID32,WriteID64,
210                              WriteIM32,WriteIM64]>;
211def : ReadAdvance<ReadIMA, 2, [WriteImm,WriteI,
212                               WriteISReg, WriteIEReg,WriteIS,
213                               WriteID32,WriteID64,
214                               WriteIM32,WriteIM64]>;
215
216// Div - ReadID advances by 1 cycle for the same producer list
217def : ReadAdvance<ReadID, 1, [WriteImm,WriteI,
218                              WriteISReg, WriteIEReg,WriteIS,
219                              WriteID32,WriteID64,
220                              WriteIM32,WriteIM64]>;
221
222//===----------------------------------------------------------------------===//
223// Subtarget-specific InstRWs.
224
225//---
226// Miscellaneous - opcodes matching "LDP.*" and the COPY pseudo
227//---
228def : InstRW<[CortexA55WriteVLD2,CortexA55WriteVLD1], (instregex "LDP.*")>; // two writes listed: VLD2, then VLD1
229def : InstRW<[WriteI], (instrs COPY)>; // COPY is scheduled as a plain ALU write
230//---
231// Vector Loads - 64-bit per cycle
232//---
233//   1-element structures: for LD1One/Two/Three/Fourv the 128-bit arrangements use twice the VLD index of the 64-bit ones
234def : InstRW<[CortexA55WriteVLD1], (instregex "LD1i(8|16|32|64)$")>;                // single element
235def : InstRW<[CortexA55WriteVLD1], (instregex "LD1Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>; // replicate
236def : InstRW<[CortexA55WriteVLD1], (instregex "LD1Onev(8b|4h|2s|1d)$")>;
237def : InstRW<[CortexA55WriteVLD2], (instregex "LD1Onev(16b|8h|4s|2d)$")>;
238def : InstRW<[CortexA55WriteVLD2], (instregex "LD1Twov(8b|4h|2s|1d)$")>; // multiple structures
239def : InstRW<[CortexA55WriteVLD4], (instregex "LD1Twov(16b|8h|4s|2d)$")>;
240def : InstRW<[CortexA55WriteVLD3], (instregex "LD1Threev(8b|4h|2s|1d)$")>;
241def : InstRW<[CortexA55WriteVLD6], (instregex "LD1Threev(16b|8h|4s|2d)$")>;
242def : InstRW<[CortexA55WriteVLD4], (instregex "LD1Fourv(8b|4h|2s|1d)$")>;
243def : InstRW<[CortexA55WriteVLD8], (instregex "LD1Fourv(16b|8h|4s|2d)$")>;
244
245def : InstRW<[CortexA55WriteVLD1, WriteAdr], (instregex "LD1i(8|16|32|64)_POST$")>; // "_POST" forms: same load write as the plain form, plus WriteAdr
246def : InstRW<[CortexA55WriteVLD1, WriteAdr], (instregex "LD1Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
247def : InstRW<[CortexA55WriteVLD1, WriteAdr], (instregex "LD1Onev(8b|4h|2s|1d)_POST$")>;
248def : InstRW<[CortexA55WriteVLD2, WriteAdr], (instregex "LD1Onev(16b|8h|4s|2d)_POST$")>;
249def : InstRW<[CortexA55WriteVLD2, WriteAdr], (instregex "LD1Twov(8b|4h|2s|1d)_POST$")>;
250def : InstRW<[CortexA55WriteVLD4, WriteAdr], (instregex "LD1Twov(16b|8h|4s|2d)_POST$")>;
251def : InstRW<[CortexA55WriteVLD3, WriteAdr], (instregex "LD1Threev(8b|4h|2s|1d)_POST$")>;
252def : InstRW<[CortexA55WriteVLD6, WriteAdr], (instregex "LD1Threev(16b|8h|4s|2d)_POST$")>;
253def : InstRW<[CortexA55WriteVLD4, WriteAdr], (instregex "LD1Fourv(8b|4h|2s|1d)_POST$")>;
254def : InstRW<[CortexA55WriteVLD8, WriteAdr], (instregex "LD1Fourv(16b|8h|4s|2d)_POST$")>;
255
256//    2-element structures: lane and replicate forms use VLD2; LD2Twov uses VLD2 (64-bit) or VLD4 (128-bit)
257def : InstRW<[CortexA55WriteVLD2], (instregex "LD2i(8|16|32|64)$")>;
258def : InstRW<[CortexA55WriteVLD2], (instregex "LD2Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
259def : InstRW<[CortexA55WriteVLD2], (instregex "LD2Twov(8b|4h|2s)$")>;
260def : InstRW<[CortexA55WriteVLD4], (instregex "LD2Twov(16b|8h|4s|2d)$")>;
261
262def : InstRW<[CortexA55WriteVLD2, WriteAdr], (instregex "LD2i(8|16|32|64)_POST$")>; // "_POST" is mandatory so these entries cannot also match the plain opcodes above
263def : InstRW<[CortexA55WriteVLD2, WriteAdr], (instregex "LD2Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
264def : InstRW<[CortexA55WriteVLD2, WriteAdr], (instregex "LD2Twov(8b|4h|2s)_POST$")>;
265def : InstRW<[CortexA55WriteVLD4, WriteAdr], (instregex "LD2Twov(16b|8h|4s|2d)_POST$")>;
266
267//    3-element structures: lane and replicate forms use VLD2; LD3Threev uses VLD3 (64-bit) or VLD6 (128-bit)
268def : InstRW<[CortexA55WriteVLD2], (instregex "LD3i(8|16|32|64)$")>;
269def : InstRW<[CortexA55WriteVLD2], (instregex "LD3Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
270def : InstRW<[CortexA55WriteVLD3], (instregex "LD3Threev(8b|4h|2s|1d)$")>;
271def : InstRW<[CortexA55WriteVLD6], (instregex "LD3Threev(16b|8h|4s|2d)$")>;
272
273def : InstRW<[CortexA55WriteVLD2, WriteAdr], (instregex "LD3i(8|16|32|64)_POST$")>; // "_POST" forms add WriteAdr
274def : InstRW<[CortexA55WriteVLD2, WriteAdr], (instregex "LD3Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
275def : InstRW<[CortexA55WriteVLD3, WriteAdr], (instregex "LD3Threev(8b|4h|2s|1d)_POST$")>;
276def : InstRW<[CortexA55WriteVLD6, WriteAdr], (instregex "LD3Threev(16b|8h|4s|2d)_POST$")>;
277
278//    4-element structures: lane and replicate forms use VLD2; LD4Fourv uses VLD4 (64-bit) or VLD8 (128-bit)
279def : InstRW<[CortexA55WriteVLD2], (instregex "LD4i(8|16|32|64)$")>;                // load single 4-el structure to one lane of 4 regs.
280def : InstRW<[CortexA55WriteVLD2], (instregex "LD4Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>; // load single 4-el structure, replicate to all lanes of 4 regs.
281def : InstRW<[CortexA55WriteVLD4], (instregex "LD4Fourv(8b|4h|2s|1d)$")>;           // load multiple 4-el structures to 4 regs.
282def : InstRW<[CortexA55WriteVLD8], (instregex "LD4Fourv(16b|8h|4s|2d)$")>;
283
284def : InstRW<[CortexA55WriteVLD2, WriteAdr], (instregex "LD4i(8|16|32|64)_POST$")>; // "_POST" forms add WriteAdr
285def : InstRW<[CortexA55WriteVLD2, WriteAdr], (instregex "LD4Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
286def : InstRW<[CortexA55WriteVLD4, WriteAdr], (instregex "LD4Fourv(8b|4h|2s|1d)_POST$")>;
287def : InstRW<[CortexA55WriteVLD8, WriteAdr], (instregex "LD4Fourv(16b|8h|4s|2d)_POST$")>;
288
289//---
290// Vector Stores - each "_POST" entry repeats the plain entry's store write and adds WriteAdr
291//---
292def : InstRW<[CortexA55WriteVST1], (instregex "ST1i(8|16|32|64)$")>;
293def : InstRW<[CortexA55WriteVST1], (instregex "ST1Onev(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
294def : InstRW<[CortexA55WriteVST1], (instregex "ST1Twov(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
295def : InstRW<[CortexA55WriteVST2], (instregex "ST1Threev(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
296def : InstRW<[CortexA55WriteVST4], (instregex "ST1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
297def : InstRW<[CortexA55WriteVST1, WriteAdr], (instregex "ST1i(8|16|32|64)_POST$")>;
298def : InstRW<[CortexA55WriteVST1, WriteAdr], (instregex "ST1Onev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
299def : InstRW<[CortexA55WriteVST1, WriteAdr], (instregex "ST1Twov(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
300def : InstRW<[CortexA55WriteVST2, WriteAdr], (instregex "ST1Threev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
301def : InstRW<[CortexA55WriteVST4, WriteAdr], (instregex "ST1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
302
303def : InstRW<[CortexA55WriteVST2], (instregex "ST2i(8|16|32|64)$")>; // ST2: lane form and 64-bit ST2Twov use VST2, 128-bit ST2Twov uses VST4
304def : InstRW<[CortexA55WriteVST2], (instregex "ST2Twov(8b|4h|2s)$")>;
305def : InstRW<[CortexA55WriteVST4], (instregex "ST2Twov(16b|8h|4s|2d)$")>;
306def : InstRW<[CortexA55WriteVST2, WriteAdr], (instregex "ST2i(8|16|32|64)_POST$")>;
307def : InstRW<[CortexA55WriteVST2, WriteAdr], (instregex "ST2Twov(8b|4h|2s)_POST$")>;
308def : InstRW<[CortexA55WriteVST4, WriteAdr], (instregex "ST2Twov(16b|8h|4s|2d)_POST$")>;
309
310def : InstRW<[CortexA55WriteVST2], (instregex "ST3i(8|16|32|64)$")>; // ST3: lane form uses VST2, ST3Threev uses VST4
311def : InstRW<[CortexA55WriteVST4], (instregex "ST3Threev(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
312def : InstRW<[CortexA55WriteVST2, WriteAdr], (instregex "ST3i(8|16|32|64)_POST$")>;
313def : InstRW<[CortexA55WriteVST4, WriteAdr], (instregex "ST3Threev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; // same arrangement list as the plain ST3Threev entry
314
315def : InstRW<[CortexA55WriteVST2], (instregex "ST4i(8|16|32|64)$")>; // ST4: ST4i forms use VST2, ST4Fourv forms use VST4
316def : InstRW<[CortexA55WriteVST4], (instregex "ST4Fourv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
317def : InstRW<[CortexA55WriteVST2, WriteAdr], (instregex "ST4i(8|16|32|64)_POST$")>; // "_POST" forms add WriteAdr
318def : InstRW<[CortexA55WriteVST4, WriteAdr], (instregex "ST4Fourv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
319
320//---
321// Floating Point Conversions, MAC, DIV, SQRT
322//---
323def : InstRW<[CortexA55WriteFPALU_F3], (instregex "^FCVT[ALMNPZ][SU](S|U)?(W|X)")>; // FCVT opcodes with W/X in the name: latency-3 FP ALU write
324def : InstRW<[CortexA55WriteFPALU_F4], (instregex "^FCVT(X)?[ALMNPXZ](S|U|N)?v")>; // vector ("v") FCVT opcodes: latency 4
325
326def : InstRW<[CortexA55WriteFPALU_F4], (instregex "^(S|U)CVTF(S|U)(W|X)(H|S|D)")>; // all SCVTF/UCVTF patterns below: latency 4
327def : InstRW<[CortexA55WriteFPALU_F4], (instregex "^(S|U)CVTF(h|s|d)")>;
328def : InstRW<[CortexA55WriteFPALU_F4], (instregex "^(S|U)CVTFv")>;
329
330def : InstRW<[CortexA55WriteFMAC], (instregex "^FN?M(ADD|SUB).*")>; // FMADD/FMSUB/FNMADD/FNMSUB prefixes
331def : InstRW<[CortexA55WriteFMAC], (instregex "^FML(A|S).*")>; // FMLA/FMLS prefixes
332def : InstRW<[CortexA55WriteFDivHP], (instrs FDIVHrr)>; // scalar FDIV: one entry per precision (H/S/D)
333def : InstRW<[CortexA55WriteFDivSP], (instrs FDIVSrr)>;
334def : InstRW<[CortexA55WriteFDivDP], (instrs FDIVDrr)>;
335def : InstRW<[CortexA55WriteFDivHP], (instregex "^FDIVv.*16$")>; // vector FDIV: selected by the 16/32/64 suffix
336def : InstRW<[CortexA55WriteFDivSP], (instregex "^FDIVv.*32$")>;
337def : InstRW<[CortexA55WriteFDivDP], (instregex "^FDIVv.*64$")>;
338def : InstRW<[CortexA55WriteFSqrtHP], (instregex "^.*SQRT.*16$")>; // NOTE(review): not anchored to FSQRT - matches any opcode containing "SQRT" with this suffix; confirm intended
339def : InstRW<[CortexA55WriteFSqrtSP], (instregex "^.*SQRT.*32$")>;
340def : InstRW<[CortexA55WriteFSqrtDP], (instregex "^.*SQRT.*64$")>;
341
342}
343