xref: /llvm-project/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp (revision 3d9327bd06e88c81174776276eadb7fb41cbb3dd)
1 //===- ARMDisassembler.cpp - Disassembler for ARM/Thumb ISA ----*- C++ -*-===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file is part of the ARM Disassembler.
11 // It contains code to translate the data produced by the decoder into MCInsts.
12 // Documentation for the disassembler can be found in ARMDisassembler.h.
13 //
14 //===----------------------------------------------------------------------===//
15 
16 #define DEBUG_TYPE "arm-disassembler"
17 
18 #include "ARMDisassembler.h"
19 #include "ARMDisassemblerCore.h"
20 
21 #include "llvm/MC/MCInst.h"
22 #include "llvm/Target/TargetRegistry.h"
23 #include "llvm/Support/Debug.h"
24 #include "llvm/Support/MemoryObject.h"
25 #include "llvm/Support/ErrorHandling.h"
26 #include "llvm/Support/raw_ostream.h"
27 
28 /// ARMDisassemblerTables.inc - ARMDisassemblerTables.inc is tblgen'ed from
29 /// RISCDisassemblerEmitter.cpp TableGen backend.  It contains:
30 ///
31 /// o Mappings from opcode to ARM/Thumb instruction format
32 ///
33 /// o static uint16_t decodeInstruction(uint32_t insn) - the decoding function
34 /// for an ARM instruction.
35 ///
36 /// o static uint16_t decodeThumbInstruction(field_t insn) - the decoding
37 /// function for a Thumb instruction.
38 ///
39 #include "../ARMGenDisassemblerTables.inc"
40 
41 namespace llvm {
42 
43 namespace ARMDisassembler {
44 
45 /// showBitVector - Use the raw_ostream to log a diagnostic message describing
46 /// the inidividual bits of the instruction.  This is a sample output:
47 ///
48 ///  31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10  9  8  7  6  5  4  3  2  1  0
49 /// -------------------------------------------------------------------------------------------------
50 /// | 1: 0: 1: 0| 1: 0: 1: 0| 1: 0: 1: 0| 1: 0: 1: 0| 1: 0: 1: 0| 1: 0: 1: 0| 1: 0: 1: 0| 1: 0: 1: 0|
51 /// -------------------------------------------------------------------------------------------------
52 ///
53 static inline void showBitVector(raw_ostream &os, const uint32_t &insn) {
54   os << " 31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11 10  9  8  7  6  5  4  3  2  1  0 \n";
55   os << "-------------------------------------------------------------------------------------------------\n";
56   os << '|';
57   for (unsigned i = 32; i != 0; --i) {
58     if (insn >> (i - 1) & 0x01)
59       os << " 1";
60     else
61       os << " 0";
62     os << (i%4 == 1 ? '|' : ':');
63   }
64   os << '\n';
65   os << "-------------------------------------------------------------------------------------------------\n";
66   os << '\n';
67 }
68 
69 /// decodeARMInstruction is a decorator function which tries special cases of
70 /// instruction matching before calling the auto-generated decoder function.
71 static unsigned decodeARMInstruction(uint32_t &insn) {
72   if (slice(insn, 31, 28) == 15)
73     goto AutoGenedDecoder;
74 
75   // Special case processing, if any, goes here....
76 
77   // LLVM combines the offset mode of A8.6.197 & A8.6.198 into STRB.
78   // The insufficient encoding information of the combined instruction confuses
79   // the decoder wrt BFC/BFI.  Therefore, we try to recover here.
80   // For BFC, Inst{27-21} = 0b0111110 & Inst{6-0} = 0b0011111.
81   // For BFI, Inst{27-21} = 0b0111110 & Inst{6-4} = 0b001 & Inst{3-0} =! 0b1111.
82   if (slice(insn, 27, 21) == 0x3e && slice(insn, 6, 4) == 1) {
83     if (slice(insn, 3, 0) == 15)
84       return ARM::BFC;
85     else
86       return ARM::BFI;
87   }
88 
89   // Ditto for ADDSrs, which is a super-instruction for A8.6.7 & A8.6.8.
90   // As a result, the decoder fails to decode UMULL properly.
91   if (slice(insn, 27, 21) == 0x04 && slice(insn, 7, 4) == 9) {
92     return ARM::UMULL;
93   }
94 
95   // Ditto for STR_PRE, which is a super-instruction for A8.6.194 & A8.6.195.
96   // As a result, the decoder fails to decode SBFX properly.
97   if (slice(insn, 27, 21) == 0x3d && slice(insn, 6, 4) == 5)
98     return ARM::SBFX;
99 
100   // And STRB_PRE, which is a super-instruction for A8.6.197 & A8.6.198.
101   // As a result, the decoder fails to decode UBFX properly.
102   if (slice(insn, 27, 21) == 0x3f && slice(insn, 6, 4) == 5)
103     return ARM::UBFX;
104 
105   // Ditto for STRT, which is a super-instruction for A8.6.210 Encoding A1 & A2.
106   // As a result, the decoder fails to deocode SSAT properly.
107   if (slice(insn, 27, 21) == 0x35 && slice(insn, 5, 4) == 1)
108     return slice(insn, 6, 6) == 0 ? ARM::SSATlsl : ARM::SSATasr;
109 
110   // Ditto for RSCrs, which is a super-instruction for A8.6.146 & A8.6.147.
111   // As a result, the decoder fails to decode STRHT/LDRHT/LDRSHT/LDRSBT.
112   if (slice(insn, 27, 24) == 0) {
113     switch (slice(insn, 21, 20)) {
114     case 2:
115       switch (slice(insn, 7, 4)) {
116       case 11:
117         return ARM::STRHT;
118       default:
119         break; // fallthrough
120       }
121       break;
122     case 3:
123       switch (slice(insn, 7, 4)) {
124       case 11:
125         return ARM::LDRHT;
126       case 13:
127         return ARM::LDRSBT;
128       case 15:
129         return ARM::LDRSHT;
130       default:
131         break; // fallthrough
132       }
133       break;
134     default:
135       break;   // fallthrough
136     }
137   }
138 
139   // Ditto for SBCrs, which is a super-instruction for A8.6.152 & A8.6.153.
140   // As a result, the decoder fails to decode STRH_Post/LDRD_POST/STRD_POST
141   // properly.
142   if (slice(insn, 27, 25) == 0 && slice(insn, 20, 20) == 0) {
143     unsigned PW = slice(insn, 24, 24) << 1 | slice(insn, 21, 21);
144     switch (slice(insn, 7, 4)) {
145     case 11:
146       switch (PW) {
147       case 2: // Offset
148         return ARM::STRH;
149       case 3: // Pre-indexed
150         return ARM::STRH_PRE;
151       case 0: // Post-indexed
152         return ARM::STRH_POST;
153       default:
154         break; // fallthrough
155       }
156       break;
157     case 13:
158       switch (PW) {
159       case 2: // Offset
160         return ARM::LDRD;
161       case 3: // Pre-indexed
162         return ARM::LDRD_PRE;
163       case 0: // Post-indexed
164         return ARM::LDRD_POST;
165       default:
166         break; // fallthrough
167       }
168       break;
169     case 15:
170       switch (PW) {
171       case 2: // Offset
172         return ARM::STRD;
173       case 3: // Pre-indexed
174         return ARM::STRD_PRE;
175       case 0: // Post-indexed
176         return ARM::STRD_POST;
177       default:
178         break; // fallthrough
179       }
180       break;
181     default:
182       break; // fallthrough
183     }
184   }
185 
186   // Ditto for SBCSSrs, which is a super-instruction for A8.6.152 & A8.6.153.
187   // As a result, the decoder fails to decode LDRH_POST/LDRSB_POST/LDRSH_POST
188   // properly.
189   if (slice(insn, 27, 25) == 0 && slice(insn, 20, 20) == 1) {
190     unsigned PW = slice(insn, 24, 24) << 1 | slice(insn, 21, 21);
191     switch (slice(insn, 7, 4)) {
192     case 11:
193       switch (PW) {
194       case 2: // Offset
195         return ARM::LDRH;
196       case 3: // Pre-indexed
197         return ARM::LDRH_PRE;
198       case 0: // Post-indexed
199         return ARM::LDRH_POST;
200       default:
201         break; // fallthrough
202       }
203       break;
204     case 13:
205       switch (PW) {
206       case 2: // Offset
207         return ARM::LDRSB;
208       case 3: // Pre-indexed
209         return ARM::LDRSB_PRE;
210       case 0: // Post-indexed
211         return ARM::LDRSB_POST;
212       default:
213         break; // fallthrough
214       }
215       break;
216     case 15:
217       switch (PW) {
218       case 2: // Offset
219         return ARM::LDRSH;
220       case 3: // Pre-indexed
221         return ARM::LDRSH_PRE;
222       case 0: // Post-indexed
223         return ARM::LDRSH_POST;
224       default:
225         break; // fallthrough
226       }
227       break;
228     default:
229       break; // fallthrough
230     }
231   }
232 
233 AutoGenedDecoder:
234   // Calling the auto-generated decoder function.
235   return decodeInstruction(insn);
236 }
237 
238 // Helper function for special case handling of LDR (literal) and friends.
239 // See, for example, A6.3.7 Load word: Table A6-18 Load word.
240 // See A8.6.57 T3, T4 & A8.6.60 T2 and friends for why we morphed the opcode
241 // before passing it on.
242 static unsigned T2Morph2LoadLiteral(unsigned Opcode) {
243   switch (Opcode) {
244   default:
245     return Opcode; // Return unmorphed opcode.
246 
247   case ARM::t2LDRDi8:
248     return ARM::t2LDRDpci;
249 
250   case ARM::t2LDR_POST:   case ARM::t2LDR_PRE:
251   case ARM::t2LDRi12:     case ARM::t2LDRi8:
252   case ARM::t2LDRs:
253     return ARM::t2LDRpci;
254 
255   case ARM::t2LDRB_POST:  case ARM::t2LDRB_PRE:
256   case ARM::t2LDRBi12:    case ARM::t2LDRBi8:
257   case ARM::t2LDRBs:
258     return ARM::t2LDRBpci;
259 
260   case ARM::t2LDRH_POST:  case ARM::t2LDRH_PRE:
261   case ARM::t2LDRHi12:    case ARM::t2LDRHi8:
262   case ARM::t2LDRHs:
263     return ARM::t2LDRHpci;
264 
265   case ARM::t2LDRSB_POST:  case ARM::t2LDRSB_PRE:
266   case ARM::t2LDRSBi12:    case ARM::t2LDRSBi8:
267   case ARM::t2LDRSBs:
268     return ARM::t2LDRSBpci;
269 
270   case ARM::t2LDRSH_POST:  case ARM::t2LDRSH_PRE:
271   case ARM::t2LDRSHi12:    case ARM::t2LDRSHi8:
272   case ARM::t2LDRSHs:
273     return ARM::t2LDRSHpci;
274   }
275 }
276 
277 /// decodeThumbSideEffect is a decorator function which can potentially twiddle
278 /// the instruction or morph the returned opcode under Thumb2.
279 ///
280 /// First it checks whether the insn is a NEON or VFP instr; if true, bit
281 /// twiddling could be performed on insn to turn it into an ARM NEON/VFP
282 /// equivalent instruction and decodeInstruction is called with the transformed
283 /// insn.
284 ///
285 /// Next, there is special handling for Load byte/halfword/word instruction by
286 /// checking whether Rn=0b1111 and call T2Morph2LoadLiteral() on the decoded
287 /// Thumb2 instruction.  See comments below for further details.
288 ///
289 /// Finally, one last check is made to see whether the insn is a NEON/VFP and
290 /// decodeInstruction(insn) is invoked on the original insn.
291 ///
292 /// Otherwise, decodeThumbInstruction is called with the original insn.
293 static unsigned decodeThumbSideEffect(bool IsThumb2, uint32_t &insn) {
294   if (IsThumb2) {
295     uint16_t op1 = slice(insn, 28, 27);
296     uint16_t op2 = slice(insn, 26, 20);
297 
298     // A6.3 32-bit Thumb instruction encoding
299     // Table A6-9 32-bit Thumb instruction encoding
300 
301     // The coprocessor instructions of interest are transformed to their ARM
302     // equivalents.
303 
304     // --------- Transform Begin Marker ---------
305     if ((op1 == 1 || op1 == 3) && slice(op2, 6, 4) == 7) {
306       // A7.4 Advanced SIMD data-processing instructions
307       // U bit of Thumb corresponds to Inst{24} of ARM.
308       uint16_t U = slice(op1, 1, 1);
309 
310       // Inst{28-24} of ARM = {1,0,0,1,U};
311       uint16_t bits28_24 = 9 << 1 | U;
312       DEBUG(showBitVector(errs(), insn));
313       setSlice(insn, 28, 24, bits28_24);
314       return decodeInstruction(insn);
315     }
316 
317     if (op1 == 3 && slice(op2, 6, 4) == 1 && slice(op2, 0, 0) == 0) {
318       // A7.7 Advanced SIMD element or structure load/store instructions
319       // Inst{27-24} of Thumb = 0b1001
320       // Inst{27-24} of ARM   = 0b0100
321       DEBUG(showBitVector(errs(), insn));
322       setSlice(insn, 27, 24, 4);
323       return decodeInstruction(insn);
324     }
325     // --------- Transform End Marker ---------
326 
327     // See, for example, A6.3.7 Load word: Table A6-18 Load word.
328     // See A8.6.57 T3, T4 & A8.6.60 T2 and friends for why we morphed the opcode
329     // before passing it on to our delegate.
330     if (op1 == 3 && slice(op2, 6, 5) == 0 && slice(op2, 0, 0) == 1
331         && slice(insn, 19, 16) == 15)
332       return T2Morph2LoadLiteral(decodeThumbInstruction(insn));
333 
334     // One last check for NEON/VFP instructions.
335     if ((op1 == 1 || op1 == 3) && slice(op2, 6, 6) == 1)
336       return decodeInstruction(insn);
337 
338     // Fall through.
339   }
340 
341   return decodeThumbInstruction(insn);
342 }
343 
344 static inline bool Thumb2PreloadOpcodeNoPCI(unsigned Opcode) {
345   switch (Opcode) {
346   default:
347     return false;
348   case ARM::t2PLDi12:   case ARM::t2PLDi8:
349   case ARM::t2PLDr:     case ARM::t2PLDs:
350   case ARM::t2PLDWi12:  case ARM::t2PLDWi8:
351   case ARM::t2PLDWr:    case ARM::t2PLDWs:
352   case ARM::t2PLIi12:   case ARM::t2PLIi8:
353   case ARM::t2PLIr:     case ARM::t2PLIs:
354     return true;
355   }
356 }
357 
358 static inline unsigned T2Morph2Preload2PCI(unsigned Opcode) {
359   switch (Opcode) {
360   default:
361     return 0;
362   case ARM::t2PLDi12:   case ARM::t2PLDi8:
363   case ARM::t2PLDr:     case ARM::t2PLDs:
364     return ARM::t2PLDpci;
365   case ARM::t2PLDWi12:  case ARM::t2PLDWi8:
366   case ARM::t2PLDWr:    case ARM::t2PLDWs:
367     return ARM::t2PLDWpci;
368   case ARM::t2PLIi12:   case ARM::t2PLIi8:
369   case ARM::t2PLIr:     case ARM::t2PLIs:
370     return ARM::t2PLIpci;
371   }
372 }
373 
374 //
375 // Public interface for the disassembler
376 //
377 
378 bool ARMDisassembler::getInstruction(MCInst &MI,
379                                      uint64_t &Size,
380                                      const MemoryObject &Region,
381                                      uint64_t Address,
382                                      raw_ostream &os) const {
383   // The machine instruction.
384   uint32_t insn;
385 
386   // We want to read exactly 4 bytes of data.
387   if (Region.readBytes(Address, 4, (uint8_t*)&insn, NULL) == -1)
388     return false;
389 
390   unsigned Opcode = decodeARMInstruction(insn);
391   ARMFormat Format = ARMFormats[Opcode];
392   NSFormat NSF = NSFormats[Opcode];
393   Size = 4;
394 
395   DEBUG({
396       errs() << "Opcode=" << Opcode << " Name=" << ARMUtils::OpcodeName(Opcode)
397              << " Format=" << stringForARMFormat(Format) << " NSFormat="
398              << stringForNSFormat(NSF) << '\n';
399       showBitVector(errs(), insn);
400     });
401 
402   AbstractARMMCBuilder *Builder =
403     ARMMCBuilderFactory::CreateMCBuilder(Opcode, Format, NSF);
404 
405   if (!Builder)
406     return false;
407 
408   if (!Builder->Build(MI, insn))
409     return false;
410 
411   delete Builder;
412 
413   return true;
414 }
415 
416 bool ThumbDisassembler::getInstruction(MCInst &MI,
417                                        uint64_t &Size,
418                                        const MemoryObject &Region,
419                                        uint64_t Address,
420                                        raw_ostream &os) const {
421   // The machine instruction.
422   uint32_t insn = 0;
423   uint32_t insn1 = 0;
424 
425   // A6.1 Thumb instruction set encoding
426   //
427   // If bits [15:11] of the halfword being decoded take any of the following
428   // values, the halfword is the first halfword of a 32-bit instruction:
429   // o 0b11101
430   // o 0b11110
431   // o 0b11111.
432   //
433   // Otherwise, the halfword is a 16-bit instruction.
434 
435   // Read 2 bytes of data first.
436   if (Region.readBytes(Address, 2, (uint8_t*)&insn, NULL) == -1)
437     return false;
438 
439   unsigned bits15_11 = slice(insn, 15, 11);
440   bool IsThumb2 = false;
441 
442   // 32-bit instructions if the bits [15:11] of the halfword matches
443   // { 0b11101 /* 0x1D */, 0b11110 /* 0x1E */, ob11111 /* 0x1F */ }.
444   if (bits15_11 == 0x1D || bits15_11 == 0x1E || bits15_11 == 0x1F) {
445     IsThumb2 = true;
446     if (Region.readBytes(Address + 2, 2, (uint8_t*)&insn1, NULL) == -1)
447       return false;
448     insn = (insn << 16 | insn1);
449   }
450 
451   // The insn could potentially be bit-twiddled in order to be decoded as an ARM
452   // NEON/VFP opcode.  In such case, the modified insn is later disassembled as
453   // an ARM NEON/VFP instruction.
454   //
455   // This is a short term solution for lack of encoding bits specified for the
456   // Thumb2 NEON/VFP instructions.  The long term solution could be adding some
457   // infrastructure to have each instruction support more than one encodings.
458   // Which encoding is used would be based on which subtarget the compiler/
459   // disassembler is working with at the time.  This would allow the sharing of
460   // the NEON patterns between ARM and Thumb2, as well as potential greater
461   // sharing between the regular ARM instructions and the 32-bit wide Thumb2
462   // instructions as well.
463   unsigned Opcode = decodeThumbSideEffect(IsThumb2, insn);
464 
465   // A8.6.117/119/120/121.
466   // PLD/PLDW/PLI instructions with Rn==15 is transformed to the pci variant.
467   if (Thumb2PreloadOpcodeNoPCI(Opcode) && slice(insn, 19, 16) == 15)
468     Opcode = T2Morph2Preload2PCI(Opcode);
469 
470   ARMFormat Format = ARMFormats[Opcode];
471   NSFormat NSF = NSFormats[Opcode];
472   Size = IsThumb2 ? 4 : 2;
473 
474   DEBUG({
475       errs() << "Opcode=" << Opcode << " Name=" << ARMUtils::OpcodeName(Opcode)
476              << " Format=" << stringForARMFormat(Format) << " NSFormat="
477              << stringForNSFormat(NSF) << '\n';
478       showBitVector(errs(), insn);
479     });
480 
481   AbstractARMMCBuilder *Builder =
482     ARMMCBuilderFactory::CreateMCBuilder(Opcode, Format, NSF);
483 
484   if (!Builder)
485     return false;
486 
487   if (!Builder->Build(MI, insn))
488     return false;
489 
490   delete Builder;
491 
492   return true;
493 }
494 
495 } // namespace ARM Disassembler
496 
497 static const MCDisassembler *createARMDisassembler(const Target &T) {
498   return new ARMDisassembler::ARMDisassembler;
499 }
500 
501 static const MCDisassembler *createThumbDisassembler(const Target &T) {
502   return new ARMDisassembler::ThumbDisassembler;
503 }
504 
505 extern "C" void LLVMInitializeARMDisassembler() {
506   // Register the disassembler.
507   TargetRegistry::RegisterMCDisassembler(TheARMTarget,
508                                          createARMDisassembler);
509   TargetRegistry::RegisterMCDisassembler(TheThumbTarget,
510                                          createThumbDisassembler);
511 }
512 
513 } // namespace llvm
514