1 //===- VarLenCodeEmitterGen.cpp - CEG for variable-length insts -----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // The CodeEmitterGen component for variable-length instructions.
10 //
// The basic CodeEmitterGen is almost exclusively designed for fixed-
// length instructions. A good analogy for its encoding scheme is how printf
// works: The (immutable) formatting string represents the fixed values in the
// encoded instruction. Placeholders (i.e. %something), on the other hand,
// represent the encoding of instruction operands.
16 // ```
17 // printf("1101 %src 1001 %dst", <encoded value for operand `src`>,
18 // <encoded value for operand `dst`>);
19 // ```
20 // VarLenCodeEmitterGen in this file provides an alternative encoding scheme
21 // that works more like a C++ stream operator:
22 // ```
23 // OS << 0b1101;
24 // if (Cond)
25 // OS << OperandEncoding0;
26 // OS << 0b1001 << OperandEncoding1;
27 // ```
// You are free to concatenate encoding fragments of arbitrary types (and
// sizes) at any bit position, which brings more flexibility to defining
// encodings for variable-length instructions.
31 //
32 // In a more specific way, instruction encoding is represented by a DAG type
33 // `Inst` field. Here is an example:
34 // ```
35 // dag Inst = (descend 0b1101, (operand "$src", 4), 0b1001,
36 // (operand "$dst", 4));
37 // ```
38 // It represents the following instruction encoding:
39 // ```
40 // MSB LSB
41 // 1101<encoding for operand src>1001<encoding for operand dst>
42 // ```
43 // For more details about DAG operators in the above snippet, please
44 // refer to \file include/llvm/Target/Target.td.
45 //
// VarLenCodeEmitter will convert the above DAG into the same helper function
// generated by CodeEmitter, `MCCodeEmitter::getBinaryCodeForInstr` (except
// for a few details).
49 //
50 //===----------------------------------------------------------------------===//
51
52 #include "VarLenCodeEmitterGen.h"
53 #include "CodeGenHwModes.h"
54 #include "CodeGenInstruction.h"
55 #include "CodeGenTarget.h"
56 #include "InfoByHwMode.h"
57 #include "llvm/ADT/ArrayRef.h"
58 #include "llvm/ADT/DenseMap.h"
59 #include "llvm/Support/raw_ostream.h"
60 #include "llvm/TableGen/Error.h"
61
62 using namespace llvm;
63
64 namespace {
65
66 class VarLenCodeEmitterGen {
67 RecordKeeper &Records;
68
69 DenseMap<Record *, VarLenInst> VarLenInsts;
70
71 // Emit based values (i.e. fixed bits in the encoded instructions)
72 void emitInstructionBaseValues(
73 raw_ostream &OS,
74 ArrayRef<const CodeGenInstruction *> NumberedInstructions,
75 CodeGenTarget &Target, int HwMode = -1);
76
77 std::string getInstructionCase(Record *R, CodeGenTarget &Target);
78 std::string getInstructionCaseForEncoding(Record *R, Record *EncodingDef,
79 CodeGenTarget &Target);
80
81 public:
VarLenCodeEmitterGen(RecordKeeper & R)82 explicit VarLenCodeEmitterGen(RecordKeeper &R) : Records(R) {}
83
84 void run(raw_ostream &OS);
85 };
86 } // end anonymous namespace
87
88 // Get the name of custom encoder or decoder, if there is any.
89 // Returns `{encoder name, decoder name}`.
getCustomCoders(ArrayRef<Init * > Args)90 static std::pair<StringRef, StringRef> getCustomCoders(ArrayRef<Init *> Args) {
91 std::pair<StringRef, StringRef> Result;
92 for (const auto *Arg : Args) {
93 const auto *DI = dyn_cast<DagInit>(Arg);
94 if (!DI)
95 continue;
96 const Init *Op = DI->getOperator();
97 if (!isa<DefInit>(Op))
98 continue;
99 // syntax: `(<encoder | decoder> "function name")`
100 StringRef OpName = cast<DefInit>(Op)->getDef()->getName();
101 if (OpName != "encoder" && OpName != "decoder")
102 continue;
103 if (!DI->getNumArgs() || !isa<StringInit>(DI->getArg(0)))
104 PrintFatalError("expected '" + OpName +
105 "' directive to be followed by a custom function name.");
106 StringRef FuncName = cast<StringInit>(DI->getArg(0))->getValue();
107 if (OpName == "encoder")
108 Result.first = FuncName;
109 else
110 Result.second = FuncName;
111 }
112 return Result;
113 }
114
VarLenInst(const DagInit * DI,const RecordVal * TheDef)115 VarLenInst::VarLenInst(const DagInit *DI, const RecordVal *TheDef)
116 : TheDef(TheDef), NumBits(0U) {
117 buildRec(DI);
118 for (const auto &S : Segments)
119 NumBits += S.BitWidth;
120 }
121
buildRec(const DagInit * DI)122 void VarLenInst::buildRec(const DagInit *DI) {
123 assert(TheDef && "The def record is nullptr ?");
124
125 std::string Op = DI->getOperator()->getAsString();
126
127 if (Op == "ascend" || Op == "descend") {
128 bool Reverse = Op == "descend";
129 int i = Reverse ? DI->getNumArgs() - 1 : 0;
130 int e = Reverse ? -1 : DI->getNumArgs();
131 int s = Reverse ? -1 : 1;
132 for (; i != e; i += s) {
133 const Init *Arg = DI->getArg(i);
134 if (const auto *BI = dyn_cast<BitsInit>(Arg)) {
135 if (!BI->isComplete())
136 PrintFatalError(TheDef->getLoc(),
137 "Expecting complete bits init in `" + Op + "`");
138 Segments.push_back({BI->getNumBits(), BI});
139 } else if (const auto *BI = dyn_cast<BitInit>(Arg)) {
140 if (!BI->isConcrete())
141 PrintFatalError(TheDef->getLoc(),
142 "Expecting concrete bit init in `" + Op + "`");
143 Segments.push_back({1, BI});
144 } else if (const auto *SubDI = dyn_cast<DagInit>(Arg)) {
145 buildRec(SubDI);
146 } else {
147 PrintFatalError(TheDef->getLoc(), "Unrecognized type of argument in `" +
148 Op + "`: " + Arg->getAsString());
149 }
150 }
151 } else if (Op == "operand") {
152 // (operand <operand name>, <# of bits>,
153 // [(encoder <custom encoder>)][, (decoder <custom decoder>)])
154 if (DI->getNumArgs() < 2)
155 PrintFatalError(TheDef->getLoc(),
156 "Expecting at least 2 arguments for `operand`");
157 HasDynamicSegment = true;
158 const Init *OperandName = DI->getArg(0), *NumBits = DI->getArg(1);
159 if (!isa<StringInit>(OperandName) || !isa<IntInit>(NumBits))
160 PrintFatalError(TheDef->getLoc(), "Invalid argument types for `operand`");
161
162 auto NumBitsVal = cast<IntInit>(NumBits)->getValue();
163 if (NumBitsVal <= 0)
164 PrintFatalError(TheDef->getLoc(), "Invalid number of bits for `operand`");
165
166 auto [CustomEncoder, CustomDecoder] =
167 getCustomCoders(DI->getArgs().slice(2));
168 Segments.push_back({static_cast<unsigned>(NumBitsVal), OperandName,
169 CustomEncoder, CustomDecoder});
170 } else if (Op == "slice") {
171 // (slice <operand name>, <high / low bit>, <low / high bit>,
172 // [(encoder <custom encoder>)][, (decoder <custom decoder>)])
173 if (DI->getNumArgs() < 3)
174 PrintFatalError(TheDef->getLoc(),
175 "Expecting at least 3 arguments for `slice`");
176 HasDynamicSegment = true;
177 Init *OperandName = DI->getArg(0), *HiBit = DI->getArg(1),
178 *LoBit = DI->getArg(2);
179 if (!isa<StringInit>(OperandName) || !isa<IntInit>(HiBit) ||
180 !isa<IntInit>(LoBit))
181 PrintFatalError(TheDef->getLoc(), "Invalid argument types for `slice`");
182
183 auto HiBitVal = cast<IntInit>(HiBit)->getValue(),
184 LoBitVal = cast<IntInit>(LoBit)->getValue();
185 if (HiBitVal < 0 || LoBitVal < 0)
186 PrintFatalError(TheDef->getLoc(), "Invalid bit range for `slice`");
187 bool NeedSwap = false;
188 unsigned NumBits = 0U;
189 if (HiBitVal < LoBitVal) {
190 NeedSwap = true;
191 NumBits = static_cast<unsigned>(LoBitVal - HiBitVal + 1);
192 } else {
193 NumBits = static_cast<unsigned>(HiBitVal - LoBitVal + 1);
194 }
195
196 auto [CustomEncoder, CustomDecoder] =
197 getCustomCoders(DI->getArgs().slice(3));
198
199 if (NeedSwap) {
200 // Normalization: Hi bit should always be the second argument.
201 Init *const NewArgs[] = {OperandName, LoBit, HiBit};
202 Segments.push_back({NumBits,
203 DagInit::get(DI->getOperator(), nullptr, NewArgs, {}),
204 CustomEncoder, CustomDecoder});
205 } else {
206 Segments.push_back({NumBits, DI, CustomEncoder, CustomDecoder});
207 }
208 }
209 }
210
run(raw_ostream & OS)211 void VarLenCodeEmitterGen::run(raw_ostream &OS) {
212 CodeGenTarget Target(Records);
213 auto Insts = Records.getAllDerivedDefinitions("Instruction");
214
215 auto NumberedInstructions = Target.getInstructionsByEnumValue();
216 const CodeGenHwModes &HWM = Target.getHwModes();
217
218 // The set of HwModes used by instruction encodings.
219 std::set<unsigned> HwModes;
220 for (const CodeGenInstruction *CGI : NumberedInstructions) {
221 Record *R = CGI->TheDef;
222
223 // Create the corresponding VarLenInst instance.
224 if (R->getValueAsString("Namespace") == "TargetOpcode" ||
225 R->getValueAsBit("isPseudo"))
226 continue;
227
228 if (const RecordVal *RV = R->getValue("EncodingInfos")) {
229 if (auto *DI = dyn_cast_or_null<DefInit>(RV->getValue())) {
230 EncodingInfoByHwMode EBM(DI->getDef(), HWM);
231 for (auto &KV : EBM) {
232 HwModes.insert(KV.first);
233 Record *EncodingDef = KV.second;
234 RecordVal *RV = EncodingDef->getValue("Inst");
235 DagInit *DI = cast<DagInit>(RV->getValue());
236 VarLenInsts.insert({EncodingDef, VarLenInst(DI, RV)});
237 }
238 continue;
239 }
240 }
241 RecordVal *RV = R->getValue("Inst");
242 DagInit *DI = cast<DagInit>(RV->getValue());
243 VarLenInsts.insert({R, VarLenInst(DI, RV)});
244 }
245
246 // Emit function declaration
247 OS << "void " << Target.getName()
248 << "MCCodeEmitter::getBinaryCodeForInstr(const MCInst &MI,\n"
249 << " SmallVectorImpl<MCFixup> &Fixups,\n"
250 << " APInt &Inst,\n"
251 << " APInt &Scratch,\n"
252 << " const MCSubtargetInfo &STI) const {\n";
253
254 // Emit instruction base values
255 if (HwModes.empty()) {
256 emitInstructionBaseValues(OS, NumberedInstructions, Target);
257 } else {
258 for (unsigned HwMode : HwModes)
259 emitInstructionBaseValues(OS, NumberedInstructions, Target, (int)HwMode);
260 }
261
262 if (!HwModes.empty()) {
263 OS << " const unsigned **Index;\n";
264 OS << " const uint64_t *InstBits;\n";
265 OS << " unsigned HwMode = STI.getHwMode();\n";
266 OS << " switch (HwMode) {\n";
267 OS << " default: llvm_unreachable(\"Unknown hardware mode!\"); break;\n";
268 for (unsigned I : HwModes) {
269 OS << " case " << I << ": InstBits = InstBits_" << HWM.getMode(I).Name
270 << "; Index = Index_" << HWM.getMode(I).Name << "; break;\n";
271 }
272 OS << " };\n";
273 }
274
275 // Emit helper function to retrieve base values.
276 OS << " auto getInstBits = [&](unsigned Opcode) -> APInt {\n"
277 << " unsigned NumBits = Index[Opcode][0];\n"
278 << " if (!NumBits)\n"
279 << " return APInt::getZeroWidth();\n"
280 << " unsigned Idx = Index[Opcode][1];\n"
281 << " ArrayRef<uint64_t> Data(&InstBits[Idx], "
282 << "APInt::getNumWords(NumBits));\n"
283 << " return APInt(NumBits, Data);\n"
284 << " };\n";
285
286 // Map to accumulate all the cases.
287 std::map<std::string, std::vector<std::string>> CaseMap;
288
289 // Construct all cases statement for each opcode
290 for (Record *R : Insts) {
291 if (R->getValueAsString("Namespace") == "TargetOpcode" ||
292 R->getValueAsBit("isPseudo"))
293 continue;
294 std::string InstName =
295 (R->getValueAsString("Namespace") + "::" + R->getName()).str();
296 std::string Case = getInstructionCase(R, Target);
297
298 CaseMap[Case].push_back(std::move(InstName));
299 }
300
301 // Emit initial function code
302 OS << " const unsigned opcode = MI.getOpcode();\n"
303 << " switch (opcode) {\n";
304
305 // Emit each case statement
306 for (const auto &C : CaseMap) {
307 const std::string &Case = C.first;
308 const auto &InstList = C.second;
309
310 ListSeparator LS("\n");
311 for (const auto &InstName : InstList)
312 OS << LS << " case " << InstName << ":";
313
314 OS << " {\n";
315 OS << Case;
316 OS << " break;\n"
317 << " }\n";
318 }
319 // Default case: unhandled opcode
320 OS << " default:\n"
321 << " std::string msg;\n"
322 << " raw_string_ostream Msg(msg);\n"
323 << " Msg << \"Not supported instr: \" << MI;\n"
324 << " report_fatal_error(Msg.str().c_str());\n"
325 << " }\n";
326 OS << "}\n\n";
327 }
328
emitInstBits(raw_ostream & IS,raw_ostream & SS,const APInt & Bits,unsigned & Index)329 static void emitInstBits(raw_ostream &IS, raw_ostream &SS, const APInt &Bits,
330 unsigned &Index) {
331 if (!Bits.getNumWords()) {
332 IS.indent(4) << "{/*NumBits*/0, /*Index*/0},";
333 return;
334 }
335
336 IS.indent(4) << "{/*NumBits*/" << Bits.getBitWidth() << ", "
337 << "/*Index*/" << Index << "},";
338
339 SS.indent(4);
340 for (unsigned I = 0; I < Bits.getNumWords(); ++I, ++Index)
341 SS << "UINT64_C(" << utostr(Bits.getRawData()[I]) << "),";
342 }
343
emitInstructionBaseValues(raw_ostream & OS,ArrayRef<const CodeGenInstruction * > NumberedInstructions,CodeGenTarget & Target,int HwMode)344 void VarLenCodeEmitterGen::emitInstructionBaseValues(
345 raw_ostream &OS, ArrayRef<const CodeGenInstruction *> NumberedInstructions,
346 CodeGenTarget &Target, int HwMode) {
347 std::string IndexArray, StorageArray;
348 raw_string_ostream IS(IndexArray), SS(StorageArray);
349
350 const CodeGenHwModes &HWM = Target.getHwModes();
351 if (HwMode == -1) {
352 IS << " static const unsigned Index[][2] = {\n";
353 SS << " static const uint64_t InstBits[] = {\n";
354 } else {
355 StringRef Name = HWM.getMode(HwMode).Name;
356 IS << " static const unsigned Index_" << Name << "[][2] = {\n";
357 SS << " static const uint64_t InstBits_" << Name << "[] = {\n";
358 }
359
360 unsigned NumFixedValueWords = 0U;
361 for (const CodeGenInstruction *CGI : NumberedInstructions) {
362 Record *R = CGI->TheDef;
363
364 if (R->getValueAsString("Namespace") == "TargetOpcode" ||
365 R->getValueAsBit("isPseudo")) {
366 IS.indent(4) << "{/*NumBits*/0, /*Index*/0},\n";
367 continue;
368 }
369
370 Record *EncodingDef = R;
371 if (const RecordVal *RV = R->getValue("EncodingInfos")) {
372 if (auto *DI = dyn_cast_or_null<DefInit>(RV->getValue())) {
373 EncodingInfoByHwMode EBM(DI->getDef(), HWM);
374 if (EBM.hasMode(HwMode))
375 EncodingDef = EBM.get(HwMode);
376 }
377 }
378
379 auto It = VarLenInsts.find(EncodingDef);
380 if (It == VarLenInsts.end())
381 PrintFatalError(EncodingDef, "VarLenInst not found for this record");
382 const VarLenInst &VLI = It->second;
383
384 unsigned i = 0U, BitWidth = VLI.size();
385
386 // Start by filling in fixed values.
387 APInt Value(BitWidth, 0);
388 auto SI = VLI.begin(), SE = VLI.end();
389 // Scan through all the segments that have fixed-bits values.
390 while (i < BitWidth && SI != SE) {
391 unsigned SegmentNumBits = SI->BitWidth;
392 if (const auto *BI = dyn_cast<BitsInit>(SI->Value)) {
393 for (unsigned Idx = 0U; Idx != SegmentNumBits; ++Idx) {
394 auto *B = cast<BitInit>(BI->getBit(Idx));
395 Value.setBitVal(i + Idx, B->getValue());
396 }
397 }
398 if (const auto *BI = dyn_cast<BitInit>(SI->Value))
399 Value.setBitVal(i, BI->getValue());
400
401 i += SegmentNumBits;
402 ++SI;
403 }
404
405 emitInstBits(IS, SS, Value, NumFixedValueWords);
406 IS << '\t' << "// " << R->getName() << "\n";
407 if (Value.getNumWords())
408 SS << '\t' << "// " << R->getName() << "\n";
409 }
410 IS.indent(4) << "{/*NumBits*/0, /*Index*/0}\n };\n";
411 SS.indent(4) << "UINT64_C(0)\n };\n";
412
413 OS << IS.str() << SS.str();
414 }
415
getInstructionCase(Record * R,CodeGenTarget & Target)416 std::string VarLenCodeEmitterGen::getInstructionCase(Record *R,
417 CodeGenTarget &Target) {
418 std::string Case;
419 if (const RecordVal *RV = R->getValue("EncodingInfos")) {
420 if (auto *DI = dyn_cast_or_null<DefInit>(RV->getValue())) {
421 const CodeGenHwModes &HWM = Target.getHwModes();
422 EncodingInfoByHwMode EBM(DI->getDef(), HWM);
423 Case += " switch (HwMode) {\n";
424 Case += " default: llvm_unreachable(\"Unhandled HwMode\");\n";
425 for (auto &KV : EBM) {
426 Case += " case " + itostr(KV.first) + ": {\n";
427 Case += getInstructionCaseForEncoding(R, KV.second, Target);
428 Case += " break;\n";
429 Case += " }\n";
430 }
431 Case += " }\n";
432 return Case;
433 }
434 }
435 return getInstructionCaseForEncoding(R, R, Target);
436 }
437
getInstructionCaseForEncoding(Record * R,Record * EncodingDef,CodeGenTarget & Target)438 std::string VarLenCodeEmitterGen::getInstructionCaseForEncoding(
439 Record *R, Record *EncodingDef, CodeGenTarget &Target) {
440 auto It = VarLenInsts.find(EncodingDef);
441 if (It == VarLenInsts.end())
442 PrintFatalError(EncodingDef, "Parsed encoding record not found");
443 const VarLenInst &VLI = It->second;
444 size_t BitWidth = VLI.size();
445
446 CodeGenInstruction &CGI = Target.getInstruction(R);
447
448 std::string Case;
449 raw_string_ostream SS(Case);
450 // Resize the scratch buffer.
451 if (BitWidth && !VLI.isFixedValueOnly())
452 SS.indent(6) << "Scratch = Scratch.zext(" << BitWidth << ");\n";
453 // Populate based value.
454 SS.indent(6) << "Inst = getInstBits(opcode);\n";
455
456 // Process each segment in VLI.
457 size_t Offset = 0U;
458 for (const auto &ES : VLI) {
459 unsigned NumBits = ES.BitWidth;
460 const Init *Val = ES.Value;
461 // If it's a StringInit or DagInit, it's a reference to an operand
462 // or part of an operand.
463 if (isa<StringInit>(Val) || isa<DagInit>(Val)) {
464 StringRef OperandName;
465 unsigned LoBit = 0U;
466 if (const auto *SV = dyn_cast<StringInit>(Val)) {
467 OperandName = SV->getValue();
468 } else {
469 // Normalized: (slice <operand name>, <high bit>, <low bit>)
470 const auto *DV = cast<DagInit>(Val);
471 OperandName = cast<StringInit>(DV->getArg(0))->getValue();
472 LoBit = static_cast<unsigned>(cast<IntInit>(DV->getArg(2))->getValue());
473 }
474
475 auto OpIdx = CGI.Operands.ParseOperandName(OperandName);
476 unsigned FlatOpIdx = CGI.Operands.getFlattenedOperandNumber(OpIdx);
477 StringRef CustomEncoder =
478 CGI.Operands[OpIdx.first].EncoderMethodNames[OpIdx.second];
479 if (ES.CustomEncoder.size())
480 CustomEncoder = ES.CustomEncoder;
481
482 SS.indent(6) << "Scratch.clearAllBits();\n";
483 SS.indent(6) << "// op: " << OperandName.drop_front(1) << "\n";
484 if (CustomEncoder.empty())
485 SS.indent(6) << "getMachineOpValue(MI, MI.getOperand("
486 << utostr(FlatOpIdx) << ")";
487 else
488 SS.indent(6) << CustomEncoder << "(MI, /*OpIdx=*/" << utostr(FlatOpIdx);
489
490 SS << ", /*Pos=*/" << utostr(Offset) << ", Scratch, Fixups, STI);\n";
491
492 SS.indent(6) << "Inst.insertBits("
493 << "Scratch.extractBits(" << utostr(NumBits) << ", "
494 << utostr(LoBit) << ")"
495 << ", " << Offset << ");\n";
496 }
497 Offset += NumBits;
498 }
499
500 StringRef PostEmitter = R->getValueAsString("PostEncoderMethod");
501 if (!PostEmitter.empty())
502 SS.indent(6) << "Inst = " << PostEmitter << "(MI, Inst, STI);\n";
503
504 return Case;
505 }
506
507 namespace llvm {
508
emitVarLenCodeEmitter(RecordKeeper & R,raw_ostream & OS)509 void emitVarLenCodeEmitter(RecordKeeper &R, raw_ostream &OS) {
510 VarLenCodeEmitterGen(R).run(OS);
511 }
512
513 } // end namespace llvm
514