//===-- Target.cpp ----------------------------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "../Target.h"

#include "../Error.h"
#include "../MmapUtils.h"
#include "../ParallelSnippetGenerator.h"
#include "../SerialSnippetGenerator.h"
#include "../SnippetGenerator.h"
#include "../SubprocessMemory.h"
#include "MCTargetDesc/X86BaseInfo.h"
#include "MCTargetDesc/X86MCTargetDesc.h"
#include "X86.h"
#include "X86Counter.h"
#include "X86RegisterInfo.h"
#include "llvm/ADT/Sequence.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/MC/MCInstBuilder.h"
#include "llvm/Support/Errc.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/FormatVariadic.h"
#include "llvm/TargetParser/Host.h"

#include <memory>
#include <string>
#include <vector>
#if defined(_MSC_VER) && (defined(_M_IX86) || defined(_M_X64))
#include <immintrin.h>
#include <intrin.h>
#endif
#if defined(_MSC_VER) && defined(_M_X64)
#include <float.h> // For _clearfp in ~X86SavedState().
#endif

#ifdef __linux__
#ifdef __x86_64__
#include <asm/prctl.h>
#endif // __x86_64__
#include <sys/mman.h>
#include <sys/syscall.h>
#include <unistd.h>
#ifdef HAVE_LIBPFM
#include <perfmon/perf_event.h>
#endif // HAVE_LIBPFM
#endif

#define GET_AVAILABLE_OPCODE_CHECKER
#include "X86GenInstrInfo.inc"

namespace llvm {
namespace exegesis {

// If a positive value is specified, we are going to use the LBR in
// latency-mode.
//
// Note:
// - A small value is preferred, but too low a value could result in
//   throttling.
// - A prime number is preferred to avoid always skipping certain blocks.
//
static cl::opt<unsigned> LbrSamplingPeriod(
    "x86-lbr-sample-period",
    cl::desc("The sample period (nbranches/sample), used for LBR sampling"),
    cl::cat(BenchmarkOptions), cl::init(0));

static cl::opt<bool>
    DisableUpperSSERegisters("x86-disable-upper-sse-registers",
                             cl::desc("Disable XMM8-XMM15 register usage"),
                             cl::cat(BenchmarkOptions), cl::init(false));

// FIXME: Validate that repetition-mode is loop if LBR is requested.

// Returns a non-null reason if we cannot handle the memory references in this
// instruction.
static const char *isInvalidMemoryInstr(const Instruction &Instr) {
  switch (Instr.Description.TSFlags & X86II::FormMask) {
  default:
    return "Unknown FormMask value";
  // These have no memory access.
  case X86II::Pseudo:
  case X86II::RawFrm:
  case X86II::AddCCFrm:
  case X86II::PrefixByte:
  case X86II::MRMDestReg:
  case X86II::MRMSrcReg:
  case X86II::MRMSrcReg4VOp3:
  case X86II::MRMSrcRegOp4:
  case X86II::MRMSrcRegCC:
  case X86II::MRMXrCC:
  case X86II::MRMr0:
  case X86II::MRMXr:
  case X86II::MRM0r:
  case X86II::MRM1r:
  case X86II::MRM2r:
  case X86II::MRM3r:
  case X86II::MRM4r:
  case X86II::MRM5r:
  case X86II::MRM6r:
  case X86II::MRM7r:
  case X86II::MRM0X:
  case X86II::MRM1X:
  case X86II::MRM2X:
  case X86II::MRM3X:
  case X86II::MRM4X:
  case X86II::MRM5X:
  case X86II::MRM6X:
  case X86II::MRM7X:
  case X86II::MRM_C0:
  case X86II::MRM_C1:
  case X86II::MRM_C2:
  case X86II::MRM_C3:
  case X86II::MRM_C4:
  case X86II::MRM_C5:
  case X86II::MRM_C6:
  case X86II::MRM_C7:
  case X86II::MRM_C8:
  case X86II::MRM_C9:
  case X86II::MRM_CA:
  case X86II::MRM_CB:
  case X86II::MRM_CC:
  case X86II::MRM_CD:
  case X86II::MRM_CE:
  case X86II::MRM_CF:
  case X86II::MRM_D0:
  case X86II::MRM_D1:
  case X86II::MRM_D2:
  case X86II::MRM_D3:
  case X86II::MRM_D4:
  case X86II::MRM_D5:
  case X86II::MRM_D6:
  case X86II::MRM_D7:
  case X86II::MRM_D8:
  case X86II::MRM_D9:
  case X86II::MRM_DA:
  case X86II::MRM_DB:
  case X86II::MRM_DC:
  case X86II::MRM_DD:
  case X86II::MRM_DE:
  case X86II::MRM_DF:
  case X86II::MRM_E0:
  case X86II::MRM_E1:
  case X86II::MRM_E2:
  case X86II::MRM_E3:
  case X86II::MRM_E4:
  case X86II::MRM_E5:
  case X86II::MRM_E6:
  case X86II::MRM_E7:
  case X86II::MRM_E8:
  case X86II::MRM_E9:
  case X86II::MRM_EA:
  case X86II::MRM_EB:
  case X86II::MRM_EC:
  case X86II::MRM_ED:
  case X86II::MRM_EE:
  case X86II::MRM_EF:
  case X86II::MRM_F0:
  case X86II::MRM_F1:
  case X86II::MRM_F2:
  case X86II::MRM_F3:
  case X86II::MRM_F4:
  case X86II::MRM_F5:
  case X86II::MRM_F6:
  case X86II::MRM_F7:
  case X86II::MRM_F8:
  case X86II::MRM_F9:
  case X86II::MRM_FA:
  case X86II::MRM_FB:
  case X86II::MRM_FC:
  case X86II::MRM_FD:
  case X86II::MRM_FE:
  case X86II::MRM_FF:
  case X86II::RawFrmImm8:
    return nullptr;
  case X86II::AddRegFrm:
    return (Instr.Description.Opcode == X86::POP16r ||
            Instr.Description.Opcode == X86::POP32r ||
            Instr.Description.Opcode == X86::PUSH16r ||
            Instr.Description.Opcode == X86::PUSH32r)
               ? "unsupported opcode: unsupported memory access"
               : nullptr;
  // These access memory and are handled.
  case X86II::MRMDestMem:
  case X86II::MRMSrcMem:
  case X86II::MRMSrcMem4VOp3:
  case X86II::MRMSrcMemOp4:
  case X86II::MRMSrcMemCC:
  case X86II::MRMXmCC:
  case X86II::MRMXm:
  case X86II::MRM0m:
  case X86II::MRM1m:
  case X86II::MRM2m:
  case X86II::MRM3m:
  case X86II::MRM4m:
  case X86II::MRM5m:
  case X86II::MRM6m:
  case X86II::MRM7m:
    return nullptr;
  // These access memory and are not handled yet.
  case X86II::RawFrmImm16:
  case X86II::RawFrmMemOffs:
  case X86II::RawFrmSrc:
  case X86II::RawFrmDst:
  case X86II::RawFrmDstSrc:
    return "unsupported opcode: non uniform memory access";
  }
}

// If the opcode is invalid, returns a pointer to a character literal indicating
// the reason. nullptr indicates a valid opcode.
static const char *isInvalidOpcode(const Instruction &Instr) {
  const auto OpcodeName = Instr.Name;
  if ((Instr.Description.TSFlags & X86II::FormMask) == X86II::Pseudo)
    return "unsupported opcode: pseudo instruction";
  if ((OpcodeName.starts_with("POP") && !OpcodeName.starts_with("POPCNT")) ||
      OpcodeName.starts_with("PUSH") ||
      OpcodeName.starts_with("ADJCALLSTACK") || OpcodeName.starts_with("LEAVE"))
    return "unsupported opcode: Push/Pop/AdjCallStack/Leave";
  switch (Instr.Description.Opcode) {
  case X86::LFS16rm:
  case X86::LFS32rm:
  case X86::LFS64rm:
  case X86::LGS16rm:
  case X86::LGS32rm:
  case X86::LGS64rm:
  case X86::LSS16rm:
  case X86::LSS32rm:
  case X86::LSS64rm:
  case X86::SYSENTER:
  case X86::WRFSBASE:
  case X86::WRFSBASE64:
    return "unsupported opcode";
  default:
    break;
  }
  if (const auto reason = isInvalidMemoryInstr(Instr))
    return reason;
  // We do not handle instructions with OPERAND_PCREL.
  for (const Operand &Op : Instr.Operands)
    if (Op.isExplicit() &&
        Op.getExplicitOperandInfo().OperandType == MCOI::OPERAND_PCREL)
      return "unsupported opcode: PC relative operand";
  // We do not handle second-form X87 instructions. We only handle first-form
  // ones (_Fp), see comment in X86InstrFPStack.td.
  for (const Operand &Op : Instr.Operands)
    if (Op.isReg() && Op.isExplicit() &&
        Op.getExplicitOperandInfo().RegClass == X86::RSTRegClassID)
      return "unsupported second-form X87 instruction";
  return nullptr;
}

static unsigned getX86FPFlags(const Instruction &Instr) {
  return Instr.Description.TSFlags & X86II::FPTypeMask;
}

// Helper to fill a memory operand with a value.
static void setMemOp(InstructionTemplate &IT, int OpIdx,
                     const MCOperand &OpVal) {
  const auto Op = IT.getInstr().Operands[OpIdx];
  assert(Op.isExplicit() && "invalid memory pattern");
  IT.getValueFor(Op) = OpVal;
}

// Common (latency, uops) code for LEA templates. `RestrictDestRegs` takes the
// addressing base and index registers and restricts the set of candidate LEA
// destination registers.
static Expected<std::vector<CodeTemplate>> generateLEATemplatesCommon(
    const Instruction &Instr, const BitVector &ForbiddenRegisters,
    const LLVMState &State, const SnippetGenerator::Options &Opts,
    std::function<void(unsigned, unsigned, BitVector &CandidateDestRegs)>
        RestrictDestRegs) {
  assert(Instr.Operands.size() == 6 && "invalid LEA");
  assert(X86II::getMemoryOperandNo(Instr.Description.TSFlags) == 1 &&
         "invalid LEA");

  constexpr const int kDestOp = 0;
  constexpr const int kBaseOp = 1;
  constexpr const int kIndexOp = 3;
  auto PossibleDestRegs =
      Instr.Operands[kDestOp].getRegisterAliasing().sourceBits();
  remove(PossibleDestRegs, ForbiddenRegisters);
  auto PossibleBaseRegs =
      Instr.Operands[kBaseOp].getRegisterAliasing().sourceBits();
  remove(PossibleBaseRegs, ForbiddenRegisters);
  auto PossibleIndexRegs =
      Instr.Operands[kIndexOp].getRegisterAliasing().sourceBits();
  remove(PossibleIndexRegs, ForbiddenRegisters);

  const auto &RegInfo = State.getRegInfo();
  std::vector<CodeTemplate> Result;
  for (const unsigned BaseReg : PossibleBaseRegs.set_bits()) {
    for (const unsigned IndexReg : PossibleIndexRegs.set_bits()) {
      for (int LogScale = 0; LogScale <= 3; ++LogScale) {
        // FIXME: Add an option for controlling how we explore immediates.
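        // For now, explore only a zero displacement and an arbitrary small
        // non-zero displacement (42).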
        for (const int Disp : {0, 42}) {
          InstructionTemplate IT(&Instr);
          const int64_t Scale = 1ull << LogScale;
          setMemOp(IT, 1, MCOperand::createReg(BaseReg));
          setMemOp(IT, 2, MCOperand::createImm(Scale));
          setMemOp(IT, 3, MCOperand::createReg(IndexReg));
          setMemOp(IT, 4, MCOperand::createImm(Disp));
          // SegmentReg must be 0 for LEA.
          setMemOp(IT, 5, MCOperand::createReg(0));

          // Output reg candidates are selected by the caller.
          auto PossibleDestRegsNow = PossibleDestRegs;
          RestrictDestRegs(BaseReg, IndexReg, PossibleDestRegsNow);
          assert(PossibleDestRegsNow.set_bits().begin() !=
                     PossibleDestRegsNow.set_bits().end() &&
                 "no remaining registers");
          setMemOp(
              IT, 0,
              MCOperand::createReg(*PossibleDestRegsNow.set_bits().begin()));

          CodeTemplate CT;
          CT.Instructions.push_back(std::move(IT));
          CT.Config = formatv("{3}(%{0}, %{1}, {2})", RegInfo.getName(BaseReg),
                              RegInfo.getName(IndexReg), Scale, Disp)
                          .str();
          Result.push_back(std::move(CT));
          if (Result.size() >= Opts.MaxConfigsPerOpcode)
            return std::move(Result);
        }
      }
    }
  }

  return std::move(Result);
}

namespace {
class X86SerialSnippetGenerator : public SerialSnippetGenerator {
public:
  using SerialSnippetGenerator::SerialSnippetGenerator;

  Expected<std::vector<CodeTemplate>>
  generateCodeTemplates(InstructionTemplate Variant,
                        const BitVector &ForbiddenRegisters) const override;
};
} // namespace

Expected<std::vector<CodeTemplate>>
X86SerialSnippetGenerator::generateCodeTemplates(
    InstructionTemplate Variant, const BitVector &ForbiddenRegisters) const {
  const Instruction &Instr = Variant.getInstr();

  if (const auto reason = isInvalidOpcode(Instr))
    return make_error<Failure>(reason);

  // LEA gets special attention.
  const auto Opcode = Instr.Description.getOpcode();
  if (Opcode == X86::LEA64r || Opcode == X86::LEA64_32r) {
    return generateLEATemplatesCommon(
        Instr, ForbiddenRegisters, State, Opts,
        [this](unsigned BaseReg, unsigned IndexReg,
               BitVector &CandidateDestRegs) {
          // We just select a destination register that aliases the base
          // register.
          CandidateDestRegs &=
              State.getRATC().getRegister(BaseReg).aliasedBits();
        });
  }

  if (Instr.hasMemoryOperands())
    return make_error<Failure>(
        "unsupported memory operand in latency measurements");

  switch (getX86FPFlags(Instr)) {
  case X86II::NotFP:
    return SerialSnippetGenerator::generateCodeTemplates(Variant,
                                                         ForbiddenRegisters);
  case X86II::ZeroArgFP:
  case X86II::OneArgFP:
  case X86II::SpecialFP:
  case X86II::CompareFP:
  case X86II::CondMovFP:
    return make_error<Failure>("Unsupported x87 Instruction");
  case X86II::OneArgFPRW:
  case X86II::TwoArgFP:
    // These are instructions like
    // - `ST(0) = fsqrt(ST(0))` (OneArgFPRW)
    // - `ST(0) = ST(0) + ST(i)` (TwoArgFP)
    // They are intrinsically serial and do not modify the state of the stack.
    return generateSelfAliasingCodeTemplates(Variant, ForbiddenRegisters);
  default:
    llvm_unreachable("Unknown FP Type!");
  }
}

namespace {
class X86ParallelSnippetGenerator : public ParallelSnippetGenerator {
public:
  using ParallelSnippetGenerator::ParallelSnippetGenerator;

  Expected<std::vector<CodeTemplate>>
  generateCodeTemplates(InstructionTemplate Variant,
                        const BitVector &ForbiddenRegisters) const override;
};

} // namespace

Expected<std::vector<CodeTemplate>>
X86ParallelSnippetGenerator::generateCodeTemplates(
    InstructionTemplate Variant, const BitVector &ForbiddenRegisters) const {
  const Instruction &Instr = Variant.getInstr();

  if (const auto reason = isInvalidOpcode(Instr))
    return make_error<Failure>(reason);

  // LEA gets special attention.
  const auto Opcode = Instr.Description.getOpcode();
  if (Opcode == X86::LEA64r || Opcode == X86::LEA64_32r) {
    return generateLEATemplatesCommon(
        Instr, ForbiddenRegisters, State, Opts,
        [this](unsigned BaseReg, unsigned IndexReg,
               BitVector &CandidateDestRegs) {
          // Any destination register that is not used for addressing is fine.
          remove(CandidateDestRegs,
                 State.getRATC().getRegister(BaseReg).aliasedBits());
          remove(CandidateDestRegs,
                 State.getRATC().getRegister(IndexReg).aliasedBits());
        });
  }

  switch (getX86FPFlags(Instr)) {
  case X86II::NotFP:
    return ParallelSnippetGenerator::generateCodeTemplates(Variant,
                                                           ForbiddenRegisters);
  case X86II::ZeroArgFP:
  case X86II::OneArgFP:
  case X86II::SpecialFP:
    return make_error<Failure>("Unsupported x87 Instruction");
  case X86II::OneArgFPRW:
  case X86II::TwoArgFP:
    // These are instructions like
    // - `ST(0) = fsqrt(ST(0))` (OneArgFPRW)
    // - `ST(0) = ST(0) + ST(i)` (TwoArgFP)
    // They are intrinsically serial and do not modify the state of the stack.
    // We generate the same code for latency and uops.
    return generateSelfAliasingCodeTemplates(Variant, ForbiddenRegisters);
  case X86II::CompareFP:
  case X86II::CondMovFP:
    // We can compute uops for any FP instruction that does not grow or shrink
    // the stack (either do not touch the stack or push as much as they pop).
    return generateUnconstrainedCodeTemplates(
        Variant, "instruction does not grow/shrink the FP stack");
  default:
    llvm_unreachable("Unknown FP Type!");
  }
}

static unsigned getLoadImmediateOpcode(unsigned RegBitWidth) {
  switch (RegBitWidth) {
  case 8:
    return X86::MOV8ri;
  case 16:
    return X86::MOV16ri;
  case 32:
    return X86::MOV32ri;
  case 64:
    return X86::MOV64ri;
  }
  llvm_unreachable("Invalid Value Width");
}

// Generates instruction to load an immediate value into a register.
static MCInst loadImmediate(MCRegister Reg, unsigned RegBitWidth,
                            const APInt &Value) {
  if (Value.getBitWidth() > RegBitWidth)
    llvm_unreachable("Value must fit in the Register");
  return MCInstBuilder(getLoadImmediateOpcode(RegBitWidth))
      .addReg(Reg)
      .addImm(Value.getZExtValue());
}

// Allocates scratch memory on the stack.
static MCInst allocateStackSpace(unsigned Bytes) {
  return MCInstBuilder(X86::SUB64ri8)
      .addReg(X86::RSP)
      .addReg(X86::RSP)
      .addImm(Bytes);
}

// Fills scratch memory at offset `OffsetBytes` with value `Imm`.
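// The five address operands below follow the usual X86 memory operand order:
// base register, scale, index register, displacement, segment register.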
static MCInst fillStackSpace(unsigned MovOpcode, unsigned OffsetBytes,
                             uint64_t Imm) {
  return MCInstBuilder(MovOpcode)
      // Address = ESP
      .addReg(X86::RSP)    // BaseReg
      .addImm(1)           // ScaleAmt
      .addReg(0)           // IndexReg
      .addImm(OffsetBytes) // Disp
      .addReg(0)           // Segment
      // Immediate.
      .addImm(Imm);
}

// Loads scratch memory into register `Reg` using opcode `RMOpcode`.
static MCInst loadToReg(MCRegister Reg, unsigned RMOpcode) {
  return MCInstBuilder(RMOpcode)
      .addReg(Reg)
      // Address = ESP
      .addReg(X86::RSP) // BaseReg
      .addImm(1)        // ScaleAmt
      .addReg(0)        // IndexReg
      .addImm(0)        // Disp
      .addReg(0);       // Segment
}

// Releases scratch memory.
static MCInst releaseStackSpace(unsigned Bytes) {
  return MCInstBuilder(X86::ADD64ri8)
      .addReg(X86::RSP)
      .addReg(X86::RSP)
      .addImm(Bytes);
}

// Reserves some space on the stack, fills it with the content of the provided
// constant, and provides methods to load the stack value into a register.
namespace {
struct ConstantInliner {
  explicit ConstantInliner(const APInt &Constant) : Constant_(Constant) {}

  std::vector<MCInst> loadAndFinalize(MCRegister Reg, unsigned RegBitWidth,
                                      unsigned Opcode);

  std::vector<MCInst> loadX87STAndFinalize(MCRegister Reg);

  std::vector<MCInst> loadX87FPAndFinalize(MCRegister Reg);

  std::vector<MCInst> popFlagAndFinalize();

  std::vector<MCInst> loadImplicitRegAndFinalize(unsigned Opcode,
                                                 unsigned Value);

  std::vector<MCInst> loadDirectionFlagAndFinalize();

private:
  ConstantInliner &add(const MCInst &Inst) {
    Instructions.push_back(Inst);
    return *this;
  }

  void initStack(unsigned Bytes);

  static constexpr const unsigned kF80Bytes = 10; // 80 bits.
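
  // The constant to materialize and the instruction sequence built so far.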
  APInt Constant_;
  std::vector<MCInst> Instructions;
};
} // namespace

std::vector<MCInst> ConstantInliner::loadAndFinalize(MCRegister Reg,
                                                     unsigned RegBitWidth,
                                                     unsigned Opcode) {
  assert((RegBitWidth & 7) == 0 && "RegBitWidth must be a multiple of 8 bits");
  initStack(RegBitWidth / 8);
  add(loadToReg(Reg, Opcode));
  add(releaseStackSpace(RegBitWidth / 8));
  return std::move(Instructions);
}

std::vector<MCInst> ConstantInliner::loadX87STAndFinalize(MCRegister Reg) {
  initStack(kF80Bytes);
  add(MCInstBuilder(X86::LD_F80m)
          // Address = ESP
          .addReg(X86::RSP) // BaseReg
          .addImm(1)        // ScaleAmt
          .addReg(0)        // IndexReg
          .addImm(0)        // Disp
          .addReg(0));      // Segment
  if (Reg != X86::ST0)
    add(MCInstBuilder(X86::ST_Frr).addReg(Reg));
  add(releaseStackSpace(kF80Bytes));
  return std::move(Instructions);
}

std::vector<MCInst> ConstantInliner::loadX87FPAndFinalize(MCRegister Reg) {
  initStack(kF80Bytes);
  add(MCInstBuilder(X86::LD_Fp80m)
          .addReg(Reg)
          // Address = ESP
          .addReg(X86::RSP) // BaseReg
          .addImm(1)        // ScaleAmt
          .addReg(0)        // IndexReg
          .addImm(0)        // Disp
          .addReg(0));      // Segment
  add(releaseStackSpace(kF80Bytes));
  return std::move(Instructions);
}

std::vector<MCInst> ConstantInliner::popFlagAndFinalize() {
  initStack(8);
  add(MCInstBuilder(X86::POPF64));
  return std::move(Instructions);
}

std::vector<MCInst>
ConstantInliner::loadImplicitRegAndFinalize(unsigned Opcode, unsigned Value) {
  add(allocateStackSpace(4));
  add(fillStackSpace(X86::MOV32mi, 0, Value)); // Mask all FP exceptions
  add(MCInstBuilder(Opcode)
          // Address = ESP
          .addReg(X86::RSP) // BaseReg
          .addImm(1)        // ScaleAmt
          .addReg(0)        // IndexReg
          .addImm(0)        // Disp
          .addReg(0));      // Segment
  add(releaseStackSpace(4));
  return std::move(Instructions);
}

std::vector<MCInst> ConstantInliner::loadDirectionFlagAndFinalize() {
  if (Constant_.isZero())
    add(MCInstBuilder(X86::CLD));
  else if (Constant_.isOne())
    add(MCInstBuilder(X86::STD));

  return std::move(Instructions);
}

void ConstantInliner::initStack(unsigned Bytes) {
  assert(Constant_.getBitWidth() <= Bytes * 8 &&
         "Value does not have the correct size");
  const APInt WideConstant = Constant_.getBitWidth() < Bytes * 8
                                 ? Constant_.sext(Bytes * 8)
                                 : Constant_;
  add(allocateStackSpace(Bytes));
  size_t ByteOffset = 0;
  for (; Bytes - ByteOffset >= 4; ByteOffset += 4)
    add(fillStackSpace(
        X86::MOV32mi, ByteOffset,
        WideConstant.extractBits(32, ByteOffset * 8).getZExtValue()));
  if (Bytes - ByteOffset >= 2) {
    add(fillStackSpace(
        X86::MOV16mi, ByteOffset,
        WideConstant.extractBits(16, ByteOffset * 8).getZExtValue()));
    ByteOffset += 2;
  }
  if (Bytes - ByteOffset >= 1)
    add(fillStackSpace(
        X86::MOV8mi, ByteOffset,
        WideConstant.extractBits(8, ByteOffset * 8).getZExtValue()));
}

#include "X86GenExegesis.inc"

namespace {

class X86SavedState : public ExegesisTarget::SavedState {
public:
  X86SavedState() {
#if defined(_MSC_VER) && defined(_M_X64)
    _fxsave64(FPState);
    Eflags = __readeflags();
#elif defined(__GNUC__) && defined(__x86_64__)
    __builtin_ia32_fxsave64(FPState);
    Eflags = __builtin_ia32_readeflags_u64();
#else
    report_fatal_error("X86 exegesis running on unsupported target");
#endif
  }

  ~X86SavedState() {
    // Restoring the X87 state does not flush pending exceptions; make sure
    // these exceptions are flushed now.
#if defined(_MSC_VER) && defined(_M_X64)
    _clearfp();
    _fxrstor64(FPState);
    __writeeflags(Eflags);
#elif defined(__GNUC__) && defined(__x86_64__)
    asm volatile("fwait");
    __builtin_ia32_fxrstor64(FPState);
    __builtin_ia32_writeeflags_u64(Eflags);
#else
    report_fatal_error("X86 exegesis running on unsupported target");
#endif
  }

private:
#if defined(__x86_64__) || defined(_M_X64)
  alignas(16) char FPState[512];
  uint64_t Eflags;
#endif
};

class ExegesisX86Target : public ExegesisTarget {
public:
  ExegesisX86Target()
      : ExegesisTarget(X86CpuPfmCounters, X86_MC::isOpcodeAvailable) {}

  Expected<std::unique_ptr<pfm::CounterGroup>>
  createCounter(StringRef CounterName, const LLVMState &State,
                ArrayRef<const char *> ValidationCounters,
                const pid_t ProcessID) const override {
    // If LbrSamplingPeriod was provided, then ignore the
    // CounterName because we only have one for LBR.
    if (LbrSamplingPeriod > 0) {
      // Can't use LBR without HAVE_LIBPFM, LIBPFM_HAS_FIELD_CYCLES, or without
      // __linux__ (for now).
#if defined(HAVE_LIBPFM) && defined(LIBPFM_HAS_FIELD_CYCLES) &&                \
    defined(__linux__)
      // TODO(boomanaiden154): Add in support for using validation counters
      // when using LBR counters.
      if (ValidationCounters.size() > 0)
        return make_error<StringError>(
            "Using LBR is not currently supported with validation counters",
            errc::invalid_argument);

      return std::make_unique<X86LbrCounter>(
          X86LbrPerfEvent(LbrSamplingPeriod));
#else
      return make_error<StringError>(
          "LBR counter requested without HAVE_LIBPFM, LIBPFM_HAS_FIELD_CYCLES, "
          "or running on Linux.",
          errc::invalid_argument);
#endif
    }
    return ExegesisTarget::createCounter(CounterName, State, ValidationCounters,
                                         ProcessID);
  }

  enum ArgumentRegisters { CodeSize = X86::R12, AuxiliaryMemoryFD = X86::R13 };

private:
  void addTargetSpecificPasses(PassManagerBase &PM) const override;

  MCRegister getScratchMemoryRegister(const Triple &TT) const override;

  MCRegister getDefaultLoopCounterRegister(const Triple &) const override;

  unsigned getMaxMemoryAccessSize() const override { return 64; }

  Error randomizeTargetMCOperand(const Instruction &Instr, const Variable &Var,
                                 MCOperand &AssignedValue,
                                 const BitVector &ForbiddenRegs) const override;

  void fillMemoryOperands(InstructionTemplate &IT, MCRegister Reg,
                          unsigned Offset) const override;

  void decrementLoopCounterAndJump(MachineBasicBlock &MBB,
                                   MachineBasicBlock &TargetMBB,
                                   const MCInstrInfo &MII,
                                   MCRegister LoopRegister) const override;

  std::vector<MCInst> setRegTo(const MCSubtargetInfo &STI, MCRegister Reg,
                               const APInt &Value) const override;

#ifdef __linux__
  void generateLowerMunmap(std::vector<MCInst> &GeneratedCode) const override;

  void generateUpperMunmap(std::vector<MCInst> &GeneratedCode) const override;

  std::vector<MCInst> generateExitSyscall(unsigned ExitCode) const override;

  std::vector<MCInst>
  generateMmap(uintptr_t Address, size_t Length,
               uintptr_t FileDescriptorAddress) const override;

  void generateMmapAuxMem(std::vector<MCInst> &GeneratedCode) const override;

  void moveArgumentRegisters(std::vector<MCInst> &GeneratedCode) const override;

  std::vector<MCInst> generateMemoryInitialSetup() const override;

  std::vector<MCInst> setStackRegisterToAuxMem() const override;

  uintptr_t getAuxiliaryMemoryStartAddress() const override;

  std::vector<MCInst> configurePerfCounter(long Request,
                                           bool SaveRegisters) const override;

  std::vector<MCRegister> getArgumentRegisters() const override;

  std::vector<MCRegister> getRegistersNeedSaving() const override;
#endif // __linux__

  ArrayRef<MCPhysReg> getUnavailableRegisters() const override {
    if (DisableUpperSSERegisters)
      return ArrayRef(kUnavailableRegistersSSE);

    return ArrayRef(kUnavailableRegisters);
  }

  bool allowAsBackToBack(const Instruction &Instr) const override {
    const unsigned Opcode = Instr.Description.Opcode;
    return !isInvalidOpcode(Instr) && Opcode != X86::LEA64r &&
           Opcode != X86::LEA64_32r && Opcode != X86::LEA16r;
  }

  std::vector<InstructionTemplate>
  generateInstructionVariants(const Instruction &Instr,
                              unsigned MaxConfigsPerOpcode) const override;

  std::unique_ptr<SnippetGenerator> createSerialSnippetGenerator(
      const LLVMState &State,
      const SnippetGenerator::Options &Opts) const override {
    return std::make_unique<X86SerialSnippetGenerator>(State, Opts);
  }

  std::unique_ptr<SnippetGenerator> createParallelSnippetGenerator(
      const LLVMState &State,
      const SnippetGenerator::Options &Opts) const override {
    return std::make_unique<X86ParallelSnippetGenerator>(State, Opts);
  }

  bool matchesArch(Triple::ArchType Arch) const override {
    return Arch == Triple::x86_64 || Arch == Triple::x86;
  }

  Error checkFeatureSupport() const override {
    // LBR is the only feature we conditionally support now.
    // So if LBR is not requested, then we should be able to run the
    // benchmarks.
    if (LbrSamplingPeriod == 0)
      return Error::success();

#if defined(__linux__) && defined(HAVE_LIBPFM) &&                              \
    defined(LIBPFM_HAS_FIELD_CYCLES)
    // FIXME: Fix this.
    // https://bugs.llvm.org/show_bug.cgi?id=48918
    // For now, only do the check if we see an Intel machine because
    // the counter uses some Intel-specific magic and it could
    // get confused and think an AMD machine actually has LBR support.
#if defined(__i386__) || defined(_M_IX86) || defined(__x86_64__) ||            \
    defined(_M_X64)
    using namespace sys::detail::x86;

    if (getVendorSignature() == VendorSignatures::GENUINE_INTEL)
      // If the kernel supports it, the hardware still may not have it.
      return X86LbrCounter::checkLbrSupport();
#else
    report_fatal_error("Running X86 exegesis on unsupported target");
#endif
#endif
    return make_error<StringError>(
        "LBR not supported on this kernel and/or platform",
        errc::not_supported);
  }

  std::unique_ptr<SavedState> withSavedState() const override {
    return std::make_unique<X86SavedState>();
  }

  static const MCPhysReg kUnavailableRegisters[4];
  static const MCPhysReg kUnavailableRegistersSSE[12];
};

// We disable a few registers that cannot be encoded on instructions with a REX
// prefix.
const MCPhysReg ExegesisX86Target::kUnavailableRegisters[4] = {
    X86::AH, X86::BH, X86::CH, X86::DH};

// Optionally, also disable the upper (x86_64) SSE registers to reduce frontend
// decoder load.
const MCPhysReg ExegesisX86Target::kUnavailableRegistersSSE[12] = {
    X86::AH,    X86::BH,    X86::CH,    X86::DH,    X86::XMM8,  X86::XMM9,
    X86::XMM10, X86::XMM11, X86::XMM12, X86::XMM13, X86::XMM14, X86::XMM15};

// We're using one of R8-R15 because these registers are never hardcoded in
// instructions (e.g. MOVS writes to EDI, ESI, EDX), so they have fewer
// conflicts.
constexpr const MCPhysReg kDefaultLoopCounterReg = X86::R8;

} // namespace

void ExegesisX86Target::addTargetSpecificPasses(PassManagerBase &PM) const {
  // Lowers FP pseudo-instructions, e.g. ABS_Fp32 -> ABS_F.
  PM.add(createX86FloatingPointStackifierPass());
}

MCRegister
ExegesisX86Target::getScratchMemoryRegister(const Triple &TT) const {
  if (!TT.isArch64Bit()) {
    // FIXME: This would require popping from the stack, so we would have to
    // add some additional setup code.
    return MCRegister();
  }
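  // The snippet receives the scratch memory pointer as its first argument, so
  // use the first integer argument register of the calling convention: RCX on
  // Win64, RDI on SysV.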
  return TT.isOSWindows() ? X86::RCX : X86::RDI;
}

MCRegister
ExegesisX86Target::getDefaultLoopCounterRegister(const Triple &TT) const {
  if (!TT.isArch64Bit()) {
    return MCRegister();
  }
  return kDefaultLoopCounterReg;
}

Error ExegesisX86Target::randomizeTargetMCOperand(
    const Instruction &Instr, const Variable &Var, MCOperand &AssignedValue,
    const BitVector &ForbiddenRegs) const {
  const Operand &Op = Instr.getPrimaryOperand(Var);
  switch (Op.getExplicitOperandInfo().OperandType) {
  case X86::OperandType::OPERAND_COND_CODE:
    AssignedValue =
        MCOperand::createImm(randomIndex(X86::CondCode::LAST_VALID_COND));
    return Error::success();
  case X86::OperandType::OPERAND_ROUNDING_CONTROL:
    AssignedValue =
        MCOperand::createImm(randomIndex(X86::STATIC_ROUNDING::TO_ZERO));
    return Error::success();
  default:
    break;
  }
  return make_error<Failure>(
      Twine("unimplemented operand type ")
          .concat(Twine(Op.getExplicitOperandInfo().OperandType)));
}

void ExegesisX86Target::fillMemoryOperands(InstructionTemplate &IT,
                                           MCRegister Reg,
                                           unsigned Offset) const {
  assert(!isInvalidMemoryInstr(IT.getInstr()) &&
         "fillMemoryOperands requires a valid memory instruction");
  int MemOpIdx = X86II::getMemoryOperandNo(IT.getInstr().Description.TSFlags);
  assert(MemOpIdx >= 0 && "invalid memory operand index");
  // getMemoryOperandNo() ignores tied operands, so we have to add them back.
  MemOpIdx += X86II::getOperandBias(IT.getInstr().Description);
  setMemOp(IT, MemOpIdx + 0, MCOperand::createReg(Reg));    // BaseReg
  setMemOp(IT, MemOpIdx + 1, MCOperand::createImm(1));      // ScaleAmt
  setMemOp(IT, MemOpIdx + 2, MCOperand::createReg(0));      // IndexReg
  setMemOp(IT, MemOpIdx + 3, MCOperand::createImm(Offset)); // Disp
  setMemOp(IT, MemOpIdx + 4, MCOperand::createReg(0));      // Segment
}

void ExegesisX86Target::decrementLoopCounterAndJump(
    MachineBasicBlock &MBB, MachineBasicBlock &TargetMBB,
    const MCInstrInfo &MII, MCRegister LoopRegister) const {
  BuildMI(&MBB, DebugLoc(), MII.get(X86::ADD64ri8))
      .addDef(LoopRegister)
      .addUse(LoopRegister)
      .addImm(-1);
  BuildMI(&MBB, DebugLoc(), MII.get(X86::JCC_1))
      .addMBB(&TargetMBB)
      .addImm(X86::COND_NE);
}

void generateRegisterStackPush(unsigned int Register,
                               std::vector<MCInst> &GeneratedCode) {
  GeneratedCode.push_back(MCInstBuilder(X86::PUSH64r).addReg(Register));
}

void generateRegisterStackPop(unsigned int Register,
                              std::vector<MCInst> &GeneratedCode) {
  GeneratedCode.push_back(MCInstBuilder(X86::POP64r).addReg(Register));
}

void generateSyscall(long SyscallNumber, std::vector<MCInst> &GeneratedCode) {
  GeneratedCode.push_back(
      loadImmediate(X86::RAX, 64, APInt(64, SyscallNumber)));
  GeneratedCode.push_back(MCInstBuilder(X86::SYSCALL));
}

// The functions below for saving and restoring system call registers are only
// used when llvm-exegesis is built on Linux.
#ifdef __linux__
constexpr std::array<unsigned, 6> SyscallArgumentRegisters{
    X86::RDI, X86::RSI, X86::RDX, X86::R10, X86::R8, X86::R9};

static void saveSyscallRegisters(std::vector<MCInst> &GeneratedCode,
                                 unsigned ArgumentCount) {
  assert(ArgumentCount <= 6 &&
         "System calls on X86-64 Linux can only take six arguments");
  // Preserve RCX and R11 (clobbered by the system call).
  generateRegisterStackPush(X86::RCX, GeneratedCode);
  generateRegisterStackPush(X86::R11, GeneratedCode);
  // Preserve RAX (used for the syscall number/return value).
  generateRegisterStackPush(X86::RAX, GeneratedCode);
  // Preserve the registers used to pass arguments to the system call.
  for (unsigned I = 0; I < ArgumentCount; ++I)
    generateRegisterStackPush(SyscallArgumentRegisters[I], GeneratedCode);
}

static void restoreSyscallRegisters(std::vector<MCInst> &GeneratedCode,
                                    unsigned ArgumentCount) {
  assert(ArgumentCount <= 6 &&
         "System calls on X86-64 Linux can only take six arguments");
  // Restore the argument registers, in the opposite order of the way they are
  // saved.
  for (unsigned I = ArgumentCount; I > 0; --I) {
    generateRegisterStackPop(SyscallArgumentRegisters[I - 1], GeneratedCode);
  }
  generateRegisterStackPop(X86::RAX, GeneratedCode);
  generateRegisterStackPop(X86::R11, GeneratedCode);
  generateRegisterStackPop(X86::RCX, GeneratedCode);
}
#endif // __linux__

static std::vector<MCInst> loadImmediateSegmentRegister(MCRegister Reg,
                                                        const APInt &Value) {
#if defined(__x86_64__) && defined(__linux__)
  assert(Value.getBitWidth() <= 64 && "Value must fit in the register.");
  std::vector<MCInst> loadSegmentRegisterCode;
  // Preserve the syscall registers here as we don't want to make any
  // assumptions about the ordering of what registers are loaded in first, and
  // we might have already loaded in registers that we are going to be
  // clobbering here.
  saveSyscallRegisters(loadSegmentRegisterCode, 2);
  // Generate the instructions to make the arch_prctl system call to set
  // the registers.
  int SyscallCode = 0;
  if (Reg == X86::FS)
    SyscallCode = ARCH_SET_FS;
  else if (Reg == X86::GS)
    SyscallCode = ARCH_SET_GS;
  else
    llvm_unreachable("Only the segment registers GS and FS are supported");
  loadSegmentRegisterCode.push_back(
      loadImmediate(X86::RDI, 64, APInt(64, SyscallCode)));
  loadSegmentRegisterCode.push_back(loadImmediate(X86::RSI, 64, Value));
  generateSyscall(SYS_arch_prctl, loadSegmentRegisterCode);
  // Restore the registers in reverse order.
  restoreSyscallRegisters(loadSegmentRegisterCode, 2);
  return loadSegmentRegisterCode;
#else
  llvm_unreachable("Loading immediate segment registers is only supported with "
                   "x86-64 llvm-exegesis");
#endif // defined(__x86_64__) && defined(__linux__)
}

std::vector<MCInst> ExegesisX86Target::setRegTo(const MCSubtargetInfo &STI,
                                                MCRegister Reg,
                                                const APInt &Value) const {
  if (X86::SEGMENT_REGRegClass.contains(Reg))
    return loadImmediateSegmentRegister(Reg, Value);
  if (X86::GR8RegClass.contains(Reg))
    return {loadImmediate(Reg, 8, Value)};
  if (X86::GR16RegClass.contains(Reg))
    return {loadImmediate(Reg, 16, Value)};
  if (X86::GR32RegClass.contains(Reg))
    return {loadImmediate(Reg, 32, Value)};
  if (X86::GR64RegClass.contains(Reg))
    return {loadImmediate(Reg, 64, Value)};
  if (X86::VK8RegClass.contains(Reg) || X86::VK16RegClass.contains(Reg) ||
      X86::VK32RegClass.contains(Reg) || X86::VK64RegClass.contains(Reg)) {
    switch (Value.getBitWidth()) {
    case 8:
      if (STI.getFeatureBits()[X86::FeatureDQI]) {
        ConstantInliner CI(Value);
        return CI.loadAndFinalize(Reg, Value.getBitWidth(), X86::KMOVBkm);
      }
      [[fallthrough]];
    case 16:
      if (STI.getFeatureBits()[X86::FeatureAVX512]) {
        ConstantInliner CI(Value.zextOrTrunc(16));
        return CI.loadAndFinalize(Reg, 16, X86::KMOVWkm);
      }
      break;
    case 32:
      if (STI.getFeatureBits()[X86::FeatureBWI]) {
        ConstantInliner CI(Value);
        return CI.loadAndFinalize(Reg, Value.getBitWidth(), X86::KMOVDkm);
      }
      break;
    case 64:
      if (STI.getFeatureBits()[X86::FeatureBWI]) {
        ConstantInliner CI(Value);
        return CI.loadAndFinalize(Reg, Value.getBitWidth(), X86::KMOVQkm);
      }
      break;
    }
  }
  ConstantInliner CI(Value);
  if (X86::VR64RegClass.contains(Reg))
    return CI.loadAndFinalize(Reg, 64, X86::MMX_MOVQ64rm);
  if (X86::VR128RegClass.contains(Reg)) {
    if (STI.getFeatureBits()[X86::FeatureAVX])
      return CI.loadAndFinalize(Reg, 128, X86::VMOVDQUrm);
    return CI.loadAndFinalize(Reg, 128, X86::MOVDQUrm);
  }
  if (X86::VR128XRegClass.contains(Reg)) {
    if (STI.getFeatureBits()[X86::FeatureAVX512])
      return CI.loadAndFinalize(Reg, 128, X86::VMOVDQU32Z128rm);
  }
  if (X86::VR256RegClass.contains(Reg)) {
    if (STI.getFeatureBits()[X86::FeatureAVX])
      return CI.loadAndFinalize(Reg, 256, X86::VMOVDQUYrm);
  }
  if (X86::VR256XRegClass.contains(Reg)) {
    if (STI.getFeatureBits()[X86::FeatureAVX512])
      return CI.loadAndFinalize(Reg, 256, X86::VMOVDQU32Z256rm);
  }
  if (X86::VR512RegClass.contains(Reg))
    if (STI.getFeatureBits()[X86::FeatureAVX512])
      return CI.loadAndFinalize(Reg, 512, X86::VMOVDQU32Zrm);
  if (X86::RSTRegClass.contains(Reg)) {
    return CI.loadX87STAndFinalize(Reg);
  }
  if (X86::RFP32RegClass.contains(Reg) || X86::RFP64RegClass.contains(Reg) ||
      X86::RFP80RegClass.contains(Reg)) {
    return CI.loadX87FPAndFinalize(Reg);
  }
  if (Reg == X86::EFLAGS)
    return CI.popFlagAndFinalize();
  if (Reg == X86::MXCSR)
    return CI.loadImplicitRegAndFinalize(
        STI.getFeatureBits()[X86::FeatureAVX] ? X86::VLDMXCSR : X86::LDMXCSR,
        0x1f80);
  if (Reg == X86::FPCW)
    return CI.loadImplicitRegAndFinalize(X86::FLDCW16m, 0x37f);
  if (Reg == X86::DF)
    return CI.loadDirectionFlagAndFinalize();
  return {}; // Not yet implemented.
}

#ifdef __linux__

#ifdef __arm__
static constexpr const uintptr_t VAddressSpaceCeiling = 0xC0000000;
#else
static constexpr const uintptr_t VAddressSpaceCeiling = 0x0000800000000000;
#endif

void generateRoundToNearestPage(unsigned int Register,
                                std::vector<MCInst> &GeneratedCode) {
  int PageSizeShift = static_cast<int>(round(log2(getpagesize())));
  // Round down to the nearest page by getting rid of the least significant
  // bits representing location in the page. Shift right to get rid of this
  // info and then shift back left.
  GeneratedCode.push_back(MCInstBuilder(X86::SHR64ri)
                              .addReg(Register)
                              .addReg(Register)
                              .addImm(PageSizeShift));
  GeneratedCode.push_back(MCInstBuilder(X86::SHL64ri)
                              .addReg(Register)
                              .addReg(Register)
                              .addImm(PageSizeShift));
}

void generateGetInstructionPointer(unsigned int ResultRegister,
                                   std::vector<MCInst> &GeneratedCode) {
  // Use a load effective address to get the current instruction pointer and
  // put it into the result register.
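  // This is `lea 0(%rip), %ResultRegister`; with RIP-relative addressing the
  // loaded value is the address of the instruction following the LEA.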
  GeneratedCode.push_back(MCInstBuilder(X86::LEA64r)
                              .addReg(ResultRegister)
                              .addReg(X86::RIP)
                              .addImm(1)
                              .addReg(0)
                              .addImm(0)
                              .addReg(0));
}

void ExegesisX86Target::generateLowerMunmap(
    std::vector<MCInst> &GeneratedCode) const {
  // Unmap starting at address zero.
  GeneratedCode.push_back(loadImmediate(X86::RDI, 64, APInt(64, 0)));
  // Get the current instruction pointer so we know where to unmap up to.
  generateGetInstructionPointer(X86::RSI, GeneratedCode);
  generateRoundToNearestPage(X86::RSI, GeneratedCode);
  // Subtract a page from the end of the unmap so we don't unmap the currently
  // executing section.
  GeneratedCode.push_back(MCInstBuilder(X86::SUB64ri32)
                              .addReg(X86::RSI)
                              .addReg(X86::RSI)
                              .addImm(getpagesize()));
  generateSyscall(SYS_munmap, GeneratedCode);
}

void ExegesisX86Target::generateUpperMunmap(
    std::vector<MCInst> &GeneratedCode) const {
  generateGetInstructionPointer(X86::R8, GeneratedCode);
  // Load the size of the snippet into RDI from the argument register.
  GeneratedCode.push_back(MCInstBuilder(X86::MOV64rr)
                              .addReg(X86::RDI)
                              .addReg(ArgumentRegisters::CodeSize));
  // Add the length of the snippet (in %RDI) to the current instruction pointer
  // (%R8) to get the address where we should start unmapping.
  GeneratedCode.push_back(MCInstBuilder(X86::ADD64rr)
                              .addReg(X86::RDI)
                              .addReg(X86::RDI)
                              .addReg(X86::R8));
  generateRoundToNearestPage(X86::RDI, GeneratedCode);
  // Add one page to the start address to ensure that we're above the snippet
  // since the rounding function above rounds down.
  GeneratedCode.push_back(MCInstBuilder(X86::ADD64ri32)
                              .addReg(X86::RDI)
                              .addReg(X86::RDI)
                              .addImm(getpagesize()));
  // Unmap to just one page under the ceiling of the address space.
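  // munmap takes a length rather than an end address, so subtract the start
  // address (in %RDI) from the target end address to get the length (in %RSI).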
  GeneratedCode.push_back(loadImmediate(
      X86::RSI, 64, APInt(64, VAddressSpaceCeiling - getpagesize())));
  GeneratedCode.push_back(MCInstBuilder(X86::SUB64rr)
                              .addReg(X86::RSI)
                              .addReg(X86::RSI)
                              .addReg(X86::RDI));
  generateSyscall(SYS_munmap, GeneratedCode);
}

std::vector<MCInst>
ExegesisX86Target::generateExitSyscall(unsigned ExitCode) const {
  std::vector<MCInst> ExitCallCode;
  ExitCallCode.push_back(loadImmediate(X86::RDI, 64, APInt(64, ExitCode)));
  generateSyscall(SYS_exit, ExitCallCode);
  return ExitCallCode;
}

std::vector<MCInst>
ExegesisX86Target::generateMmap(uintptr_t Address, size_t Length,
                                uintptr_t FileDescriptorAddress) const {
  std::vector<MCInst> MmapCode;
  MmapCode.push_back(loadImmediate(X86::RDI, 64, APInt(64, Address)));
  MmapCode.push_back(loadImmediate(X86::RSI, 64, APInt(64, Length)));
  MmapCode.push_back(
      loadImmediate(X86::RDX, 64, APInt(64, PROT_READ | PROT_WRITE)));
  MmapCode.push_back(
      loadImmediate(X86::R10, 64, APInt(64, MAP_SHARED | MAP_FIXED_NOREPLACE)));
  // Copy file descriptor location from aux memory into R8.
  MmapCode.push_back(
      loadImmediate(X86::R8, 64, APInt(64, FileDescriptorAddress)));
  // Dereference file descriptor into FD argument register.
  MmapCode.push_back(MCInstBuilder(X86::MOV32rm)
                         .addReg(X86::R8D)
                         .addReg(X86::R8)
                         .addImm(1)
                         .addReg(0)
                         .addImm(0)
                         .addReg(0));
  MmapCode.push_back(loadImmediate(X86::R9, 64, APInt(64, 0)));
  generateSyscall(SYS_mmap, MmapCode);
  return MmapCode;
}

void ExegesisX86Target::generateMmapAuxMem(
    std::vector<MCInst> &GeneratedCode) const {
  GeneratedCode.push_back(
      loadImmediate(X86::RDI, 64, APInt(64, getAuxiliaryMemoryStartAddress())));
  GeneratedCode.push_back(loadImmediate(
      X86::RSI, 64, APInt(64, SubprocessMemory::AuxiliaryMemorySize)));
  GeneratedCode.push_back(
      loadImmediate(X86::RDX, 64, APInt(64, PROT_READ | PROT_WRITE)));
  GeneratedCode.push_back(
      loadImmediate(X86::R10, 64, APInt(64, MAP_SHARED | MAP_FIXED_NOREPLACE)));
  GeneratedCode.push_back(MCInstBuilder(X86::MOV64rr)
                              .addReg(X86::R8)
                              .addReg(ArgumentRegisters::AuxiliaryMemoryFD));
  GeneratedCode.push_back(loadImmediate(X86::R9, 64, APInt(64, 0)));
  generateSyscall(SYS_mmap, GeneratedCode);
}

void ExegesisX86Target::moveArgumentRegisters(
    std::vector<MCInst> &GeneratedCode) const {
  GeneratedCode.push_back(MCInstBuilder(X86::MOV64rr)
                              .addReg(ArgumentRegisters::CodeSize)
                              .addReg(X86::RDI));
  GeneratedCode.push_back(MCInstBuilder(X86::MOV64rr)
                              .addReg(ArgumentRegisters::AuxiliaryMemoryFD)
                              .addReg(X86::RSI));
}

std::vector<MCInst> ExegesisX86Target::generateMemoryInitialSetup() const {
  std::vector<MCInst> MemoryInitialSetupCode;
  moveArgumentRegisters(MemoryInitialSetupCode);
  generateLowerMunmap(MemoryInitialSetupCode);
  generateUpperMunmap(MemoryInitialSetupCode);
  generateMmapAuxMem(MemoryInitialSetupCode);
  return MemoryInitialSetupCode;
}

std::vector<MCInst> ExegesisX86Target::setStackRegisterToAuxMem() const {
  // Moves %rsp to the end of the auxiliary memory.
  return {MCInstBuilder(X86::MOV64ri)
              .addReg(X86::RSP)
              .addImm(getAuxiliaryMemoryStartAddress() +
                      SubprocessMemory::AuxiliaryMemorySize)};
}

uintptr_t
ExegesisX86Target::getAuxiliaryMemoryStartAddress() const {
  // Return the second-to-last page in the virtual address space to try and
  // prevent interference with memory annotations in the snippet.
  return VAddressSpaceCeiling - 2 * getpagesize();
}

std::vector<MCInst>
ExegesisX86Target::configurePerfCounter(long Request,
                                        bool SaveRegisters) const {
  std::vector<MCInst> ConfigurePerfCounterCode;
  if (SaveRegisters)
    saveSyscallRegisters(ConfigurePerfCounterCode, 3);
  ConfigurePerfCounterCode.push_back(
      loadImmediate(X86::RDI, 64, APInt(64, getAuxiliaryMemoryStartAddress())));
  ConfigurePerfCounterCode.push_back(MCInstBuilder(X86::MOV32rm)
                                         .addReg(X86::EDI)
                                         .addReg(X86::RDI)
                                         .addImm(1)
                                         .addReg(0)
                                         .addImm(0)
                                         .addReg(0));
  ConfigurePerfCounterCode.push_back(
      loadImmediate(X86::RSI, 64, APInt(64, Request)));
#ifdef HAVE_LIBPFM
  ConfigurePerfCounterCode.push_back(
      loadImmediate(X86::RDX, 64, APInt(64, PERF_IOC_FLAG_GROUP)));
#endif // HAVE_LIBPFM
  generateSyscall(SYS_ioctl, ConfigurePerfCounterCode);
  if (SaveRegisters)
    restoreSyscallRegisters(ConfigurePerfCounterCode, 3);
  return ConfigurePerfCounterCode;
}

std::vector<MCRegister> ExegesisX86Target::getArgumentRegisters() const {
  return {X86::RDI, X86::RSI};
}

std::vector<MCRegister> ExegesisX86Target::getRegistersNeedSaving() const {
  return {X86::RAX, X86::RDI, X86::RSI, X86::RCX, X86::R11};
}

#endif // __linux__

// Instructions can have some variable operands, and we may want to see how
// different operands affect performance. So for each operand position,
// precompute all the possible choices we might care about,
// and greedily generate all the possible combinations of choices.
std::vector<InstructionTemplate> ExegesisX86Target::generateInstructionVariants(
    const Instruction &Instr, unsigned MaxConfigsPerOpcode) const {
  bool Exploration = false;
  SmallVector<SmallVector<MCOperand, 1>, 4> VariableChoices;
  VariableChoices.resize(Instr.Variables.size());
  for (auto I : zip(Instr.Variables, VariableChoices)) {
    const Variable &Var = std::get<0>(I);
    SmallVectorImpl<MCOperand> &Choices = std::get<1>(I);

    switch (Instr.getPrimaryOperand(Var).getExplicitOperandInfo().OperandType) {
    default:
      // We don't wish to explicitly explore this variable.
      Choices.emplace_back(); // But add invalid MCOperand to simplify logic.
      continue;
    case X86::OperandType::OPERAND_COND_CODE: {
      Exploration = true;
      auto CondCodes = enum_seq_inclusive(X86::CondCode::COND_O,
                                          X86::CondCode::LAST_VALID_COND,
                                          force_iteration_on_noniterable_enum);
      Choices.reserve(CondCodes.size());
      for (int CondCode : CondCodes)
        Choices.emplace_back(MCOperand::createImm(CondCode));
      break;
    }
    }
  }

  // If we don't wish to explore any variables, defer to the baseline method.
  if (!Exploration)
    return ExegesisTarget::generateInstructionVariants(Instr,
                                                       MaxConfigsPerOpcode);

  std::vector<InstructionTemplate> Variants;
  size_t NumVariants;
  CombinationGenerator<MCOperand, decltype(VariableChoices)::value_type, 4> G(
      VariableChoices);

  // How many operand combinations can we produce, within the limit?
  NumVariants = std::min(G.numCombinations(), (size_t)MaxConfigsPerOpcode);
  // And actually produce all the wanted operand combinations.
  Variants.reserve(NumVariants);
  G.generate([&](ArrayRef<MCOperand> State) -> bool {
    Variants.emplace_back(&Instr);
    Variants.back().setVariableValues(State);
    // Did we run out of space for variants?
    return Variants.size() >= NumVariants;
  });

  assert(Variants.size() == NumVariants &&
         Variants.size() <= MaxConfigsPerOpcode &&
         "Should not produce too many variants");
  return Variants;
}

static ExegesisTarget *getTheExegesisX86Target() {
  static ExegesisX86Target Target;
  return &Target;
}

void InitializeX86ExegesisTarget() {
  ExegesisTarget::registerTarget(getTheExegesisX86Target());
}

} // namespace exegesis
} // namespace llvm