//===- lib/CodeGen/GlobalISel/GISelKnownBits.cpp --------------*- C++ *-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// Provides analysis for querying information about KnownBits during GISel
/// passes.
//
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/IR/ConstantRange.h"
#include "llvm/Target/TargetMachine.h"

#define DEBUG_TYPE "gisel-known-bits"

using namespace llvm;

char llvm::GISelKnownBitsAnalysis::ID = 0;

INITIALIZE_PASS(GISelKnownBitsAnalysis, DEBUG_TYPE,
                "Analysis for ComputingKnownBits", false, true)

GISelKnownBits::GISelKnownBits(MachineFunction &MF, unsigned MaxDepth)
    : MF(MF), MRI(MF.getRegInfo()), TL(*MF.getSubtarget().getTargetLowering()),
      DL(MF.getFunction().getDataLayout()), MaxDepth(MaxDepth) {}

Align GISelKnownBits::computeKnownAlignment(Register R, unsigned Depth) {
  const MachineInstr *MI = MRI.getVRegDef(R);
  switch (MI->getOpcode()) {
  case TargetOpcode::COPY:
    return computeKnownAlignment(MI->getOperand(1).getReg(), Depth);
  case TargetOpcode::G_ASSERT_ALIGN: {
    // TODO: Min with source
    return Align(MI->getOperand(2).getImm());
  }
  case TargetOpcode::G_FRAME_INDEX: {
    int FrameIdx = MI->getOperand(1).getIndex();
    return MF.getFrameInfo().getObjectAlign(FrameIdx);
  }
  case TargetOpcode::G_INTRINSIC:
  case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
  case TargetOpcode::G_INTRINSIC_CONVERGENT:
  case TargetOpcode::G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS:
  default:
    return TL.computeKnownAlignForTargetInstr(*this, R, MRI, Depth + 1);
  }
}

KnownBits GISelKnownBits::getKnownBits(MachineInstr &MI) {
  assert(MI.getNumExplicitDefs() == 1 &&
         "expected single return generic instruction");
  return getKnownBits(MI.getOperand(0).getReg());
}

KnownBits GISelKnownBits::getKnownBits(Register R) {
  const LLT Ty = MRI.getType(R);
  // Since the number of lanes in a scalable vector is unknown at compile time,
  // we track one bit which is implicitly broadcast to all lanes. This means
  // that all lanes in a scalable vector are considered demanded.
  APInt DemandedElts =
      Ty.isFixedVector() ? APInt::getAllOnes(Ty.getNumElements()) : APInt(1, 1);
  return getKnownBits(R, DemandedElts);
}

KnownBits GISelKnownBits::getKnownBits(Register R, const APInt &DemandedElts,
                                       unsigned Depth) {
  // For now, we only maintain the cache during one request.
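  // The cache is filled by computeKnownBitsImpl below and cleared again
  // before this function returns, so entries never leak across queries.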
  assert(ComputeKnownBitsCache.empty() && "Cache should have been cleared");

  KnownBits Known;
  computeKnownBitsImpl(R, Known, DemandedElts, Depth);
  ComputeKnownBitsCache.clear();
  return Known;
}

bool GISelKnownBits::signBitIsZero(Register R) {
  LLT Ty = MRI.getType(R);
  unsigned BitWidth = Ty.getScalarSizeInBits();
  return maskedValueIsZero(R, APInt::getSignMask(BitWidth));
}

APInt GISelKnownBits::getKnownZeroes(Register R) {
  return getKnownBits(R).Zero;
}

APInt GISelKnownBits::getKnownOnes(Register R) { return getKnownBits(R).One; }

LLVM_ATTRIBUTE_UNUSED static void
dumpResult(const MachineInstr &MI, const KnownBits &Known, unsigned Depth) {
  dbgs() << "[" << Depth << "] Compute known bits: " << MI << "[" << Depth
         << "] Computed for: " << MI << "[" << Depth << "] Known: 0x"
         << toString(Known.Zero | Known.One, 16, false) << "\n"
         << "[" << Depth << "] Zero: 0x" << toString(Known.Zero, 16, false)
         << "\n"
         << "[" << Depth << "] One: 0x" << toString(Known.One, 16, false)
         << "\n";
}

/// Compute known bits for the intersection of \p Src0 and \p Src1
void GISelKnownBits::computeKnownBitsMin(Register Src0, Register Src1,
                                         KnownBits &Known,
                                         const APInt &DemandedElts,
                                         unsigned Depth) {
  // Test src1 first, since we canonicalize simpler expressions to the RHS.
  computeKnownBitsImpl(Src1, Known, DemandedElts, Depth);

  // If we don't know any bits, early out.
  if (Known.isUnknown())
    return;

  KnownBits Known2;
  computeKnownBitsImpl(Src0, Known2, DemandedElts, Depth);

  // Only known if known in both the LHS and RHS.
  Known = Known.intersectWith(Known2);
}

// Bitfield extract is computed as (Src >> Offset) & Mask, where Mask is
// created using Width. Use this function when the inputs are KnownBits
// objects. TODO: Move this to KnownBits.h if this is usable in more cases.
static KnownBits extractBits(unsigned BitWidth, const KnownBits &SrcOpKnown,
                             const KnownBits &OffsetKnown,
                             const KnownBits &WidthKnown) {
  KnownBits Mask(BitWidth);
  Mask.Zero = APInt::getBitsSetFrom(
      BitWidth, WidthKnown.getMaxValue().getLimitedValue(BitWidth));
  Mask.One = APInt::getLowBitsSet(
      BitWidth, WidthKnown.getMinValue().getLimitedValue(BitWidth));
  return KnownBits::lshr(SrcOpKnown, OffsetKnown) & Mask;
}

void GISelKnownBits::computeKnownBitsImpl(Register R, KnownBits &Known,
                                          const APInt &DemandedElts,
                                          unsigned Depth) {
  MachineInstr &MI = *MRI.getVRegDef(R);
  unsigned Opcode = MI.getOpcode();
  LLT DstTy = MRI.getType(R);

  // Handle the case where this is called on a register that does not have a
  // type constraint. For example, it may be post-ISel or this target might not
  // preserve the type when early-selecting instructions.
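  // In that case, conservatively report a default-constructed KnownBits,
  // i.e. nothing known.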
  if (!DstTy.isValid()) {
    Known = KnownBits();
    return;
  }

#ifndef NDEBUG
  if (DstTy.isFixedVector()) {
    assert(
        DstTy.getNumElements() == DemandedElts.getBitWidth() &&
        "DemandedElt width should equal the fixed vector number of elements");
  } else {
    assert(DemandedElts.getBitWidth() == 1 && DemandedElts == APInt(1, 1) &&
           "DemandedElt width should be 1 for scalars or scalable vectors");
  }
#endif

  unsigned BitWidth = DstTy.getScalarSizeInBits();
  auto CacheEntry = ComputeKnownBitsCache.find(R);
  if (CacheEntry != ComputeKnownBitsCache.end()) {
    Known = CacheEntry->second;
    LLVM_DEBUG(dbgs() << "Cache hit at ");
    LLVM_DEBUG(dumpResult(MI, Known, Depth));
    assert(Known.getBitWidth() == BitWidth && "Cache entry size doesn't match");
    return;
  }
  Known = KnownBits(BitWidth); // Don't know anything

  // Depth may get bigger than max depth if it gets passed to a different
  // GISelKnownBits object.
  // This may happen when say a generic part uses a GISelKnownBits object
  // with some max depth, but then we hit TL.computeKnownBitsForTargetInstr
  // which creates a new GISelKnownBits object with a different and smaller
  // depth. If we just check for equality, we would never exit if the depth
  // that is passed down to the target specific GISelKnownBits object is
  // already bigger than its max depth.
  if (Depth >= getMaxDepth())
    return;

  if (!DemandedElts)
    return; // No demanded elts, better to assume we don't know anything.

  KnownBits Known2;

  switch (Opcode) {
  default:
    TL.computeKnownBitsForTargetInstr(*this, R, Known, DemandedElts, MRI,
                                      Depth);
    break;
  case TargetOpcode::G_BUILD_VECTOR: {
    // Collect the known bits that are shared by every demanded vector element.
    Known.Zero.setAllBits(); Known.One.setAllBits();
    for (unsigned i = 0, e = MI.getNumOperands() - 1; i < e; ++i) {
      if (!DemandedElts[i])
        continue;

      computeKnownBitsImpl(MI.getOperand(i + 1).getReg(), Known2, APInt(1, 1),
                           Depth + 1);

      // Known bits are the values that are shared by every demanded element.
      Known = Known.intersectWith(Known2);

      // If we don't know any bits, early out.
      if (Known.isUnknown())
        break;
    }
    break;
  }
  case TargetOpcode::COPY:
  case TargetOpcode::G_PHI:
  case TargetOpcode::PHI: {
    Known.One = APInt::getAllOnes(BitWidth);
    Known.Zero = APInt::getAllOnes(BitWidth);
    // Destination registers should not have subregisters at this
    // point of the pipeline, otherwise the main live-range will be
    // defined more than once, which is against SSA.
    assert(MI.getOperand(0).getSubReg() == 0 && "Is this code in SSA?");
    // Record in the cache that we know nothing for MI.
    // This will get updated later and in the meantime, if we reach that
    // phi again, because of a loop, we will cut the search thanks to this
    // cache entry.
    // We could actually build up more information on the phi by not cutting
    // the search, but that additional information is more a side effect
    // than an intended choice.
    // Therefore, for now, save on compile time until we find a proper way
    // to derive known bits for PHIs within loops.
    ComputeKnownBitsCache[R] = KnownBits(BitWidth);
    // A PHI's operands are a mix of registers and basic blocks, interleaved.
    // We only care about the register ones.
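    // e.g. %2:_(s32) = G_PHI %0(s32), %bb.1, %1(s32), %bb.2 has its value
    // operands at indices 1 and 3 and its predecessor blocks at 2 and 4.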
    for (unsigned Idx = 1; Idx < MI.getNumOperands(); Idx += 2) {
      const MachineOperand &Src = MI.getOperand(Idx);
      Register SrcReg = Src.getReg();
      // Look through trivial copies and phis but don't look through trivial
      // copies or phis of the form `%1:(s32) = OP %0:gpr32`; known-bits
      // analysis is currently unable to determine the bit width of a
      // register class.
      //
      // We can't use NoSubRegister by name as it's defined by each target but
      // it's always defined to be 0 by tablegen.
      if (SrcReg.isVirtual() && Src.getSubReg() == 0 /*NoSubRegister*/ &&
          MRI.getType(SrcReg).isValid()) {
        // For COPYs we don't do anything, don't increase the depth.
        computeKnownBitsImpl(SrcReg, Known2, DemandedElts,
                             Depth + (Opcode != TargetOpcode::COPY));
        Known2 = Known2.anyextOrTrunc(BitWidth);
        Known = Known.intersectWith(Known2);
        // If we reach a point where we don't know anything
        // just stop looking through the operands.
        if (Known.isUnknown())
          break;
      } else {
        // We know nothing.
        Known = KnownBits(BitWidth);
        break;
      }
    }
    break;
  }
  case TargetOpcode::G_CONSTANT: {
    Known = KnownBits::makeConstant(MI.getOperand(1).getCImm()->getValue());
    break;
  }
  case TargetOpcode::G_FRAME_INDEX: {
    int FrameIdx = MI.getOperand(1).getIndex();
    TL.computeKnownBitsForFrameIndex(FrameIdx, Known, MF);
    break;
  }
  case TargetOpcode::G_SUB: {
    computeKnownBitsImpl(MI.getOperand(1).getReg(), Known, DemandedElts,
                         Depth + 1);
    computeKnownBitsImpl(MI.getOperand(2).getReg(), Known2, DemandedElts,
                         Depth + 1);
    Known = KnownBits::sub(Known, Known2);
    break;
  }
  case TargetOpcode::G_XOR: {
    computeKnownBitsImpl(MI.getOperand(2).getReg(), Known, DemandedElts,
                         Depth + 1);
    computeKnownBitsImpl(MI.getOperand(1).getReg(), Known2, DemandedElts,
                         Depth + 1);

    Known ^= Known2;
    break;
  }
  case TargetOpcode::G_PTR_ADD: {
    if (DstTy.isVector())
      break;
    // G_PTR_ADD is like G_ADD. FIXME: Is this true for all targets?
    LLT Ty = MRI.getType(MI.getOperand(1).getReg());
    if (DL.isNonIntegralAddressSpace(Ty.getAddressSpace()))
      break;
    [[fallthrough]];
  }
  case TargetOpcode::G_ADD: {
    computeKnownBitsImpl(MI.getOperand(1).getReg(), Known, DemandedElts,
                         Depth + 1);
    computeKnownBitsImpl(MI.getOperand(2).getReg(), Known2, DemandedElts,
                         Depth + 1);
    Known = KnownBits::add(Known, Known2);
    break;
  }
  case TargetOpcode::G_AND: {
    // If either the LHS or the RHS is Zero, the result is zero.
    computeKnownBitsImpl(MI.getOperand(2).getReg(), Known, DemandedElts,
                         Depth + 1);
    computeKnownBitsImpl(MI.getOperand(1).getReg(), Known2, DemandedElts,
                         Depth + 1);

    Known &= Known2;
    break;
  }
  case TargetOpcode::G_OR: {
    // Bits are known one if set in either operand, and known zero only if
    // zero in both.
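    // e.g. OR-ing with an operand whose low bit is known one makes the
    // result's low bit known one regardless of the other operand.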
    computeKnownBitsImpl(MI.getOperand(2).getReg(), Known, DemandedElts,
                         Depth + 1);
    computeKnownBitsImpl(MI.getOperand(1).getReg(), Known2, DemandedElts,
                         Depth + 1);

    Known |= Known2;
    break;
  }
  case TargetOpcode::G_MUL: {
    computeKnownBitsImpl(MI.getOperand(2).getReg(), Known, DemandedElts,
                         Depth + 1);
    computeKnownBitsImpl(MI.getOperand(1).getReg(), Known2, DemandedElts,
                         Depth + 1);
    Known = KnownBits::mul(Known, Known2);
    break;
  }
  case TargetOpcode::G_SELECT: {
    computeKnownBitsMin(MI.getOperand(2).getReg(), MI.getOperand(3).getReg(),
                        Known, DemandedElts, Depth + 1);
    break;
  }
  case TargetOpcode::G_SMIN: {
    // TODO: Handle clamp pattern with number of sign bits
    KnownBits KnownRHS;
    computeKnownBitsImpl(MI.getOperand(1).getReg(), Known, DemandedElts,
                         Depth + 1);
    computeKnownBitsImpl(MI.getOperand(2).getReg(), KnownRHS, DemandedElts,
                         Depth + 1);
    Known = KnownBits::smin(Known, KnownRHS);
    break;
  }
  case TargetOpcode::G_SMAX: {
    // TODO: Handle clamp pattern with number of sign bits
    KnownBits KnownRHS;
    computeKnownBitsImpl(MI.getOperand(1).getReg(), Known, DemandedElts,
                         Depth + 1);
    computeKnownBitsImpl(MI.getOperand(2).getReg(), KnownRHS, DemandedElts,
                         Depth + 1);
    Known = KnownBits::smax(Known, KnownRHS);
    break;
  }
  case TargetOpcode::G_UMIN: {
    KnownBits KnownRHS;
    computeKnownBitsImpl(MI.getOperand(1).getReg(), Known, DemandedElts,
                         Depth + 1);
    computeKnownBitsImpl(MI.getOperand(2).getReg(), KnownRHS, DemandedElts,
                         Depth + 1);
    Known = KnownBits::umin(Known, KnownRHS);
    break;
  }
  case TargetOpcode::G_UMAX: {
    KnownBits KnownRHS;
    computeKnownBitsImpl(MI.getOperand(1).getReg(), Known, DemandedElts,
                         Depth + 1);
    computeKnownBitsImpl(MI.getOperand(2).getReg(), KnownRHS, DemandedElts,
                         Depth + 1);
    Known = KnownBits::umax(Known, KnownRHS);
    break;
  }
  case TargetOpcode::G_FCMP:
  case TargetOpcode::G_ICMP: {
    if (DstTy.isVector())
      break;
    if (TL.getBooleanContents(DstTy.isVector(),
                              Opcode == TargetOpcode::G_FCMP) ==
            TargetLowering::ZeroOrOneBooleanContent &&
        BitWidth > 1)
      Known.Zero.setBitsFrom(1);
    break;
  }
  case TargetOpcode::G_SEXT: {
    computeKnownBitsImpl(MI.getOperand(1).getReg(), Known, DemandedElts,
                         Depth + 1);
    // If the sign bit is known to be zero or one, then sext will extend
    // it to the top bits, else it will just zext.
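    // e.g. an s8 whose sign bit is known zero extends to an s16 whose top
    // nine bits are known zero.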
    Known = Known.sext(BitWidth);
    break;
  }
  case TargetOpcode::G_ASSERT_SEXT:
  case TargetOpcode::G_SEXT_INREG: {
    computeKnownBitsImpl(MI.getOperand(1).getReg(), Known, DemandedElts,
                         Depth + 1);
    Known = Known.sextInReg(MI.getOperand(2).getImm());
    break;
  }
  case TargetOpcode::G_ANYEXT: {
    computeKnownBitsImpl(MI.getOperand(1).getReg(), Known, DemandedElts,
                         Depth + 1);
    Known = Known.anyext(BitWidth);
    break;
  }
  case TargetOpcode::G_LOAD: {
    const MachineMemOperand *MMO = *MI.memoperands_begin();
    KnownBits KnownRange(MMO->getMemoryType().getScalarSizeInBits());
    if (const MDNode *Ranges = MMO->getRanges())
      computeKnownBitsFromRangeMetadata(*Ranges, KnownRange);
    Known = KnownRange.anyext(Known.getBitWidth());
    break;
  }
  case TargetOpcode::G_SEXTLOAD:
  case TargetOpcode::G_ZEXTLOAD: {
    if (DstTy.isVector())
      break;
    const MachineMemOperand *MMO = *MI.memoperands_begin();
    KnownBits KnownRange(MMO->getMemoryType().getScalarSizeInBits());
    if (const MDNode *Ranges = MMO->getRanges())
      computeKnownBitsFromRangeMetadata(*Ranges, KnownRange);
    Known = Opcode == TargetOpcode::G_SEXTLOAD
                ? KnownRange.sext(Known.getBitWidth())
                : KnownRange.zext(Known.getBitWidth());
    break;
  }
  case TargetOpcode::G_ASHR: {
    KnownBits LHSKnown, RHSKnown;
    computeKnownBitsImpl(MI.getOperand(1).getReg(), LHSKnown, DemandedElts,
                         Depth + 1);
    computeKnownBitsImpl(MI.getOperand(2).getReg(), RHSKnown, DemandedElts,
                         Depth + 1);
    Known = KnownBits::ashr(LHSKnown, RHSKnown);
    break;
  }
  case TargetOpcode::G_LSHR: {
    KnownBits LHSKnown, RHSKnown;
    computeKnownBitsImpl(MI.getOperand(1).getReg(), LHSKnown, DemandedElts,
                         Depth + 1);
    computeKnownBitsImpl(MI.getOperand(2).getReg(), RHSKnown, DemandedElts,
                         Depth + 1);
    Known = KnownBits::lshr(LHSKnown, RHSKnown);
    break;
  }
  case TargetOpcode::G_SHL: {
    KnownBits LHSKnown, RHSKnown;
    computeKnownBitsImpl(MI.getOperand(1).getReg(), LHSKnown, DemandedElts,
                         Depth + 1);
    computeKnownBitsImpl(MI.getOperand(2).getReg(), RHSKnown, DemandedElts,
                         Depth + 1);
    Known = KnownBits::shl(LHSKnown, RHSKnown);
    break;
  }
  case TargetOpcode::G_INTTOPTR:
  case TargetOpcode::G_PTRTOINT:
    if (DstTy.isVector())
      break;
    // Fall through and handle them the same as zext/trunc.
    [[fallthrough]];
  case TargetOpcode::G_ASSERT_ZEXT:
  case TargetOpcode::G_ZEXT:
  case TargetOpcode::G_TRUNC: {
    Register SrcReg = MI.getOperand(1).getReg();
    LLT SrcTy = MRI.getType(SrcReg);
    unsigned SrcBitWidth;

    // G_ASSERT_ZEXT stores the original bitwidth in the immediate operand.
    if (Opcode == TargetOpcode::G_ASSERT_ZEXT)
      SrcBitWidth = MI.getOperand(2).getImm();
    else {
      SrcBitWidth = SrcTy.isPointer()
                        ? DL.getIndexSizeInBits(SrcTy.getAddressSpace())
                        : SrcTy.getSizeInBits();
    }
    assert(SrcBitWidth && "SrcBitWidth can't be zero");
    Known = Known.zextOrTrunc(SrcBitWidth);
    computeKnownBitsImpl(SrcReg, Known, DemandedElts, Depth + 1);
    Known = Known.zextOrTrunc(BitWidth);
    if (BitWidth > SrcBitWidth)
      Known.Zero.setBitsFrom(SrcBitWidth);
    break;
  }
  case TargetOpcode::G_ASSERT_ALIGN: {
    int64_t LogOfAlign = Log2_64(MI.getOperand(2).getImm());

    // TODO: Should use maximum with source
    // If a node is guaranteed to be aligned, set low zero bits accordingly as
    // well as clearing one bits.
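    // e.g. an assertion of align 16 (LogOfAlign == 4) makes the low four bits
    // known zero.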
    Known.Zero.setLowBits(LogOfAlign);
    Known.One.clearLowBits(LogOfAlign);
    break;
  }
  case TargetOpcode::G_MERGE_VALUES: {
    unsigned NumOps = MI.getNumOperands();
    unsigned OpSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();

    for (unsigned I = 0; I != NumOps - 1; ++I) {
      KnownBits SrcOpKnown;
      computeKnownBitsImpl(MI.getOperand(I + 1).getReg(), SrcOpKnown,
                           DemandedElts, Depth + 1);
      Known.insertBits(SrcOpKnown, I * OpSize);
    }
    break;
  }
  case TargetOpcode::G_UNMERGE_VALUES: {
    unsigned NumOps = MI.getNumOperands();
    Register SrcReg = MI.getOperand(NumOps - 1).getReg();
    LLT SrcTy = MRI.getType(SrcReg);

    if (SrcTy.isVector() && SrcTy.getScalarType() != DstTy.getScalarType())
      return; // TODO: Handle vector->subelement unmerges

    // Figure out the result operand index
    unsigned DstIdx = 0;
    for (; DstIdx != NumOps - 1 && MI.getOperand(DstIdx).getReg() != R;
         ++DstIdx)
      ;

    APInt SubDemandedElts = DemandedElts;
    if (SrcTy.isVector()) {
      unsigned DstLanes = DstTy.isVector() ? DstTy.getNumElements() : 1;
      SubDemandedElts =
          DemandedElts.zext(SrcTy.getNumElements()).shl(DstIdx * DstLanes);
    }

    KnownBits SrcOpKnown;
    computeKnownBitsImpl(SrcReg, SrcOpKnown, SubDemandedElts, Depth + 1);

    if (SrcTy.isVector())
      Known = std::move(SrcOpKnown);
    else
      Known = SrcOpKnown.extractBits(BitWidth, BitWidth * DstIdx);
    break;
  }
  case TargetOpcode::G_BSWAP: {
    Register SrcReg = MI.getOperand(1).getReg();
    computeKnownBitsImpl(SrcReg, Known, DemandedElts, Depth + 1);
    Known = Known.byteSwap();
    break;
  }
  case TargetOpcode::G_BITREVERSE: {
    Register SrcReg = MI.getOperand(1).getReg();
    computeKnownBitsImpl(SrcReg, Known, DemandedElts, Depth + 1);
    Known = Known.reverseBits();
    break;
  }
  case TargetOpcode::G_CTPOP: {
    computeKnownBitsImpl(MI.getOperand(1).getReg(), Known2, DemandedElts,
                         Depth + 1);
    // We can bound the space the count needs. Also, bits known to be zero
    // can't contribute to the population.
    unsigned BitsPossiblySet = Known2.countMaxPopulation();
    unsigned LowBits = llvm::bit_width(BitsPossiblySet);
    Known.Zero.setBitsFrom(LowBits);
    // TODO: we could bound Known.One using the lower bound on the number of
    // bits which might be set provided by popcnt KnownOne2.
    break;
  }
  case TargetOpcode::G_UBFX: {
    KnownBits SrcOpKnown, OffsetKnown, WidthKnown;
    computeKnownBitsImpl(MI.getOperand(1).getReg(), SrcOpKnown, DemandedElts,
                         Depth + 1);
    computeKnownBitsImpl(MI.getOperand(2).getReg(), OffsetKnown, DemandedElts,
                         Depth + 1);
    computeKnownBitsImpl(MI.getOperand(3).getReg(), WidthKnown, DemandedElts,
                         Depth + 1);
    Known = extractBits(BitWidth, SrcOpKnown, OffsetKnown, WidthKnown);
    break;
  }
  case TargetOpcode::G_SBFX: {
    KnownBits SrcOpKnown, OffsetKnown, WidthKnown;
    computeKnownBitsImpl(MI.getOperand(1).getReg(), SrcOpKnown, DemandedElts,
                         Depth + 1);
    computeKnownBitsImpl(MI.getOperand(2).getReg(), OffsetKnown, DemandedElts,
                         Depth + 1);
    computeKnownBitsImpl(MI.getOperand(3).getReg(), WidthKnown, DemandedElts,
                         Depth + 1);
    Known = extractBits(BitWidth, SrcOpKnown, OffsetKnown, WidthKnown);
    // Sign extend the extracted value using shift left and arithmetic shift
    // right.
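    // i.e. (Known << (BitWidth - Width)) a>> (BitWidth - Width); e.g. for a
    // 32-bit G_SBFX whose width is known to be 8, this replicates bit 7 of
    // the extracted field into the upper 24 bits.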
    KnownBits ExtKnown = KnownBits::makeConstant(APInt(BitWidth, BitWidth));
    KnownBits ShiftKnown = KnownBits::sub(ExtKnown, WidthKnown);
    Known = KnownBits::ashr(KnownBits::shl(Known, ShiftKnown), ShiftKnown);
    break;
  }
  case TargetOpcode::G_UADDO:
  case TargetOpcode::G_UADDE:
  case TargetOpcode::G_SADDO:
  case TargetOpcode::G_SADDE:
  case TargetOpcode::G_USUBO:
  case TargetOpcode::G_USUBE:
  case TargetOpcode::G_SSUBO:
  case TargetOpcode::G_SSUBE:
  case TargetOpcode::G_UMULO:
  case TargetOpcode::G_SMULO: {
    if (MI.getOperand(1).getReg() == R) {
      // If we know the result of a compare has the top bits zero, use this
      // info.
      if (TL.getBooleanContents(DstTy.isVector(), false) ==
              TargetLowering::ZeroOrOneBooleanContent &&
          BitWidth > 1)
        Known.Zero.setBitsFrom(1);
    }
    break;
  }
  case TargetOpcode::G_CTLZ:
  case TargetOpcode::G_CTLZ_ZERO_UNDEF: {
    KnownBits SrcOpKnown;
    computeKnownBitsImpl(MI.getOperand(1).getReg(), SrcOpKnown, DemandedElts,
                         Depth + 1);
    // If we have a known 1, its position is our upper bound.
    unsigned PossibleLZ = SrcOpKnown.countMaxLeadingZeros();
    unsigned LowBits = llvm::bit_width(PossibleLZ);
    Known.Zero.setBitsFrom(LowBits);
    break;
  }
  }

  LLVM_DEBUG(dumpResult(MI, Known, Depth));

  // Update the cache.
  ComputeKnownBitsCache[R] = Known;
}

/// Compute number of sign bits for the intersection of \p Src0 and \p Src1
unsigned GISelKnownBits::computeNumSignBitsMin(Register Src0, Register Src1,
                                               const APInt &DemandedElts,
                                               unsigned Depth) {
  // Test src1 first, since we canonicalize simpler expressions to the RHS.
  unsigned Src1SignBits = computeNumSignBits(Src1, DemandedElts, Depth);
  if (Src1SignBits == 1)
    return 1;
  return std::min(computeNumSignBits(Src0, DemandedElts, Depth), Src1SignBits);
}

/// Compute the known number of sign bits with attached range metadata in the
/// memory operand. If this is an extending load, accounts for the behavior of
/// the high bits.
static unsigned computeNumSignBitsFromRangeMetadata(const GAnyLoad *Ld,
                                                    unsigned TyBits) {
  const MDNode *Ranges = Ld->getRanges();
  if (!Ranges)
    return 1;

  ConstantRange CR = getConstantRangeFromMetadata(*Ranges);
  if (TyBits > CR.getBitWidth()) {
    switch (Ld->getOpcode()) {
    case TargetOpcode::G_SEXTLOAD:
      CR = CR.signExtend(TyBits);
      break;
    case TargetOpcode::G_ZEXTLOAD:
      CR = CR.zeroExtend(TyBits);
      break;
    default:
      break;
    }
  }

  return std::min(CR.getSignedMin().getNumSignBits(),
                  CR.getSignedMax().getNumSignBits());
}

unsigned GISelKnownBits::computeNumSignBits(Register R,
                                            const APInt &DemandedElts,
                                            unsigned Depth) {
  MachineInstr &MI = *MRI.getVRegDef(R);
  unsigned Opcode = MI.getOpcode();

  if (Opcode == TargetOpcode::G_CONSTANT)
    return MI.getOperand(1).getCImm()->getValue().getNumSignBits();

  if (Depth == getMaxDepth())
    return 1;

  if (!DemandedElts)
    return 1; // No demanded elts, better to assume we don't know anything.

  LLT DstTy = MRI.getType(R);
  const unsigned TyBits = DstTy.getScalarSizeInBits();

  // Handle the case where this is called on a register that does not have a
  // type constraint. This is unlikely to occur except by looking through
  // copies but it is possible for the initial register being queried to be in
  // this state.
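  // Returning 1 is always a conservatively correct answer: every value has at
  // least one sign bit.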
  if (!DstTy.isValid())
    return 1;

  unsigned FirstAnswer = 1;
  switch (Opcode) {
  case TargetOpcode::COPY: {
    MachineOperand &Src = MI.getOperand(1);
    if (Src.getReg().isVirtual() && Src.getSubReg() == 0 &&
        MRI.getType(Src.getReg()).isValid()) {
      // Don't increment Depth for this one since we didn't do any work.
      return computeNumSignBits(Src.getReg(), DemandedElts, Depth);
    }

    return 1;
  }
  case TargetOpcode::G_SEXT: {
    Register Src = MI.getOperand(1).getReg();
    LLT SrcTy = MRI.getType(Src);
    unsigned Tmp = DstTy.getScalarSizeInBits() - SrcTy.getScalarSizeInBits();
    return computeNumSignBits(Src, DemandedElts, Depth + 1) + Tmp;
  }
  case TargetOpcode::G_ASSERT_SEXT:
  case TargetOpcode::G_SEXT_INREG: {
    // Max of the input and what this extends.
    Register Src = MI.getOperand(1).getReg();
    unsigned SrcBits = MI.getOperand(2).getImm();
    unsigned InRegBits = TyBits - SrcBits + 1;
    return std::max(computeNumSignBits(Src, DemandedElts, Depth + 1),
                    InRegBits);
  }
  case TargetOpcode::G_LOAD: {
    GLoad *Ld = cast<GLoad>(&MI);
    if (DemandedElts != 1 || !getDataLayout().isLittleEndian())
      break;

    return computeNumSignBitsFromRangeMetadata(Ld, TyBits);
  }
  case TargetOpcode::G_SEXTLOAD: {
    GSExtLoad *Ld = cast<GSExtLoad>(&MI);

    // FIXME: We need an in-memory type representation.
    if (DstTy.isVector())
      return 1;

    unsigned NumBits = computeNumSignBitsFromRangeMetadata(Ld, TyBits);
    if (NumBits != 1)
      return NumBits;

    // e.g. i16->i32 = '17' bits known.
    const MachineMemOperand *MMO = *MI.memoperands_begin();
    return TyBits - MMO->getSizeInBits().getValue() + 1;
  }
  case TargetOpcode::G_ZEXTLOAD: {
    GZExtLoad *Ld = cast<GZExtLoad>(&MI);

    // FIXME: We need an in-memory type representation.
    if (DstTy.isVector())
      return 1;

    unsigned NumBits = computeNumSignBitsFromRangeMetadata(Ld, TyBits);
    if (NumBits != 1)
      return NumBits;

    // e.g. i16->i32 = '16' bits known.
    const MachineMemOperand *MMO = *MI.memoperands_begin();
    return TyBits - MMO->getSizeInBits().getValue();
  }
  case TargetOpcode::G_AND:
  case TargetOpcode::G_OR:
  case TargetOpcode::G_XOR: {
    Register Src1 = MI.getOperand(1).getReg();
    unsigned Src1NumSignBits =
        computeNumSignBits(Src1, DemandedElts, Depth + 1);
    if (Src1NumSignBits != 1) {
      Register Src2 = MI.getOperand(2).getReg();
      unsigned Src2NumSignBits =
          computeNumSignBits(Src2, DemandedElts, Depth + 1);
      FirstAnswer = std::min(Src1NumSignBits, Src2NumSignBits);
    }
    break;
  }
  case TargetOpcode::G_TRUNC: {
    Register Src = MI.getOperand(1).getReg();
    LLT SrcTy = MRI.getType(Src);

    // Check if the sign bits of source go down as far as the truncated value.
    unsigned DstTyBits = DstTy.getScalarSizeInBits();
    unsigned NumSrcBits = SrcTy.getScalarSizeInBits();
    unsigned NumSrcSignBits = computeNumSignBits(Src, DemandedElts, Depth + 1);
    if (NumSrcSignBits > (NumSrcBits - DstTyBits))
      return NumSrcSignBits - (NumSrcBits - DstTyBits);
    break;
  }
  case TargetOpcode::G_SELECT: {
    return computeNumSignBitsMin(MI.getOperand(2).getReg(),
                                 MI.getOperand(3).getReg(), DemandedElts,
                                 Depth + 1);
  }
  case TargetOpcode::G_SMIN:
  case TargetOpcode::G_SMAX:
  case TargetOpcode::G_UMIN:
  case TargetOpcode::G_UMAX:
    // TODO: Handle clamp pattern with number of sign bits for SMIN/SMAX.
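    // The result is always one of the two operands, so it has at least as
    // many sign bits as the operand with fewer.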
    return computeNumSignBitsMin(MI.getOperand(1).getReg(),
                                 MI.getOperand(2).getReg(), DemandedElts,
                                 Depth + 1);
  case TargetOpcode::G_SADDO:
  case TargetOpcode::G_SADDE:
  case TargetOpcode::G_UADDO:
  case TargetOpcode::G_UADDE:
  case TargetOpcode::G_SSUBO:
  case TargetOpcode::G_SSUBE:
  case TargetOpcode::G_USUBO:
  case TargetOpcode::G_USUBE:
  case TargetOpcode::G_SMULO:
  case TargetOpcode::G_UMULO: {
    // If compares return 0/-1, all bits are sign bits.
    // We know that we have an integer-based boolean since these operations
    // are only available for integer.
    if (MI.getOperand(1).getReg() == R) {
      if (TL.getBooleanContents(DstTy.isVector(), false) ==
          TargetLowering::ZeroOrNegativeOneBooleanContent)
        return TyBits;
    }

    break;
  }
  case TargetOpcode::G_FCMP:
  case TargetOpcode::G_ICMP: {
    bool IsFP = Opcode == TargetOpcode::G_FCMP;
    if (TyBits == 1)
      break;
    auto BC = TL.getBooleanContents(DstTy.isVector(), IsFP);
    if (BC == TargetLoweringBase::ZeroOrNegativeOneBooleanContent)
      return TyBits; // All bits are sign bits.
    if (BC == TargetLowering::ZeroOrOneBooleanContent)
      return TyBits - 1; // Every always-zero bit is a sign bit.
    break;
  }
  case TargetOpcode::G_INTRINSIC:
  case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
  case TargetOpcode::G_INTRINSIC_CONVERGENT:
  case TargetOpcode::G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS:
  default: {
    unsigned NumBits =
        TL.computeNumSignBitsForTargetInstr(*this, R, DemandedElts, MRI, Depth);
    if (NumBits > 1)
      FirstAnswer = std::max(FirstAnswer, NumBits);
    break;
  }
  }

  // Finally, if we can prove that the top bits of the result are 0's or 1's,
  // use this information.
  KnownBits Known = getKnownBits(R, DemandedElts, Depth);
  APInt Mask;
  if (Known.isNonNegative()) { // sign bit is 0
    Mask = Known.Zero;
  } else if (Known.isNegative()) { // sign bit is 1
    Mask = Known.One;
  } else {
    // Nothing known.
    return FirstAnswer;
  }

  // Okay, we know that the sign bit in Mask is set. Use CLO to determine
  // the number of identical bits in the top of the input value.
  Mask <<= Mask.getBitWidth() - TyBits;
  return std::max(FirstAnswer, Mask.countl_one());
}

unsigned GISelKnownBits::computeNumSignBits(Register R, unsigned Depth) {
  LLT Ty = MRI.getType(R);
  APInt DemandedElts =
      Ty.isVector() ? APInt::getAllOnes(Ty.getNumElements()) : APInt(1, 1);
  return computeNumSignBits(R, DemandedElts, Depth);
}

void GISelKnownBitsAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
  AU.setPreservesAll();
  MachineFunctionPass::getAnalysisUsage(AU);
}

bool GISelKnownBitsAnalysis::runOnMachineFunction(MachineFunction &MF) {
  return false;
}

GISelKnownBits &GISelKnownBitsAnalysis::get(MachineFunction &MF) {
  if (!Info) {
    unsigned MaxDepth =
        MF.getTarget().getOptLevel() == CodeGenOptLevel::None ? 2 : 6;
    Info = std::make_unique<GISelKnownBits>(MF, MaxDepth);
  }
  return *Info;
}