1 //===- X86LegalizerInfo.cpp --------------------------------------*- C++ -*-==// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 /// \file 9 /// This file implements the targeting of the Machinelegalizer class for X86. 10 /// \todo This should be generated by TableGen. 11 //===----------------------------------------------------------------------===// 12 13 #include "X86LegalizerInfo.h" 14 #include "X86Subtarget.h" 15 #include "X86TargetMachine.h" 16 #include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h" 17 #include "llvm/CodeGen/GlobalISel/LegalizerHelper.h" 18 #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h" 19 #include "llvm/CodeGen/MachineConstantPool.h" 20 #include "llvm/CodeGen/TargetOpcodes.h" 21 #include "llvm/CodeGen/ValueTypes.h" 22 #include "llvm/IR/DerivedTypes.h" 23 #include "llvm/IR/Type.h" 24 25 using namespace llvm; 26 using namespace TargetOpcode; 27 using namespace LegalizeActions; 28 using namespace LegalityPredicates; 29 30 X86LegalizerInfo::X86LegalizerInfo(const X86Subtarget &STI, 31 const X86TargetMachine &TM) 32 : Subtarget(STI) { 33 34 bool Is64Bit = Subtarget.is64Bit(); 35 bool HasCMOV = Subtarget.canUseCMOV(); 36 bool HasSSE1 = Subtarget.hasSSE1(); 37 bool HasSSE2 = Subtarget.hasSSE2(); 38 bool HasSSE41 = Subtarget.hasSSE41(); 39 bool HasAVX = Subtarget.hasAVX(); 40 bool HasAVX2 = Subtarget.hasAVX2(); 41 bool HasAVX512 = Subtarget.hasAVX512(); 42 bool HasVLX = Subtarget.hasVLX(); 43 bool HasDQI = Subtarget.hasAVX512() && Subtarget.hasDQI(); 44 bool HasBWI = Subtarget.hasAVX512() && Subtarget.hasBWI(); 45 bool UseX87 = !Subtarget.useSoftFloat() && Subtarget.hasX87(); 46 47 const LLT p0 = LLT::pointer(0, TM.getPointerSizeInBits(0)); 48 const LLT s1 = LLT::scalar(1); 49 const LLT s8 = LLT::scalar(8); 50 const LLT s16 = LLT::scalar(16); 51 const LLT s32 = LLT::scalar(32); 52 const LLT s64 = LLT::scalar(64); 53 const LLT s80 = LLT::scalar(80); 54 const LLT s128 = LLT::scalar(128); 55 const LLT sMaxScalar = Subtarget.is64Bit() ? s64 : s32; 56 const LLT v2s32 = LLT::fixed_vector(2, 32); 57 const LLT v4s8 = LLT::fixed_vector(4, 8); 58 59 60 const LLT v16s8 = LLT::fixed_vector(16, 8); 61 const LLT v8s16 = LLT::fixed_vector(8, 16); 62 const LLT v4s32 = LLT::fixed_vector(4, 32); 63 const LLT v2s64 = LLT::fixed_vector(2, 64); 64 const LLT v2p0 = LLT::fixed_vector(2, p0); 65 66 const LLT v32s8 = LLT::fixed_vector(32, 8); 67 const LLT v16s16 = LLT::fixed_vector(16, 16); 68 const LLT v8s32 = LLT::fixed_vector(8, 32); 69 const LLT v4s64 = LLT::fixed_vector(4, 64); 70 const LLT v4p0 = LLT::fixed_vector(4, p0); 71 72 const LLT v64s8 = LLT::fixed_vector(64, 8); 73 const LLT v32s16 = LLT::fixed_vector(32, 16); 74 const LLT v16s32 = LLT::fixed_vector(16, 32); 75 const LLT v8s64 = LLT::fixed_vector(8, 64); 76 77 const LLT s8MaxVector = HasAVX512 ? v64s8 : HasAVX ? v32s8 : v16s8; 78 const LLT s16MaxVector = HasAVX512 ? v32s16 : HasAVX ? v16s16 : v8s16; 79 const LLT s32MaxVector = HasAVX512 ? v16s32 : HasAVX ? v8s32 : v4s32; 80 const LLT s64MaxVector = HasAVX512 ? v8s64 : HasAVX ? v4s64 : v2s64; 81 82 // todo: AVX512 bool vector predicate types 83 84 // implicit/constants 85 getActionDefinitionsBuilder(G_IMPLICIT_DEF) 86 .legalIf([=](const LegalityQuery &Query) -> bool { 87 // 32/64-bits needs support for s64/s128 to handle cases: 88 // s64 = EXTEND (G_IMPLICIT_DEF s32) -> s64 = G_IMPLICIT_DEF 89 // s128 = EXTEND (G_IMPLICIT_DEF s32/s64) -> s128 = G_IMPLICIT_DEF 90 return typeInSet(0, {p0, s1, s8, s16, s32, s64})(Query) || 91 (Is64Bit && typeInSet(0, {s128})(Query)); 92 }); 93 94 getActionDefinitionsBuilder(G_CONSTANT) 95 .legalIf([=](const LegalityQuery &Query) -> bool { 96 return typeInSet(0, {p0, s8, s16, s32})(Query) || 97 (Is64Bit && typeInSet(0, {s64})(Query)); 98 }) 99 .widenScalarToNextPow2(0, /*Min=*/8) 100 .clampScalar(0, s8, sMaxScalar); 101 102 // merge/unmerge 103 for (unsigned Op : {G_MERGE_VALUES, G_UNMERGE_VALUES}) { 104 unsigned BigTyIdx = Op == G_MERGE_VALUES ? 0 : 1; 105 unsigned LitTyIdx = Op == G_MERGE_VALUES ? 1 : 0; 106 getActionDefinitionsBuilder(Op) 107 .widenScalarToNextPow2(LitTyIdx, /*Min=*/8) 108 .widenScalarToNextPow2(BigTyIdx, /*Min=*/16) 109 .minScalar(LitTyIdx, s8) 110 .minScalar(BigTyIdx, s32) 111 .legalIf([=](const LegalityQuery &Q) { 112 switch (Q.Types[BigTyIdx].getSizeInBits()) { 113 case 16: 114 case 32: 115 case 64: 116 case 128: 117 case 256: 118 case 512: 119 break; 120 default: 121 return false; 122 } 123 switch (Q.Types[LitTyIdx].getSizeInBits()) { 124 case 8: 125 case 16: 126 case 32: 127 case 64: 128 case 128: 129 case 256: 130 return true; 131 default: 132 return false; 133 } 134 }); 135 } 136 137 // integer addition/subtraction 138 getActionDefinitionsBuilder({G_ADD, G_SUB}) 139 .legalIf([=](const LegalityQuery &Query) -> bool { 140 if (typeInSet(0, {s8, s16, s32})(Query)) 141 return true; 142 if (Is64Bit && typeInSet(0, {s64})(Query)) 143 return true; 144 if (HasSSE2 && typeInSet(0, {v16s8, v8s16, v4s32, v2s64})(Query)) 145 return true; 146 if (HasAVX2 && typeInSet(0, {v32s8, v16s16, v8s32, v4s64})(Query)) 147 return true; 148 if (HasAVX512 && typeInSet(0, {v16s32, v8s64})(Query)) 149 return true; 150 if (HasBWI && typeInSet(0, {v64s8, v32s16})(Query)) 151 return true; 152 return false; 153 }) 154 .clampMinNumElements(0, s8, 16) 155 .clampMinNumElements(0, s16, 8) 156 .clampMinNumElements(0, s32, 4) 157 .clampMinNumElements(0, s64, 2) 158 .clampMaxNumElements(0, s8, HasBWI ? 64 : (HasAVX2 ? 32 : 16)) 159 .clampMaxNumElements(0, s16, HasBWI ? 32 : (HasAVX2 ? 16 : 8)) 160 .clampMaxNumElements(0, s32, HasAVX512 ? 16 : (HasAVX2 ? 8 : 4)) 161 .clampMaxNumElements(0, s64, HasAVX512 ? 8 : (HasAVX2 ? 4 : 2)) 162 .widenScalarToNextPow2(0, /*Min=*/32) 163 .clampScalar(0, s8, sMaxScalar) 164 .scalarize(0); 165 166 getActionDefinitionsBuilder({G_UADDE, G_UADDO, G_USUBE, G_USUBO}) 167 .legalIf([=](const LegalityQuery &Query) -> bool { 168 return typePairInSet(0, 1, {{s8, s1}, {s16, s1}, {s32, s1}})(Query) || 169 (Is64Bit && typePairInSet(0, 1, {{s64, s1}})(Query)); 170 }) 171 .widenScalarToNextPow2(0, /*Min=*/32) 172 .clampScalar(0, s8, sMaxScalar) 173 .clampScalar(1, s1, s1) 174 .scalarize(0); 175 176 // integer multiply 177 getActionDefinitionsBuilder(G_MUL) 178 .legalIf([=](const LegalityQuery &Query) -> bool { 179 if (typeInSet(0, {s8, s16, s32})(Query)) 180 return true; 181 if (Is64Bit && typeInSet(0, {s64})(Query)) 182 return true; 183 if (HasSSE2 && typeInSet(0, {v8s16})(Query)) 184 return true; 185 if (HasSSE41 && typeInSet(0, {v4s32})(Query)) 186 return true; 187 if (HasAVX2 && typeInSet(0, {v16s16, v8s32})(Query)) 188 return true; 189 if (HasAVX512 && typeInSet(0, {v16s32})(Query)) 190 return true; 191 if (HasDQI && typeInSet(0, {v8s64})(Query)) 192 return true; 193 if (HasDQI && HasVLX && typeInSet(0, {v2s64, v4s64})(Query)) 194 return true; 195 if (HasBWI && typeInSet(0, {v32s16})(Query)) 196 return true; 197 return false; 198 }) 199 .clampMinNumElements(0, s16, 8) 200 .clampMinNumElements(0, s32, 4) 201 .clampMinNumElements(0, s64, HasVLX ? 2 : 8) 202 .clampMaxNumElements(0, s16, HasBWI ? 32 : (HasAVX2 ? 16 : 8)) 203 .clampMaxNumElements(0, s32, HasAVX512 ? 16 : (HasAVX2 ? 8 : 4)) 204 .clampMaxNumElements(0, s64, 8) 205 .widenScalarToNextPow2(0, /*Min=*/32) 206 .clampScalar(0, s8, sMaxScalar) 207 .scalarize(0); 208 209 getActionDefinitionsBuilder({G_SMULH, G_UMULH}) 210 .legalIf([=](const LegalityQuery &Query) -> bool { 211 return typeInSet(0, {s8, s16, s32})(Query) || 212 (Is64Bit && typeInSet(0, {s64})(Query)); 213 }) 214 .widenScalarToNextPow2(0, /*Min=*/32) 215 .clampScalar(0, s8, sMaxScalar) 216 .scalarize(0); 217 218 // integer divisions 219 getActionDefinitionsBuilder({G_SDIV, G_SREM, G_UDIV, G_UREM}) 220 .legalIf([=](const LegalityQuery &Query) -> bool { 221 return typeInSet(0, {s8, s16, s32})(Query) || 222 (Is64Bit && typeInSet(0, {s64})(Query)); 223 }) 224 .libcallFor({s64}) 225 .clampScalar(0, s8, sMaxScalar); 226 227 // integer shifts 228 getActionDefinitionsBuilder({G_SHL, G_LSHR, G_ASHR}) 229 .legalIf([=](const LegalityQuery &Query) -> bool { 230 return typePairInSet(0, 1, {{s8, s8}, {s16, s8}, {s32, s8}})(Query) || 231 (Is64Bit && typePairInSet(0, 1, {{s64, s8}})(Query)); 232 }) 233 .clampScalar(0, s8, sMaxScalar) 234 .clampScalar(1, s8, s8); 235 236 // integer logic 237 getActionDefinitionsBuilder({G_AND, G_OR, G_XOR}) 238 .legalIf([=](const LegalityQuery &Query) -> bool { 239 if (typeInSet(0, {s8, s16, s32})(Query)) 240 return true; 241 if (Is64Bit && typeInSet(0, {s64})(Query)) 242 return true; 243 if (HasSSE2 && typeInSet(0, {v16s8, v8s16, v4s32, v2s64})(Query)) 244 return true; 245 if (HasAVX && typeInSet(0, {v32s8, v16s16, v8s32, v4s64})(Query)) 246 return true; 247 if (HasAVX512 && typeInSet(0, {v64s8, v32s16, v16s32, v8s64})(Query)) 248 return true; 249 return false; 250 }) 251 .clampMinNumElements(0, s8, 16) 252 .clampMinNumElements(0, s16, 8) 253 .clampMinNumElements(0, s32, 4) 254 .clampMinNumElements(0, s64, 2) 255 .clampMaxNumElements(0, s8, HasAVX512 ? 64 : (HasAVX ? 32 : 16)) 256 .clampMaxNumElements(0, s16, HasAVX512 ? 32 : (HasAVX ? 16 : 8)) 257 .clampMaxNumElements(0, s32, HasAVX512 ? 16 : (HasAVX ? 8 : 4)) 258 .clampMaxNumElements(0, s64, HasAVX512 ? 8 : (HasAVX ? 4 : 2)) 259 .widenScalarToNextPow2(0, /*Min=*/32) 260 .clampScalar(0, s8, sMaxScalar) 261 .scalarize(0); 262 263 // integer comparison 264 const std::initializer_list<LLT> IntTypes32 = {s8, s16, s32, p0}; 265 const std::initializer_list<LLT> IntTypes64 = {s8, s16, s32, s64, p0}; 266 267 getActionDefinitionsBuilder(G_ICMP) 268 .legalForCartesianProduct({s8}, Is64Bit ? IntTypes64 : IntTypes32) 269 .clampScalar(0, s8, s8) 270 .clampScalar(1, s8, sMaxScalar); 271 272 // bswap 273 getActionDefinitionsBuilder(G_BSWAP) 274 .legalIf([=](const LegalityQuery &Query) { 275 return Query.Types[0] == s32 || 276 (Subtarget.is64Bit() && Query.Types[0] == s64); 277 }) 278 .widenScalarToNextPow2(0, /*Min=*/32) 279 .clampScalar(0, s32, sMaxScalar); 280 281 // popcount 282 getActionDefinitionsBuilder(G_CTPOP) 283 .legalIf([=](const LegalityQuery &Query) -> bool { 284 return Subtarget.hasPOPCNT() && 285 (typePairInSet(0, 1, {{s16, s16}, {s32, s32}})(Query) || 286 (Is64Bit && typePairInSet(0, 1, {{s64, s64}})(Query))); 287 }) 288 .widenScalarToNextPow2(1, /*Min=*/16) 289 .clampScalar(1, s16, sMaxScalar) 290 .scalarSameSizeAs(0, 1); 291 292 // count leading zeros (LZCNT) 293 getActionDefinitionsBuilder(G_CTLZ) 294 .legalIf([=](const LegalityQuery &Query) -> bool { 295 return Subtarget.hasLZCNT() && 296 (typePairInSet(0, 1, {{s16, s16}, {s32, s32}})(Query) || 297 (Is64Bit && typePairInSet(0, 1, {{s64, s64}})(Query))); 298 }) 299 .widenScalarToNextPow2(1, /*Min=*/16) 300 .clampScalar(1, s16, sMaxScalar) 301 .scalarSameSizeAs(0, 1); 302 303 // count trailing zeros 304 getActionDefinitionsBuilder({G_CTTZ_ZERO_UNDEF, G_CTTZ}) 305 .legalIf([=](const LegalityQuery &Query) -> bool { 306 return (Query.Opcode == G_CTTZ_ZERO_UNDEF || Subtarget.hasBMI()) && 307 (typePairInSet(0, 1, {{s16, s16}, {s32, s32}})(Query) || 308 (Is64Bit && typePairInSet(0, 1, {{s64, s64}})(Query))); 309 }) 310 .widenScalarToNextPow2(1, /*Min=*/16) 311 .clampScalar(1, s16, sMaxScalar) 312 .scalarSameSizeAs(0, 1); 313 314 // control flow 315 getActionDefinitionsBuilder(G_PHI) 316 .legalIf([=](const LegalityQuery &Query) -> bool { 317 return typeInSet(0, {s8, s16, s32, p0})(Query) || 318 (UseX87 && typeIs(0, s80)(Query)) || 319 (Is64Bit && typeIs(0, s64)(Query)) || 320 (HasSSE1 && typeInSet(0, {v16s8, v8s16, v4s32, v2s64})(Query)) || 321 (HasAVX && typeInSet(0, {v32s8, v16s16, v8s32, v4s64})(Query)) || 322 (HasAVX512 && 323 typeInSet(0, {v64s8, v32s16, v16s32, v8s64})(Query)); 324 }) 325 .clampMinNumElements(0, s8, 16) 326 .clampMinNumElements(0, s16, 8) 327 .clampMinNumElements(0, s32, 4) 328 .clampMinNumElements(0, s64, 2) 329 .clampMaxNumElements(0, s8, HasAVX512 ? 64 : (HasAVX ? 32 : 16)) 330 .clampMaxNumElements(0, s16, HasAVX512 ? 32 : (HasAVX ? 16 : 8)) 331 .clampMaxNumElements(0, s32, HasAVX512 ? 16 : (HasAVX ? 8 : 4)) 332 .clampMaxNumElements(0, s64, HasAVX512 ? 8 : (HasAVX ? 4 : 2)) 333 .widenScalarToNextPow2(0, /*Min=*/32) 334 .clampScalar(0, s8, sMaxScalar) 335 .scalarize(0); 336 337 getActionDefinitionsBuilder(G_BRCOND).legalFor({s1}); 338 339 // pointer handling 340 const std::initializer_list<LLT> PtrTypes32 = {s1, s8, s16, s32}; 341 const std::initializer_list<LLT> PtrTypes64 = {s1, s8, s16, s32, s64}; 342 343 getActionDefinitionsBuilder(G_PTRTOINT) 344 .legalForCartesianProduct(Is64Bit ? PtrTypes64 : PtrTypes32, {p0}) 345 .maxScalar(0, sMaxScalar) 346 .widenScalarToNextPow2(0, /*Min*/ 8); 347 348 getActionDefinitionsBuilder(G_INTTOPTR).legalFor({{p0, sMaxScalar}}); 349 350 getActionDefinitionsBuilder(G_CONSTANT_POOL).legalFor({p0}); 351 352 getActionDefinitionsBuilder(G_PTR_ADD) 353 .legalIf([=](const LegalityQuery &Query) -> bool { 354 return typePairInSet(0, 1, {{p0, s32}})(Query) || 355 (Is64Bit && typePairInSet(0, 1, {{p0, s64}})(Query)); 356 }) 357 .widenScalarToNextPow2(1, /*Min*/ 32) 358 .clampScalar(1, s32, sMaxScalar); 359 360 getActionDefinitionsBuilder({G_FRAME_INDEX, G_GLOBAL_VALUE}).legalFor({p0}); 361 362 // load/store: add more corner cases 363 for (unsigned Op : {G_LOAD, G_STORE}) { 364 auto &Action = getActionDefinitionsBuilder(Op); 365 Action.legalForTypesWithMemDesc({{s8, p0, s1, 1}, 366 {s8, p0, s8, 1}, 367 {s16, p0, s8, 1}, 368 {s16, p0, s16, 1}, 369 {s32, p0, s8, 1}, 370 {s32, p0, s16, 1}, 371 {s32, p0, s32, 1}, 372 {s80, p0, s80, 1}, 373 {p0, p0, p0, 1}, 374 {v4s8, p0, v4s8, 1}}); 375 if (Is64Bit) 376 Action.legalForTypesWithMemDesc({{s64, p0, s8, 1}, 377 {s64, p0, s16, 1}, 378 {s64, p0, s32, 1}, 379 {s64, p0, s64, 1}, 380 {v2s32, p0, v2s32, 1}}); 381 if (HasSSE1) 382 Action.legalForTypesWithMemDesc({{v4s32, p0, v4s32, 1}}); 383 if (HasSSE2) 384 Action.legalForTypesWithMemDesc({{v16s8, p0, v16s8, 1}, 385 {v8s16, p0, v8s16, 1}, 386 {v2s64, p0, v2s64, 1}, 387 {v2p0, p0, v2p0, 1}}); 388 if (HasAVX) 389 Action.legalForTypesWithMemDesc({{v32s8, p0, v32s8, 1}, 390 {v16s16, p0, v16s16, 1}, 391 {v8s32, p0, v8s32, 1}, 392 {v4s64, p0, v4s64, 1}, 393 {v4p0, p0, v4p0, 1}}); 394 if (HasAVX512) 395 Action.legalForTypesWithMemDesc({{v64s8, p0, v64s8, 1}, 396 {v32s16, p0, v32s16, 1}, 397 {v16s32, p0, v16s32, 1}, 398 {v8s64, p0, v8s64, 1}}); 399 Action.widenScalarToNextPow2(0, /*Min=*/8) 400 .clampScalar(0, s8, sMaxScalar) 401 .scalarize(0); 402 } 403 404 for (unsigned Op : {G_SEXTLOAD, G_ZEXTLOAD}) { 405 auto &Action = getActionDefinitionsBuilder(Op); 406 Action.legalForTypesWithMemDesc({{s16, p0, s8, 1}, 407 {s32, p0, s8, 1}, 408 {s32, p0, s16, 1}}); 409 if (Is64Bit) 410 Action.legalForTypesWithMemDesc({{s64, p0, s8, 1}, 411 {s64, p0, s16, 1}, 412 {s64, p0, s32, 1}}); 413 // TODO - SSE41/AVX2/AVX512F/AVX512BW vector extensions 414 } 415 416 // sext, zext, and anyext 417 getActionDefinitionsBuilder({G_SEXT, G_ZEXT, G_ANYEXT}) 418 .legalIf([=](const LegalityQuery &Query) { 419 return typeInSet(0, {s8, s16, s32})(Query) || 420 (Query.Opcode == G_ANYEXT && Query.Types[0] == s128) || 421 (Is64Bit && Query.Types[0] == s64); 422 }) 423 .widenScalarToNextPow2(0, /*Min=*/8) 424 .clampScalar(0, s8, sMaxScalar) 425 .widenScalarToNextPow2(1, /*Min=*/8) 426 .clampScalar(1, s8, sMaxScalar) 427 .scalarize(0); 428 429 getActionDefinitionsBuilder(G_SEXT_INREG).lower(); 430 431 // fp constants 432 getActionDefinitionsBuilder(G_FCONSTANT) 433 .legalIf([=](const LegalityQuery &Query) -> bool { 434 return (typeInSet(0, {s32, s64})(Query)) || 435 (UseX87 && typeInSet(0, {s80})(Query)); 436 }); 437 438 // fp arithmetic 439 getActionDefinitionsBuilder({G_FADD, G_FSUB, G_FMUL, G_FDIV}) 440 .legalIf([=](const LegalityQuery &Query) { 441 return (typeInSet(0, {s32, s64})(Query)) || 442 (HasSSE1 && typeInSet(0, {v4s32})(Query)) || 443 (HasSSE2 && typeInSet(0, {v2s64})(Query)) || 444 (HasAVX && typeInSet(0, {v8s32, v4s64})(Query)) || 445 (HasAVX512 && typeInSet(0, {v16s32, v8s64})(Query)) || 446 (UseX87 && typeInSet(0, {s80})(Query)); 447 }); 448 449 // fp comparison 450 getActionDefinitionsBuilder(G_FCMP) 451 .legalIf([=](const LegalityQuery &Query) { 452 return (HasSSE1 && typePairInSet(0, 1, {{s8, s32}})(Query)) || 453 (HasSSE2 && typePairInSet(0, 1, {{s8, s64}})(Query)); 454 }) 455 .clampScalar(0, s8, s8) 456 .clampScalar(1, s32, HasSSE2 ? s64 : s32) 457 .widenScalarToNextPow2(1); 458 459 // fp conversions 460 getActionDefinitionsBuilder(G_FPEXT).legalIf([=](const LegalityQuery &Query) { 461 return (HasSSE2 && typePairInSet(0, 1, {{s64, s32}})(Query)) || 462 (HasAVX && typePairInSet(0, 1, {{v4s64, v4s32}})(Query)) || 463 (HasAVX512 && typePairInSet(0, 1, {{v8s64, v8s32}})(Query)); 464 }); 465 466 getActionDefinitionsBuilder(G_FPTRUNC).legalIf( 467 [=](const LegalityQuery &Query) { 468 return (HasSSE2 && typePairInSet(0, 1, {{s32, s64}})(Query)) || 469 (HasAVX && typePairInSet(0, 1, {{v4s32, v4s64}})(Query)) || 470 (HasAVX512 && typePairInSet(0, 1, {{v8s32, v8s64}})(Query)); 471 }); 472 473 getActionDefinitionsBuilder(G_SITOFP) 474 .legalIf([=](const LegalityQuery &Query) { 475 return (HasSSE1 && 476 (typePairInSet(0, 1, {{s32, s32}})(Query) || 477 (Is64Bit && typePairInSet(0, 1, {{s32, s64}})(Query)))) || 478 (HasSSE2 && 479 (typePairInSet(0, 1, {{s64, s32}})(Query) || 480 (Is64Bit && typePairInSet(0, 1, {{s64, s64}})(Query)))); 481 }) 482 .clampScalar(1, s32, sMaxScalar) 483 .widenScalarToNextPow2(1) 484 .clampScalar(0, s32, HasSSE2 ? s64 : s32) 485 .widenScalarToNextPow2(0); 486 487 getActionDefinitionsBuilder(G_FPTOSI) 488 .legalIf([=](const LegalityQuery &Query) { 489 return (HasSSE1 && 490 (typePairInSet(0, 1, {{s32, s32}})(Query) || 491 (Is64Bit && typePairInSet(0, 1, {{s64, s32}})(Query)))) || 492 (HasSSE2 && 493 (typePairInSet(0, 1, {{s32, s64}})(Query) || 494 (Is64Bit && typePairInSet(0, 1, {{s64, s64}})(Query)))); 495 }) 496 .clampScalar(1, s32, HasSSE2 ? s64 : s32) 497 .widenScalarToNextPow2(0) 498 .clampScalar(0, s32, sMaxScalar) 499 .widenScalarToNextPow2(1); 500 501 // For G_UITOFP and G_FPTOUI without AVX512, we have to custom legalize types 502 // <= s32 manually. Otherwise, in custom handler there is no way to 503 // understand whether s32 is an original type and we need to promote it to 504 // s64 or s32 is obtained after widening and we shouldn't widen it to s64. 505 // 506 // For AVX512 we simply widen types as there is direct mapping from opcodes 507 // to asm instructions. 508 getActionDefinitionsBuilder(G_UITOFP) 509 .legalIf([=](const LegalityQuery &Query) { 510 return HasAVX512 && typeInSet(0, {s32, s64})(Query) && 511 typeInSet(1, {s32, s64})(Query); 512 }) 513 .customIf([=](const LegalityQuery &Query) { 514 return !HasAVX512 && 515 ((HasSSE1 && typeIs(0, s32)(Query)) || 516 (HasSSE2 && typeIs(0, s64)(Query))) && 517 scalarNarrowerThan(1, Is64Bit ? 64 : 32)(Query); 518 }) 519 .lowerIf([=](const LegalityQuery &Query) { 520 // Lower conversions from s64 521 return !HasAVX512 && 522 ((HasSSE1 && typeIs(0, s32)(Query)) || 523 (HasSSE2 && typeIs(0, s64)(Query))) && 524 (Is64Bit && typeIs(1, s64)(Query)); 525 }) 526 .clampScalar(0, s32, HasSSE2 ? s64 : s32) 527 .widenScalarToNextPow2(0) 528 .clampScalar(1, s32, sMaxScalar) 529 .widenScalarToNextPow2(1); 530 531 getActionDefinitionsBuilder(G_FPTOUI) 532 .legalIf([=](const LegalityQuery &Query) { 533 return HasAVX512 && typeInSet(0, {s32, s64})(Query) && 534 typeInSet(1, {s32, s64})(Query); 535 }) 536 .customIf([=](const LegalityQuery &Query) { 537 return !HasAVX512 && 538 ((HasSSE1 && typeIs(1, s32)(Query)) || 539 (HasSSE2 && typeIs(1, s64)(Query))) && 540 scalarNarrowerThan(0, Is64Bit ? 64 : 32)(Query); 541 }) 542 // TODO: replace with customized legalization using 543 // specifics of cvttsd2si. The selection of this node requires 544 // a vector type. Either G_SCALAR_TO_VECTOR is needed or more advanced 545 // support of G_BUILD_VECTOR/G_INSERT_VECTOR_ELT is required beforehand. 546 .lowerIf([=](const LegalityQuery &Query) { 547 return !HasAVX512 && 548 ((HasSSE1 && typeIs(1, s32)(Query)) || 549 (HasSSE2 && typeIs(1, s64)(Query))) && 550 (Is64Bit && typeIs(0, s64)(Query)); 551 }) 552 .clampScalar(0, s32, sMaxScalar) 553 .widenScalarToNextPow2(0) 554 .clampScalar(1, s32, HasSSE2 ? s64 : s32) 555 .widenScalarToNextPow2(1); 556 557 // vector ops 558 getActionDefinitionsBuilder(G_BUILD_VECTOR) 559 .customIf([=](const LegalityQuery &Query) { 560 return (HasSSE1 && typeInSet(0, {v4s32})(Query)) || 561 (HasSSE2 && typeInSet(0, {v2s64, v8s16, v16s8})(Query)) || 562 (HasAVX && typeInSet(0, {v4s64, v8s32, v16s16, v32s8})(Query)) || 563 (HasAVX512 && typeInSet(0, {v8s64, v16s32, v32s16, v64s8})); 564 }) 565 .clampNumElements(0, v16s8, s8MaxVector) 566 .clampNumElements(0, v8s16, s16MaxVector) 567 .clampNumElements(0, v4s32, s32MaxVector) 568 .clampNumElements(0, v2s64, s64MaxVector) 569 .moreElementsToNextPow2(0); 570 571 getActionDefinitionsBuilder({G_EXTRACT, G_INSERT}) 572 .legalIf([=](const LegalityQuery &Query) { 573 unsigned SubIdx = Query.Opcode == G_EXTRACT ? 0 : 1; 574 unsigned FullIdx = Query.Opcode == G_EXTRACT ? 1 : 0; 575 return (HasAVX && typePairInSet(SubIdx, FullIdx, 576 {{v16s8, v32s8}, 577 {v8s16, v16s16}, 578 {v4s32, v8s32}, 579 {v2s64, v4s64}})(Query)) || 580 (HasAVX512 && typePairInSet(SubIdx, FullIdx, 581 {{v16s8, v64s8}, 582 {v32s8, v64s8}, 583 {v8s16, v32s16}, 584 {v16s16, v32s16}, 585 {v4s32, v16s32}, 586 {v8s32, v16s32}, 587 {v2s64, v8s64}, 588 {v4s64, v8s64}})(Query)); 589 }); 590 591 // todo: only permit dst types up to max legal vector register size? 592 getActionDefinitionsBuilder(G_CONCAT_VECTORS) 593 .legalIf([=](const LegalityQuery &Query) { 594 return (HasSSE1 && typePairInSet(1, 0, 595 {{v16s8, v32s8}, 596 {v8s16, v16s16}, 597 {v4s32, v8s32}, 598 {v2s64, v4s64}})(Query)) || 599 (HasAVX && typePairInSet(1, 0, 600 {{v16s8, v64s8}, 601 {v32s8, v64s8}, 602 {v8s16, v32s16}, 603 {v16s16, v32s16}, 604 {v4s32, v16s32}, 605 {v8s32, v16s32}, 606 {v2s64, v8s64}, 607 {v4s64, v8s64}})(Query)); 608 }); 609 610 // todo: vectors and address spaces 611 getActionDefinitionsBuilder(G_SELECT) 612 .legalFor({{s8, s32}, {s16, s32}, {s32, s32}, {s64, s32}, {p0, s32}}) 613 .widenScalarToNextPow2(0, /*Min=*/8) 614 .clampScalar(0, HasCMOV ? s16 : s8, sMaxScalar) 615 .clampScalar(1, s32, s32); 616 617 // memory intrinsics 618 getActionDefinitionsBuilder({G_MEMCPY, G_MEMMOVE, G_MEMSET}).libcall(); 619 620 getActionDefinitionsBuilder({G_DYN_STACKALLOC, 621 G_STACKSAVE, 622 G_STACKRESTORE}).lower(); 623 624 // fp intrinsics 625 getActionDefinitionsBuilder(G_INTRINSIC_ROUNDEVEN) 626 .scalarize(0) 627 .minScalar(0, LLT::scalar(32)) 628 .libcall(); 629 630 getActionDefinitionsBuilder({G_FREEZE, G_CONSTANT_FOLD_BARRIER}) 631 .legalFor({s8, s16, s32, s64, p0}) 632 .widenScalarToNextPow2(0, /*Min=*/8) 633 .clampScalar(0, s8, sMaxScalar); 634 635 getLegacyLegalizerInfo().computeTables(); 636 verify(*STI.getInstrInfo()); 637 } 638 639 bool X86LegalizerInfo::legalizeCustom(LegalizerHelper &Helper, MachineInstr &MI, 640 LostDebugLocObserver &LocObserver) const { 641 MachineIRBuilder &MIRBuilder = Helper.MIRBuilder; 642 MachineRegisterInfo &MRI = *MIRBuilder.getMRI(); 643 switch (MI.getOpcode()) { 644 default: 645 // No idea what to do. 646 return false; 647 case TargetOpcode::G_BUILD_VECTOR: 648 return legalizeBuildVector(MI, MRI, Helper); 649 case TargetOpcode::G_FPTOUI: 650 return legalizeFPTOUI(MI, MRI, Helper); 651 case TargetOpcode::G_UITOFP: 652 return legalizeUITOFP(MI, MRI, Helper); 653 } 654 llvm_unreachable("expected switch to return"); 655 } 656 657 bool X86LegalizerInfo::legalizeBuildVector(MachineInstr &MI, 658 MachineRegisterInfo &MRI, 659 LegalizerHelper &Helper) const { 660 MachineIRBuilder &MIRBuilder = Helper.MIRBuilder; 661 const auto &BuildVector = cast<GBuildVector>(MI); 662 Register Dst = BuildVector.getReg(0); 663 LLT DstTy = MRI.getType(Dst); 664 MachineFunction &MF = MIRBuilder.getMF(); 665 LLVMContext &Ctx = MF.getFunction().getContext(); 666 uint64_t DstTySize = DstTy.getScalarSizeInBits(); 667 668 SmallVector<Constant *, 4> CstIdxs; 669 for (unsigned i = 0; i < BuildVector.getNumSources(); ++i) { 670 Register Source = BuildVector.getSourceReg(i); 671 672 auto ValueAndReg = getIConstantVRegValWithLookThrough(Source, MRI); 673 if (ValueAndReg) { 674 CstIdxs.emplace_back(ConstantInt::get(Ctx, ValueAndReg->Value)); 675 continue; 676 } 677 678 auto FPValueAndReg = getFConstantVRegValWithLookThrough(Source, MRI); 679 if (FPValueAndReg) { 680 CstIdxs.emplace_back(ConstantFP::get(Ctx, FPValueAndReg->Value)); 681 continue; 682 } 683 684 if (getOpcodeDef<GImplicitDef>(Source, MRI)) { 685 CstIdxs.emplace_back(UndefValue::get(Type::getIntNTy(Ctx, DstTySize))); 686 continue; 687 } 688 return false; 689 } 690 691 Constant *ConstVal = ConstantVector::get(CstIdxs); 692 693 const DataLayout &DL = MIRBuilder.getDataLayout(); 694 unsigned AddrSpace = DL.getDefaultGlobalsAddressSpace(); 695 Align Alignment(DL.getABITypeAlign(ConstVal->getType())); 696 auto Addr = MIRBuilder.buildConstantPool( 697 LLT::pointer(AddrSpace, DL.getPointerSizeInBits(AddrSpace)), 698 MF.getConstantPool()->getConstantPoolIndex(ConstVal, Alignment)); 699 MachineMemOperand *MMO = 700 MF.getMachineMemOperand(MachinePointerInfo::getConstantPool(MF), 701 MachineMemOperand::MOLoad, DstTy, Alignment); 702 703 MIRBuilder.buildLoad(Dst, Addr, *MMO); 704 MI.eraseFromParent(); 705 return true; 706 } 707 708 bool X86LegalizerInfo::legalizeFPTOUI(MachineInstr &MI, 709 MachineRegisterInfo &MRI, 710 LegalizerHelper &Helper) const { 711 MachineIRBuilder &MIRBuilder = Helper.MIRBuilder; 712 auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs(); 713 unsigned DstSizeInBits = DstTy.getScalarSizeInBits(); 714 const LLT s32 = LLT::scalar(32); 715 const LLT s64 = LLT::scalar(64); 716 717 // Simply reuse FPTOSI when it is possible to widen the type 718 if (DstSizeInBits <= 32) { 719 auto Casted = MIRBuilder.buildFPTOSI(DstTy == s32 ? s64 : s32, Src); 720 MIRBuilder.buildTrunc(Dst, Casted); 721 MI.eraseFromParent(); 722 return true; 723 } 724 725 return false; 726 } 727 728 bool X86LegalizerInfo::legalizeUITOFP(MachineInstr &MI, 729 MachineRegisterInfo &MRI, 730 LegalizerHelper &Helper) const { 731 MachineIRBuilder &MIRBuilder = Helper.MIRBuilder; 732 auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs(); 733 const LLT s32 = LLT::scalar(32); 734 const LLT s64 = LLT::scalar(64); 735 736 // Simply reuse SITOFP when it is possible to widen the type 737 if (SrcTy.getSizeInBits() <= 32) { 738 auto Ext = MIRBuilder.buildZExt(SrcTy == s32 ? s64 : s32, Src); 739 MIRBuilder.buildSITOFP(Dst, Ext); 740 MI.eraseFromParent(); 741 return true; 742 } 743 744 return false; 745 } 746 747 bool X86LegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper, 748 MachineInstr &MI) const { 749 return true; 750 } 751