//===-- AutoUpgrade.cpp - Implement auto-upgrade helper functions ---------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the auto-upgrade helper functions.
// This is where deprecated IR intrinsics and other IR features are updated to
// current specifications.
//
//===----------------------------------------------------------------------===//

#include "llvm/IR/AutoUpgrade.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/BinaryFormat/Dwarf.h"
#include "llvm/IR/AttributeMask.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstVisitor.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicsAArch64.h"
#include "llvm/IR/IntrinsicsARM.h"
#include "llvm/IR/IntrinsicsNVPTX.h"
#include "llvm/IR/IntrinsicsRISCV.h"
#include "llvm/IR/IntrinsicsWebAssembly.h"
#include "llvm/IR/IntrinsicsX86.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/MDBuilder.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Value.h"
#include "llvm/IR/Verifier.h"
#include "llvm/Support/AMDGPUAddrSpace.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Regex.h"
#include "llvm/TargetParser/Triple.h"
#include <cstring>
#include <numeric>

using namespace llvm;

static cl::opt<bool>
    DisableAutoUpgradeDebugInfo("disable-auto-upgrade-debug-info",
                                cl::desc("Disable autoupgrade of debug info"));

static void rename(GlobalValue *GV) { GV->setName(GV->getName() + ".old"); }

// Upgrade the declarations of the SSE4.1 ptest intrinsics whose arguments have
// changed their type from v4f32 to v2i64.
static bool upgradePTESTIntrinsic(Function *F, Intrinsic::ID IID,
                                  Function *&NewFn) {
  // Check whether this is an old version of the function, which received
  // v4f32 arguments.
  Type *Arg0Type = F->getFunctionType()->getParamType(0);
  if (Arg0Type != FixedVectorType::get(Type::getFloatTy(F->getContext()), 4))
    return false;

  // Yes, it's old, replace it with new version.
  rename(F);
  NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
  return true;
}

// Upgrade the declarations of intrinsic functions whose 8-bit immediate mask
// arguments have changed their type from i32 to i8.
static bool upgradeX86IntrinsicsWith8BitMask(Function *F, Intrinsic::ID IID,
                                             Function *&NewFn) {
  // Check that the last argument is an i32.
  Type *LastArgType = F->getFunctionType()->getParamType(
      F->getFunctionType()->getNumParams() - 1);
  if (!LastArgType->isIntegerTy(32))
    return false;

  // Move this function aside and map down.
  rename(F);
  NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
  return true;
}
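
// For illustration, the i32 -> i8 immediate change handled above turns an old
// declaration such as (hypothetical IR; operand types vary by intrinsic):
//   declare <4 x float> @llvm.x86.sse41.insertps(<4 x float>, <4 x float>, i32)
// into the current form:
//   declare <4 x float> @llvm.x86.sse41.insertps(<4 x float>, <4 x float>, i8)
// with the old declaration renamed to "<name>.old" until its calls are
// rewritten.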

// Upgrade the declaration of fp compare intrinsics that change return type
// from scalar to vXi1 mask.
static bool upgradeX86MaskedFPCompare(Function *F, Intrinsic::ID IID,
                                      Function *&NewFn) {
  // Check if the return type is a vector.
  if (F->getReturnType()->isVectorTy())
    return false;

  rename(F);
  NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
  return true;
}

static bool upgradeX86BF16Intrinsic(Function *F, Intrinsic::ID IID,
                                    Function *&NewFn) {
  if (F->getReturnType()->getScalarType()->isBFloatTy())
    return false;

  rename(F);
  NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
  return true;
}

static bool upgradeX86BF16DPIntrinsic(Function *F, Intrinsic::ID IID,
                                      Function *&NewFn) {
  if (F->getFunctionType()->getParamType(1)->getScalarType()->isBFloatTy())
    return false;

  rename(F);
  NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
  return true;
}

static bool shouldUpgradeX86Intrinsic(Function *F, StringRef Name) {
  // All of the intrinsics matches below should be marked with which llvm
  // version started autoupgrading them. At some point in the future we would
  // like to use this information to remove upgrade code for some older
  // intrinsics. It is currently undecided how we will determine that future
  // point.
  if (Name.consume_front("avx."))
    return (Name.starts_with("blend.p") ||        // Added in 3.7
            Name == "cvt.ps2.pd.256" ||           // Added in 3.9
            Name == "cvtdq2.pd.256" ||            // Added in 3.9
            Name == "cvtdq2.ps.256" ||            // Added in 7.0
            Name.starts_with("movnt.") ||         // Added in 3.2
            Name.starts_with("sqrt.p") ||         // Added in 7.0
            Name.starts_with("storeu.") ||        // Added in 3.9
            Name.starts_with("vbroadcast.s") ||   // Added in 3.5
            Name.starts_with("vbroadcastf128") || // Added in 4.0
            Name.starts_with("vextractf128.") ||  // Added in 3.7
            Name.starts_with("vinsertf128.") ||   // Added in 3.7
            Name.starts_with("vperm2f128.") ||    // Added in 6.0
            Name.starts_with("vpermil."));        // Added in 3.1

  if (Name.consume_front("avx2."))
    return (Name == "movntdqa" ||             // Added in 5.0
            Name.starts_with("pabs.") ||      // Added in 6.0
            Name.starts_with("padds.") ||     // Added in 8.0
            Name.starts_with("paddus.") ||    // Added in 8.0
            Name.starts_with("pblendd.") ||   // Added in 3.7
            Name == "pblendw" ||              // Added in 3.7
            Name.starts_with("pbroadcast") || // Added in 3.8
            Name.starts_with("pcmpeq.") ||    // Added in 3.1
            Name.starts_with("pcmpgt.") ||    // Added in 3.1
            Name.starts_with("pmax") ||       // Added in 3.9
            Name.starts_with("pmin") ||       // Added in 3.9
            Name.starts_with("pmovsx") ||     // Added in 3.9
            Name.starts_with("pmovzx") ||     // Added in 3.9
            Name == "pmul.dq" ||              // Added in 7.0
            Name == "pmulu.dq" ||             // Added in 7.0
            Name.starts_with("psll.dq") ||    // Added in 3.7
            Name.starts_with("psrl.dq") ||    // Added in 3.7
            Name.starts_with("psubs.") ||     // Added in 8.0
            Name.starts_with("psubus.") ||    // Added in 8.0
            Name.starts_with("vbroadcast") || // Added in 3.8
            Name == "vbroadcasti128" ||       // Added in 3.7
            Name == "vextracti128" ||         // Added in 3.7
            Name == "vinserti128" ||          // Added in 3.7
            Name == "vperm2i128");            // Added in 6.0

  if (Name.consume_front("avx512.")) {
    if (Name.consume_front("mask."))
      // 'avx512.mask.*'
      return (Name.starts_with("add.p") ||     // Added in 7.0. 128/256 in 4.0
              Name.starts_with("and.") ||              // Added in 3.9
              Name.starts_with("andn.") ||             // Added in 3.9
              Name.starts_with("broadcast.s") ||       // Added in 3.9
              Name.starts_with("broadcastf32x4.") ||   // Added in 6.0
              Name.starts_with("broadcastf32x8.") ||   // Added in 6.0
              Name.starts_with("broadcastf64x2.") ||   // Added in 6.0
              Name.starts_with("broadcastf64x4.") ||   // Added in 6.0
              Name.starts_with("broadcasti32x4.") ||   // Added in 6.0
              Name.starts_with("broadcasti32x8.") ||   // Added in 6.0
              Name.starts_with("broadcasti64x2.") ||   // Added in 6.0
              Name.starts_with("broadcasti64x4.") ||   // Added in 6.0
              Name.starts_with("cmp.b") ||             // Added in 5.0
              Name.starts_with("cmp.d") ||             // Added in 5.0
              Name.starts_with("cmp.q") ||             // Added in 5.0
              Name.starts_with("cmp.w") ||             // Added in 5.0
              Name.starts_with("compress.b") ||        // Added in 9.0
              Name.starts_with("compress.d") ||        // Added in 9.0
              Name.starts_with("compress.p") ||        // Added in 9.0
              Name.starts_with("compress.q") ||        // Added in 9.0
              Name.starts_with("compress.store.") ||   // Added in 7.0
              Name.starts_with("compress.w") ||        // Added in 9.0
              Name.starts_with("conflict.") ||         // Added in 9.0
              Name.starts_with("cvtdq2pd.") ||         // Added in 4.0
              Name.starts_with("cvtdq2ps.") ||         // Added in 7.0 updated 9.0
              Name == "cvtpd2dq.256" ||                // Added in 7.0
              Name == "cvtpd2ps.256" ||                // Added in 7.0
              Name == "cvtps2pd.128" ||                // Added in 7.0
              Name == "cvtps2pd.256" ||                // Added in 7.0
              Name.starts_with("cvtqq2pd.") ||         // Added in 7.0 updated 9.0
              Name == "cvtqq2ps.256" ||                // Added in 9.0
              Name == "cvtqq2ps.512" ||                // Added in 9.0
              Name == "cvttpd2dq.256" ||               // Added in 7.0
              Name == "cvttps2dq.128" ||               // Added in 7.0
              Name == "cvttps2dq.256" ||               // Added in 7.0
              Name.starts_with("cvtudq2pd.") ||        // Added in 4.0
              Name.starts_with("cvtudq2ps.") ||        // Added in 7.0 updated 9.0
              Name.starts_with("cvtuqq2pd.") ||        // Added in 7.0 updated 9.0
              Name == "cvtuqq2ps.256" ||               // Added in 9.0
              Name == "cvtuqq2ps.512" ||               // Added in 9.0
              Name.starts_with("dbpsadbw.") ||         // Added in 7.0
              Name.starts_with("div.p") ||             // Added in 7.0. 128/256 in 4.0
              Name.starts_with("expand.b") ||          // Added in 9.0
              Name.starts_with("expand.d") ||          // Added in 9.0
              Name.starts_with("expand.load.") ||      // Added in 7.0
              Name.starts_with("expand.p") ||          // Added in 9.0
              Name.starts_with("expand.q") ||          // Added in 9.0
              Name.starts_with("expand.w") ||          // Added in 9.0
              Name.starts_with("fpclass.p") ||         // Added in 7.0
              Name.starts_with("insert") ||            // Added in 4.0
              Name.starts_with("load.") ||             // Added in 3.9
              Name.starts_with("loadu.") ||            // Added in 3.9
              Name.starts_with("lzcnt.") ||            // Added in 5.0
              Name.starts_with("max.p") ||             // Added in 7.0. 128/256 in 5.0
              Name.starts_with("min.p") ||             // Added in 7.0. 128/256 in 5.0
              Name.starts_with("movddup") ||           // Added in 3.9
              Name.starts_with("move.s") ||            // Added in 4.0
              Name.starts_with("movshdup") ||          // Added in 3.9
              Name.starts_with("movsldup") ||          // Added in 3.9
              Name.starts_with("mul.p") ||             // Added in 7.0. 128/256 in 4.0
              Name.starts_with("or.") ||               // Added in 3.9
              Name.starts_with("pabs.") ||             // Added in 6.0
              Name.starts_with("packssdw.") ||         // Added in 5.0
              Name.starts_with("packsswb.") ||         // Added in 5.0
              Name.starts_with("packusdw.") ||         // Added in 5.0
              Name.starts_with("packuswb.") ||         // Added in 5.0
              Name.starts_with("padd.") ||             // Added in 4.0
              Name.starts_with("padds.") ||            // Added in 8.0
              Name.starts_with("paddus.") ||           // Added in 8.0
              Name.starts_with("palignr.") ||          // Added in 3.9
              Name.starts_with("pand.") ||             // Added in 3.9
              Name.starts_with("pandn.") ||            // Added in 3.9
              Name.starts_with("pavg") ||              // Added in 6.0
              Name.starts_with("pbroadcast") ||        // Added in 6.0
              Name.starts_with("pcmpeq.") ||           // Added in 3.9
              Name.starts_with("pcmpgt.") ||           // Added in 3.9
              Name.starts_with("perm.df.") ||          // Added in 3.9
              Name.starts_with("perm.di.") ||          // Added in 3.9
              Name.starts_with("permvar.") ||          // Added in 7.0
              Name.starts_with("pmaddubs.w.") ||       // Added in 7.0
              Name.starts_with("pmaddw.d.") ||         // Added in 7.0
              Name.starts_with("pmax") ||              // Added in 4.0
              Name.starts_with("pmin") ||              // Added in 4.0
              Name == "pmov.qd.256" ||                 // Added in 9.0
              Name == "pmov.qd.512" ||                 // Added in 9.0
              Name == "pmov.wb.256" ||                 // Added in 9.0
              Name == "pmov.wb.512" ||                 // Added in 9.0
              Name.starts_with("pmovsx") ||            // Added in 4.0
              Name.starts_with("pmovzx") ||            // Added in 4.0
              Name.starts_with("pmul.dq.") ||          // Added in 4.0
              Name.starts_with("pmul.hr.sw.") ||       // Added in 7.0
              Name.starts_with("pmulh.w.") ||          // Added in 7.0
              Name.starts_with("pmulhu.w.") ||         // Added in 7.0
              Name.starts_with("pmull.") ||            // Added in 4.0
              Name.starts_with("pmultishift.qb.") ||   // Added in 8.0
              Name.starts_with("pmulu.dq.") ||         // Added in 4.0
              Name.starts_with("por.") ||              // Added in 3.9
              Name.starts_with("prol.") ||             // Added in 8.0
              Name.starts_with("prolv.") ||            // Added in 8.0
              Name.starts_with("pror.") ||             // Added in 8.0
              Name.starts_with("prorv.") ||            // Added in 8.0
              Name.starts_with("pshuf.b.") ||          // Added in 4.0
              Name.starts_with("pshuf.d.") ||          // Added in 3.9
              Name.starts_with("pshufh.w.") ||         // Added in 3.9
              Name.starts_with("pshufl.w.") ||         // Added in 3.9
              Name.starts_with("psll.d") ||            // Added in 4.0
              Name.starts_with("psll.q") ||            // Added in 4.0
              Name.starts_with("psll.w") ||            // Added in 4.0
              Name.starts_with("pslli") ||             // Added in 4.0
              Name.starts_with("psllv") ||             // Added in 4.0
              Name.starts_with("psra.d") ||            // Added in 4.0
              Name.starts_with("psra.q") ||            // Added in 4.0
              Name.starts_with("psra.w") ||            // Added in 4.0
              Name.starts_with("psrai") ||             // Added in 4.0
              Name.starts_with("psrav") ||             // Added in 4.0
              Name.starts_with("psrl.d") ||            // Added in 4.0
              Name.starts_with("psrl.q") ||            // Added in 4.0
              Name.starts_with("psrl.w") ||            // Added in 4.0
              Name.starts_with("psrli") ||             // Added in 4.0
              Name.starts_with("psrlv") ||             // Added in 4.0
              Name.starts_with("psub.") ||             // Added in 4.0
              Name.starts_with("psubs.") ||            // Added in 8.0
              Name.starts_with("psubus.") ||           // Added in 8.0
              Name.starts_with("pternlog.") ||         // Added in 7.0
              Name.starts_with("punpckh") ||           // Added in 3.9
              Name.starts_with("punpckl") ||           // Added in 3.9
              Name.starts_with("pxor.") ||             // Added in 3.9
              Name.starts_with("shuf.f") ||            // Added in 6.0
              Name.starts_with("shuf.i") ||            // Added in 6.0
              Name.starts_with("shuf.p") ||            // Added in 4.0
Name.starts_with("sqrt.p") || // Added in 7.0 304 Name.starts_with("store.b.") || // Added in 3.9 305 Name.starts_with("store.d.") || // Added in 3.9 306 Name.starts_with("store.p") || // Added in 3.9 307 Name.starts_with("store.q.") || // Added in 3.9 308 Name.starts_with("store.w.") || // Added in 3.9 309 Name == "store.ss" || // Added in 7.0 310 Name.starts_with("storeu.") || // Added in 3.9 311 Name.starts_with("sub.p") || // Added in 7.0. 128/256 in 4.0 312 Name.starts_with("ucmp.") || // Added in 5.0 313 Name.starts_with("unpckh.") || // Added in 3.9 314 Name.starts_with("unpckl.") || // Added in 3.9 315 Name.starts_with("valign.") || // Added in 4.0 316 Name == "vcvtph2ps.128" || // Added in 11.0 317 Name == "vcvtph2ps.256" || // Added in 11.0 318 Name.starts_with("vextract") || // Added in 4.0 319 Name.starts_with("vfmadd.") || // Added in 7.0 320 Name.starts_with("vfmaddsub.") || // Added in 7.0 321 Name.starts_with("vfnmadd.") || // Added in 7.0 322 Name.starts_with("vfnmsub.") || // Added in 7.0 323 Name.starts_with("vpdpbusd.") || // Added in 7.0 324 Name.starts_with("vpdpbusds.") || // Added in 7.0 325 Name.starts_with("vpdpwssd.") || // Added in 7.0 326 Name.starts_with("vpdpwssds.") || // Added in 7.0 327 Name.starts_with("vpermi2var.") || // Added in 7.0 328 Name.starts_with("vpermil.p") || // Added in 3.9 329 Name.starts_with("vpermilvar.") || // Added in 4.0 330 Name.starts_with("vpermt2var.") || // Added in 7.0 331 Name.starts_with("vpmadd52") || // Added in 7.0 332 Name.starts_with("vpshld.") || // Added in 7.0 333 Name.starts_with("vpshldv.") || // Added in 8.0 334 Name.starts_with("vpshrd.") || // Added in 7.0 335 Name.starts_with("vpshrdv.") || // Added in 8.0 336 Name.starts_with("vpshufbitqmb.") || // Added in 8.0 337 Name.starts_with("xor.")); // Added in 3.9 338 339 if (Name.consume_front("mask3.")) 340 // 'avx512.mask3.*' 341 return (Name.starts_with("vfmadd.") || // Added in 7.0 342 Name.starts_with("vfmaddsub.") || // Added in 7.0 343 Name.starts_with("vfmsub.") || // Added in 7.0 344 Name.starts_with("vfmsubadd.") || // Added in 7.0 345 Name.starts_with("vfnmsub.")); // Added in 7.0 346 347 if (Name.consume_front("maskz.")) 348 // 'avx512.maskz.*' 349 return (Name.starts_with("pternlog.") || // Added in 7.0 350 Name.starts_with("vfmadd.") || // Added in 7.0 351 Name.starts_with("vfmaddsub.") || // Added in 7.0 352 Name.starts_with("vpdpbusd.") || // Added in 7.0 353 Name.starts_with("vpdpbusds.") || // Added in 7.0 354 Name.starts_with("vpdpwssd.") || // Added in 7.0 355 Name.starts_with("vpdpwssds.") || // Added in 7.0 356 Name.starts_with("vpermt2var.") || // Added in 7.0 357 Name.starts_with("vpmadd52") || // Added in 7.0 358 Name.starts_with("vpshldv.") || // Added in 8.0 359 Name.starts_with("vpshrdv.")); // Added in 8.0 360 361 // 'avx512.*' 362 return (Name == "movntdqa" || // Added in 5.0 363 Name == "pmul.dq.512" || // Added in 7.0 364 Name == "pmulu.dq.512" || // Added in 7.0 365 Name.starts_with("broadcastm") || // Added in 6.0 366 Name.starts_with("cmp.p") || // Added in 12.0 367 Name.starts_with("cvtb2mask.") || // Added in 7.0 368 Name.starts_with("cvtd2mask.") || // Added in 7.0 369 Name.starts_with("cvtmask2") || // Added in 5.0 370 Name.starts_with("cvtq2mask.") || // Added in 7.0 371 Name == "cvtusi2sd" || // Added in 7.0 372 Name.starts_with("cvtw2mask.") || // Added in 7.0 373 Name == "kand.w" || // Added in 7.0 374 Name == "kandn.w" || // Added in 7.0 375 Name == "knot.w" || // Added in 7.0 376 Name == "kor.w" || // Added in 7.0 377 Name 
== "kortestc.w" || // Added in 7.0 378 Name == "kortestz.w" || // Added in 7.0 379 Name.starts_with("kunpck") || // added in 6.0 380 Name == "kxnor.w" || // Added in 7.0 381 Name == "kxor.w" || // Added in 7.0 382 Name.starts_with("padds.") || // Added in 8.0 383 Name.starts_with("pbroadcast") || // Added in 3.9 384 Name.starts_with("prol") || // Added in 8.0 385 Name.starts_with("pror") || // Added in 8.0 386 Name.starts_with("psll.dq") || // Added in 3.9 387 Name.starts_with("psrl.dq") || // Added in 3.9 388 Name.starts_with("psubs.") || // Added in 8.0 389 Name.starts_with("ptestm") || // Added in 6.0 390 Name.starts_with("ptestnm") || // Added in 6.0 391 Name.starts_with("storent.") || // Added in 3.9 392 Name.starts_with("vbroadcast.s") || // Added in 7.0 393 Name.starts_with("vpshld.") || // Added in 8.0 394 Name.starts_with("vpshrd.")); // Added in 8.0 395 } 396 397 if (Name.consume_front("fma.")) 398 return (Name.starts_with("vfmadd.") || // Added in 7.0 399 Name.starts_with("vfmsub.") || // Added in 7.0 400 Name.starts_with("vfmsubadd.") || // Added in 7.0 401 Name.starts_with("vfnmadd.") || // Added in 7.0 402 Name.starts_with("vfnmsub.")); // Added in 7.0 403 404 if (Name.consume_front("fma4.")) 405 return Name.starts_with("vfmadd.s"); // Added in 7.0 406 407 if (Name.consume_front("sse.")) 408 return (Name == "add.ss" || // Added in 4.0 409 Name == "cvtsi2ss" || // Added in 7.0 410 Name == "cvtsi642ss" || // Added in 7.0 411 Name == "div.ss" || // Added in 4.0 412 Name == "mul.ss" || // Added in 4.0 413 Name.starts_with("sqrt.p") || // Added in 7.0 414 Name == "sqrt.ss" || // Added in 7.0 415 Name.starts_with("storeu.") || // Added in 3.9 416 Name == "sub.ss"); // Added in 4.0 417 418 if (Name.consume_front("sse2.")) 419 return (Name == "add.sd" || // Added in 4.0 420 Name == "cvtdq2pd" || // Added in 3.9 421 Name == "cvtdq2ps" || // Added in 7.0 422 Name == "cvtps2pd" || // Added in 3.9 423 Name == "cvtsi2sd" || // Added in 7.0 424 Name == "cvtsi642sd" || // Added in 7.0 425 Name == "cvtss2sd" || // Added in 7.0 426 Name == "div.sd" || // Added in 4.0 427 Name == "mul.sd" || // Added in 4.0 428 Name.starts_with("padds.") || // Added in 8.0 429 Name.starts_with("paddus.") || // Added in 8.0 430 Name.starts_with("pcmpeq.") || // Added in 3.1 431 Name.starts_with("pcmpgt.") || // Added in 3.1 432 Name == "pmaxs.w" || // Added in 3.9 433 Name == "pmaxu.b" || // Added in 3.9 434 Name == "pmins.w" || // Added in 3.9 435 Name == "pminu.b" || // Added in 3.9 436 Name == "pmulu.dq" || // Added in 7.0 437 Name.starts_with("pshuf") || // Added in 3.9 438 Name.starts_with("psll.dq") || // Added in 3.7 439 Name.starts_with("psrl.dq") || // Added in 3.7 440 Name.starts_with("psubs.") || // Added in 8.0 441 Name.starts_with("psubus.") || // Added in 8.0 442 Name.starts_with("sqrt.p") || // Added in 7.0 443 Name == "sqrt.sd" || // Added in 7.0 444 Name == "storel.dq" || // Added in 3.9 445 Name.starts_with("storeu.") || // Added in 3.9 446 Name == "sub.sd"); // Added in 4.0 447 448 if (Name.consume_front("sse41.")) 449 return (Name.starts_with("blendp") || // Added in 3.7 450 Name == "movntdqa" || // Added in 5.0 451 Name == "pblendw" || // Added in 3.7 452 Name == "pmaxsb" || // Added in 3.9 453 Name == "pmaxsd" || // Added in 3.9 454 Name == "pmaxud" || // Added in 3.9 455 Name == "pmaxuw" || // Added in 3.9 456 Name == "pminsb" || // Added in 3.9 457 Name == "pminsd" || // Added in 3.9 458 Name == "pminud" || // Added in 3.9 459 Name == "pminuw" || // Added in 3.9 460 
Name.starts_with("pmovsx") || // Added in 3.8 461 Name.starts_with("pmovzx") || // Added in 3.9 462 Name == "pmuldq"); // Added in 7.0 463 464 if (Name.consume_front("sse42.")) 465 return Name == "crc32.64.8"; // Added in 3.4 466 467 if (Name.consume_front("sse4a.")) 468 return Name.starts_with("movnt."); // Added in 3.9 469 470 if (Name.consume_front("ssse3.")) 471 return (Name == "pabs.b.128" || // Added in 6.0 472 Name == "pabs.d.128" || // Added in 6.0 473 Name == "pabs.w.128"); // Added in 6.0 474 475 if (Name.consume_front("xop.")) 476 return (Name == "vpcmov" || // Added in 3.8 477 Name == "vpcmov.256" || // Added in 5.0 478 Name.starts_with("vpcom") || // Added in 3.2, Updated in 9.0 479 Name.starts_with("vprot")); // Added in 8.0 480 481 return (Name == "addcarry.u32" || // Added in 8.0 482 Name == "addcarry.u64" || // Added in 8.0 483 Name == "addcarryx.u32" || // Added in 8.0 484 Name == "addcarryx.u64" || // Added in 8.0 485 Name == "subborrow.u32" || // Added in 8.0 486 Name == "subborrow.u64" || // Added in 8.0 487 Name.starts_with("vcvtph2ps.")); // Added in 11.0 488 } 489 490 static bool upgradeX86IntrinsicFunction(Function *F, StringRef Name, 491 Function *&NewFn) { 492 // Only handle intrinsics that start with "x86.". 493 if (!Name.consume_front("x86.")) 494 return false; 495 496 if (shouldUpgradeX86Intrinsic(F, Name)) { 497 NewFn = nullptr; 498 return true; 499 } 500 501 if (Name == "rdtscp") { // Added in 8.0 502 // If this intrinsic has 0 operands, it's the new version. 503 if (F->getFunctionType()->getNumParams() == 0) 504 return false; 505 506 rename(F); 507 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), 508 Intrinsic::x86_rdtscp); 509 return true; 510 } 511 512 Intrinsic::ID ID; 513 514 // SSE4.1 ptest functions may have an old signature. 515 if (Name.consume_front("sse41.ptest")) { // Added in 3.2 516 ID = StringSwitch<Intrinsic::ID>(Name) 517 .Case("c", Intrinsic::x86_sse41_ptestc) 518 .Case("z", Intrinsic::x86_sse41_ptestz) 519 .Case("nzc", Intrinsic::x86_sse41_ptestnzc) 520 .Default(Intrinsic::not_intrinsic); 521 if (ID != Intrinsic::not_intrinsic) 522 return upgradePTESTIntrinsic(F, ID, NewFn); 523 524 return false; 525 } 526 527 // Several blend and other instructions with masks used the wrong number of 528 // bits. 529 530 // Added in 3.6 531 ID = StringSwitch<Intrinsic::ID>(Name) 532 .Case("sse41.insertps", Intrinsic::x86_sse41_insertps) 533 .Case("sse41.dppd", Intrinsic::x86_sse41_dppd) 534 .Case("sse41.dpps", Intrinsic::x86_sse41_dpps) 535 .Case("sse41.mpsadbw", Intrinsic::x86_sse41_mpsadbw) 536 .Case("avx.dp.ps.256", Intrinsic::x86_avx_dp_ps_256) 537 .Case("avx2.mpsadbw", Intrinsic::x86_avx2_mpsadbw) 538 .Default(Intrinsic::not_intrinsic); 539 if (ID != Intrinsic::not_intrinsic) 540 return upgradeX86IntrinsicsWith8BitMask(F, ID, NewFn); 541 542 if (Name.consume_front("avx512.mask.cmp.")) { 543 // Added in 7.0 544 ID = StringSwitch<Intrinsic::ID>(Name) 545 .Case("pd.128", Intrinsic::x86_avx512_mask_cmp_pd_128) 546 .Case("pd.256", Intrinsic::x86_avx512_mask_cmp_pd_256) 547 .Case("pd.512", Intrinsic::x86_avx512_mask_cmp_pd_512) 548 .Case("ps.128", Intrinsic::x86_avx512_mask_cmp_ps_128) 549 .Case("ps.256", Intrinsic::x86_avx512_mask_cmp_ps_256) 550 .Case("ps.512", Intrinsic::x86_avx512_mask_cmp_ps_512) 551 .Default(Intrinsic::not_intrinsic); 552 if (ID != Intrinsic::not_intrinsic) 553 return upgradeX86MaskedFPCompare(F, ID, NewFn); 554 return false; // No other 'x86.avx523.mask.cmp.*'. 

  if (Name.consume_front("avx512bf16.")) {
    // Added in 9.0
    ID = StringSwitch<Intrinsic::ID>(Name)
             .Case("cvtne2ps2bf16.128",
                   Intrinsic::x86_avx512bf16_cvtne2ps2bf16_128)
             .Case("cvtne2ps2bf16.256",
                   Intrinsic::x86_avx512bf16_cvtne2ps2bf16_256)
             .Case("cvtne2ps2bf16.512",
                   Intrinsic::x86_avx512bf16_cvtne2ps2bf16_512)
             .Case("mask.cvtneps2bf16.128",
                   Intrinsic::x86_avx512bf16_mask_cvtneps2bf16_128)
             .Case("cvtneps2bf16.256",
                   Intrinsic::x86_avx512bf16_cvtneps2bf16_256)
             .Case("cvtneps2bf16.512",
                   Intrinsic::x86_avx512bf16_cvtneps2bf16_512)
             .Default(Intrinsic::not_intrinsic);
    if (ID != Intrinsic::not_intrinsic)
      return upgradeX86BF16Intrinsic(F, ID, NewFn);

    // Added in 9.0
    ID = StringSwitch<Intrinsic::ID>(Name)
             .Case("dpbf16ps.128", Intrinsic::x86_avx512bf16_dpbf16ps_128)
             .Case("dpbf16ps.256", Intrinsic::x86_avx512bf16_dpbf16ps_256)
             .Case("dpbf16ps.512", Intrinsic::x86_avx512bf16_dpbf16ps_512)
             .Default(Intrinsic::not_intrinsic);
    if (ID != Intrinsic::not_intrinsic)
      return upgradeX86BF16DPIntrinsic(F, ID, NewFn);
    return false; // No other 'x86.avx512bf16.*'.
  }

  if (Name.consume_front("xop.")) {
    Intrinsic::ID ID = Intrinsic::not_intrinsic;
    if (Name.starts_with("vpermil2")) { // Added in 3.9
      // Upgrade any XOP PERMIL2 index operand still using a float/double
      // vector.
      auto Idx = F->getFunctionType()->getParamType(2);
      if (Idx->isFPOrFPVectorTy()) {
        unsigned IdxSize = Idx->getPrimitiveSizeInBits();
        unsigned EltSize = Idx->getScalarSizeInBits();
        if (EltSize == 64 && IdxSize == 128)
          ID = Intrinsic::x86_xop_vpermil2pd;
        else if (EltSize == 32 && IdxSize == 128)
          ID = Intrinsic::x86_xop_vpermil2ps;
        else if (EltSize == 64 && IdxSize == 256)
          ID = Intrinsic::x86_xop_vpermil2pd_256;
        else
          ID = Intrinsic::x86_xop_vpermil2ps_256;
      }
    } else if (F->arg_size() == 2)
      // frcz.ss/sd may need to have an argument dropped. Added in 3.2
      ID = StringSwitch<Intrinsic::ID>(Name)
               .Case("vfrcz.ss", Intrinsic::x86_xop_vfrcz_ss)
               .Case("vfrcz.sd", Intrinsic::x86_xop_vfrcz_sd)
               .Default(Intrinsic::not_intrinsic);

    if (ID != Intrinsic::not_intrinsic) {
      rename(F);
      NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID);
      return true;
    }
    return false; // No other 'x86.xop.*'.
  }

  if (Name == "seh.recoverfp") {
    NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
                                              Intrinsic::eh_recoverfp);
    return true;
  }

  return false;
}

// Upgrade ARM (IsArm) or Aarch64 (!IsArm) intrinsic fns. Returns true iff an
// upgrade is needed.
// IsArm: 'arm.*', !IsArm: 'aarch64.*'.
static bool upgradeArmOrAarch64IntrinsicFunction(bool IsArm, Function *F,
                                                 StringRef Name,
                                                 Function *&NewFn) {
  if (Name.starts_with("rbit")) {
    // '(arm|aarch64).rbit'.
    NewFn = Intrinsic::getOrInsertDeclaration(
        F->getParent(), Intrinsic::bitreverse, F->arg_begin()->getType());
    return true;
  }

  if (Name == "thread.pointer") {
    // '(arm|aarch64).thread.pointer'.
    NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
                                              Intrinsic::thread_pointer);
    return true;
  }

  bool Neon = Name.consume_front("neon.");
  if (Neon) {
    // '(arm|aarch64).neon.*'.
    // Changed in 12.0: bfdot accepts v4bf16 and v8bf16 instead of v8i8 and
    // v16i8 respectively.
    if (Name.consume_front("bfdot.")) {
      // '(arm|aarch64).neon.bfdot.*'.
      Intrinsic::ID ID =
          StringSwitch<Intrinsic::ID>(Name)
              .Cases("v2f32.v8i8", "v4f32.v16i8",
                     IsArm ? (Intrinsic::ID)Intrinsic::arm_neon_bfdot
                           : (Intrinsic::ID)Intrinsic::aarch64_neon_bfdot)
              .Default(Intrinsic::not_intrinsic);
      if (ID != Intrinsic::not_intrinsic) {
        size_t OperandWidth = F->getReturnType()->getPrimitiveSizeInBits();
        assert((OperandWidth == 64 || OperandWidth == 128) &&
               "Unexpected operand width");
        LLVMContext &Ctx = F->getParent()->getContext();
        std::array<Type *, 2> Tys{
            {F->getReturnType(),
             FixedVectorType::get(Type::getBFloatTy(Ctx), OperandWidth / 16)}};
        NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID, Tys);
        return true;
      }
      return false; // No other '(arm|aarch64).neon.bfdot.*'.
    }

    // Changed in 12.0: bfmmla, bfmlalb and bfmlalt are not polymorphic
    // anymore and accept v8bf16 instead of v16i8.
    if (Name.consume_front("bfm")) {
      // '(arm|aarch64).neon.bfm*'.
      if (Name.consume_back(".v4f32.v16i8")) {
        // '(arm|aarch64).neon.bfm*.v4f32.v16i8'.
        Intrinsic::ID ID =
            StringSwitch<Intrinsic::ID>(Name)
                .Case("mla",
                      IsArm ? (Intrinsic::ID)Intrinsic::arm_neon_bfmmla
                            : (Intrinsic::ID)Intrinsic::aarch64_neon_bfmmla)
                .Case("lalb",
                      IsArm ? (Intrinsic::ID)Intrinsic::arm_neon_bfmlalb
                            : (Intrinsic::ID)Intrinsic::aarch64_neon_bfmlalb)
                .Case("lalt",
                      IsArm ? (Intrinsic::ID)Intrinsic::arm_neon_bfmlalt
                            : (Intrinsic::ID)Intrinsic::aarch64_neon_bfmlalt)
                .Default(Intrinsic::not_intrinsic);
        if (ID != Intrinsic::not_intrinsic) {
          NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID);
          return true;
        }
        return false; // No other '(arm|aarch64).neon.bfm*.v4f32.v16i8'.
      }
      return false; // No other '(arm|aarch64).neon.bfm*'.
    }
    // Continue on to Aarch64 Neon or Arm Neon.
  }
  // Continue on to Arm or Aarch64.

  if (IsArm) {
    // 'arm.*'.
    if (Neon) {
      // 'arm.neon.*'.
      Intrinsic::ID ID = StringSwitch<Intrinsic::ID>(Name)
                             .StartsWith("vclz.", Intrinsic::ctlz)
                             .StartsWith("vcnt.", Intrinsic::ctpop)
                             .StartsWith("vqadds.", Intrinsic::sadd_sat)
                             .StartsWith("vqaddu.", Intrinsic::uadd_sat)
                             .StartsWith("vqsubs.", Intrinsic::ssub_sat)
                             .StartsWith("vqsubu.", Intrinsic::usub_sat)
                             .Default(Intrinsic::not_intrinsic);
      if (ID != Intrinsic::not_intrinsic) {
        NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID,
                                                  F->arg_begin()->getType());
        return true;
      }

      if (Name.consume_front("vst")) {
        // 'arm.neon.vst*'.
        static const Regex vstRegex("^([1234]|[234]lane)\\.v[a-z0-9]*$");
        SmallVector<StringRef, 2> Groups;
        if (vstRegex.match(Name, &Groups)) {
          static const Intrinsic::ID StoreInts[] = {
              Intrinsic::arm_neon_vst1, Intrinsic::arm_neon_vst2,
              Intrinsic::arm_neon_vst3, Intrinsic::arm_neon_vst4};

          static const Intrinsic::ID StoreLaneInts[] = {
              Intrinsic::arm_neon_vst2lane, Intrinsic::arm_neon_vst3lane,
              Intrinsic::arm_neon_vst4lane};

          auto fArgs = F->getFunctionType()->params();
          Type *Tys[] = {fArgs[0], fArgs[1]};
          if (Groups[1].size() == 1)
            NewFn = Intrinsic::getOrInsertDeclaration(
                F->getParent(), StoreInts[fArgs.size() - 3], Tys);
          else
            NewFn = Intrinsic::getOrInsertDeclaration(
                F->getParent(), StoreLaneInts[fArgs.size() - 5], Tys);
          return true;
        }
        return false; // No other 'arm.neon.vst*'.
      }

      return false; // No other 'arm.neon.*'.
    }
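
    // As a hypothetical example of the mappings above, a declaration such as
    //   declare <4 x i32> @llvm.arm.neon.vclz.v4i32(<4 x i32>)
    // is redeclared as the target-independent
    //   declare <4 x i32> @llvm.ctlz.v4i32(<4 x i32>, i1)
    // (the extra i1 'is_zero_poison' operand is supplied when the call itself
    // is rewritten).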

    if (Name.consume_front("mve.")) {
      // 'arm.mve.*'.
      if (Name == "vctp64") {
        if (cast<FixedVectorType>(F->getReturnType())->getNumElements() == 4) {
          // A vctp64 returning a v4i1 is converted to return a v2i1. Rename
          // the function and deal with it below in UpgradeIntrinsicCall.
          rename(F);
          return true;
        }
        return false; // Not 'arm.mve.vctp64'.
      }

      // These too are changed to accept a v2i1 instead of the old v4i1.
      if (Name.consume_back(".v4i1")) {
        // 'arm.mve.*.v4i1'.
        if (Name.consume_back(".predicated.v2i64.v4i32"))
          // 'arm.mve.*.predicated.v2i64.v4i32.v4i1'
          return Name == "mull.int" || Name == "vqdmull";

        if (Name.consume_back(".v2i64")) {
          // 'arm.mve.*.v2i64.v4i1'
          bool IsGather = Name.consume_front("vldr.gather.");
          if (IsGather || Name.consume_front("vstr.scatter.")) {
            if (Name.consume_front("base.")) {
              // Optional 'wb.' prefix.
              Name.consume_front("wb.");
              // 'arm.mve.(vldr.gather|vstr.scatter).base.(wb.)?
              // predicated.v2i64.v2i64.v4i1'.
              return Name == "predicated.v2i64";
            }

            if (Name.consume_front("offset.predicated."))
              return Name == (IsGather ? "v2i64.p0i64" : "p0i64.v2i64") ||
                     Name == (IsGather ? "v2i64.p0" : "p0.v2i64");

            // No other 'arm.mve.(vldr.gather|vstr.scatter).*.v2i64.v4i1'.
            return false;
          }

          return false; // No other 'arm.mve.*.v2i64.v4i1'.
        }
        return false; // No other 'arm.mve.*.v4i1'.
      }
      return false; // No other 'arm.mve.*'.
    }

    if (Name.consume_front("cde.vcx")) {
      // 'arm.cde.vcx*'.
      if (Name.consume_back(".predicated.v2i64.v4i1"))
        // 'arm.cde.vcx*.predicated.v2i64.v4i1'.
        return Name == "1q" || Name == "1qa" || Name == "2q" ||
               Name == "2qa" || Name == "3q" || Name == "3qa";

      return false; // No other 'arm.cde.vcx*'.
    }
  } else {
    // 'aarch64.*'.
    if (Neon) {
      // 'aarch64.neon.*'.
      Intrinsic::ID ID = StringSwitch<Intrinsic::ID>(Name)
                             .StartsWith("frintn", Intrinsic::roundeven)
                             .StartsWith("rbit", Intrinsic::bitreverse)
                             .Default(Intrinsic::not_intrinsic);
      if (ID != Intrinsic::not_intrinsic) {
        NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID,
                                                  F->arg_begin()->getType());
        return true;
      }

      if (Name.starts_with("addp")) {
        // 'aarch64.neon.addp*'.
        if (F->arg_size() != 2)
          return false; // Invalid IR.
        VectorType *Ty = dyn_cast<VectorType>(F->getReturnType());
        if (Ty && Ty->getElementType()->isFloatingPointTy()) {
          NewFn = Intrinsic::getOrInsertDeclaration(
              F->getParent(), Intrinsic::aarch64_neon_faddp, Ty);
          return true;
        }
      }

      // Changed in 20.0: bfcvt/bfcvtn/bfcvtn2 have been replaced with fptrunc.
      if (Name.starts_with("bfcvt")) {
        NewFn = nullptr;
        return true;
      }

      return false; // No other 'aarch64.neon.*'.
    }
    if (Name.consume_front("sve.")) {
      // 'aarch64.sve.*'.
      if (Name.consume_front("bf")) {
        if (Name.consume_back(".lane")) {
          // 'aarch64.sve.bf*.lane'.
          Intrinsic::ID ID =
              StringSwitch<Intrinsic::ID>(Name)
                  .Case("dot", Intrinsic::aarch64_sve_bfdot_lane_v2)
                  .Case("mlalb", Intrinsic::aarch64_sve_bfmlalb_lane_v2)
                  .Case("mlalt", Intrinsic::aarch64_sve_bfmlalt_lane_v2)
                  .Default(Intrinsic::not_intrinsic);
          if (ID != Intrinsic::not_intrinsic) {
            NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID);
            return true;
          }
          return false; // No other 'aarch64.sve.bf*.lane'.
        }
        return false; // No other 'aarch64.sve.bf*'.
      }

      // 'aarch64.sve.fcvt.bf16f32' || 'aarch64.sve.fcvtnt.bf16f32'
      if (Name == "fcvt.bf16f32" || Name == "fcvtnt.bf16f32") {
        NewFn = nullptr;
        return true;
      }

      if (Name.consume_front("addqv")) {
        // 'aarch64.sve.addqv'.
        if (!F->getReturnType()->isFPOrFPVectorTy())
          return false;

        auto Args = F->getFunctionType()->params();
        Type *Tys[] = {F->getReturnType(), Args[1]};
        NewFn = Intrinsic::getOrInsertDeclaration(
            F->getParent(), Intrinsic::aarch64_sve_faddqv, Tys);
        return true;
      }

      if (Name.consume_front("ld")) {
        // 'aarch64.sve.ld*'.
        static const Regex LdRegex("^[234](.nxv[a-z0-9]+|$)");
        if (LdRegex.match(Name)) {
          Type *ScalarTy =
              cast<VectorType>(F->getReturnType())->getElementType();
          ElementCount EC =
              cast<VectorType>(F->arg_begin()->getType())->getElementCount();
          Type *Ty = VectorType::get(ScalarTy, EC);
          static const Intrinsic::ID LoadIDs[] = {
              Intrinsic::aarch64_sve_ld2_sret,
              Intrinsic::aarch64_sve_ld3_sret,
              Intrinsic::aarch64_sve_ld4_sret,
          };
          NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
                                                    LoadIDs[Name[0] - '2'], Ty);
          return true;
        }
        return false; // No other 'aarch64.sve.ld*'.
      }

      if (Name.consume_front("tuple.")) {
        // 'aarch64.sve.tuple.*'.
        if (Name.starts_with("get")) {
          // 'aarch64.sve.tuple.get*'.
          Type *Tys[] = {F->getReturnType(), F->arg_begin()->getType()};
          NewFn = Intrinsic::getOrInsertDeclaration(
              F->getParent(), Intrinsic::vector_extract, Tys);
          return true;
        }

        if (Name.starts_with("set")) {
          // 'aarch64.sve.tuple.set*'.
          auto Args = F->getFunctionType()->params();
          Type *Tys[] = {Args[0], Args[2], Args[1]};
          NewFn = Intrinsic::getOrInsertDeclaration(
              F->getParent(), Intrinsic::vector_insert, Tys);
          return true;
        }

        static const Regex CreateTupleRegex("^create[234](.nxv[a-z0-9]+|$)");
        if (CreateTupleRegex.match(Name)) {
          // 'aarch64.sve.tuple.create*'.
          auto Args = F->getFunctionType()->params();
          Type *Tys[] = {F->getReturnType(), Args[1]};
          NewFn = Intrinsic::getOrInsertDeclaration(
              F->getParent(), Intrinsic::vector_insert, Tys);
          return true;
        }
        return false; // No other 'aarch64.sve.tuple.*'.
      }
      return false; // No other 'aarch64.sve.*'.
    }
  }
  return false; // No other 'arm.*', 'aarch64.*'.
}
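
// As a hypothetical illustration of the 'aarch64.sve.tuple.get' mapping above,
// an old declaration like
//   declare <vscale x 4 x i32>
//       @llvm.aarch64.sve.tuple.get.nxv4i32.nxv8i32(<vscale x 8 x i32>, i32)
// is redeclared as the generic
//   declare <vscale x 4 x i32>
//       @llvm.vector.extract.nxv4i32.nxv8i32(<vscale x 8 x i32>, i64)
// with the tuple index rescaled to an element index when the call is upgraded.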

static Intrinsic::ID shouldUpgradeNVPTXBF16Intrinsic(StringRef Name) {
  if (Name.consume_front("abs."))
    return StringSwitch<Intrinsic::ID>(Name)
        .Case("bf16", Intrinsic::nvvm_abs_bf16)
        .Case("bf16x2", Intrinsic::nvvm_abs_bf16x2)
        .Default(Intrinsic::not_intrinsic);

  if (Name.consume_front("fma.rn."))
    return StringSwitch<Intrinsic::ID>(Name)
        .Case("bf16", Intrinsic::nvvm_fma_rn_bf16)
        .Case("bf16x2", Intrinsic::nvvm_fma_rn_bf16x2)
        .Case("ftz.bf16", Intrinsic::nvvm_fma_rn_ftz_bf16)
        .Case("ftz.bf16x2", Intrinsic::nvvm_fma_rn_ftz_bf16x2)
        .Case("ftz.relu.bf16", Intrinsic::nvvm_fma_rn_ftz_relu_bf16)
        .Case("ftz.relu.bf16x2", Intrinsic::nvvm_fma_rn_ftz_relu_bf16x2)
        .Case("ftz.sat.bf16", Intrinsic::nvvm_fma_rn_ftz_sat_bf16)
        .Case("ftz.sat.bf16x2", Intrinsic::nvvm_fma_rn_ftz_sat_bf16x2)
        .Case("relu.bf16", Intrinsic::nvvm_fma_rn_relu_bf16)
        .Case("relu.bf16x2", Intrinsic::nvvm_fma_rn_relu_bf16x2)
        .Case("sat.bf16", Intrinsic::nvvm_fma_rn_sat_bf16)
        .Case("sat.bf16x2", Intrinsic::nvvm_fma_rn_sat_bf16x2)
        .Default(Intrinsic::not_intrinsic);

  if (Name.consume_front("fmax."))
    return StringSwitch<Intrinsic::ID>(Name)
        .Case("bf16", Intrinsic::nvvm_fmax_bf16)
        .Case("bf16x2", Intrinsic::nvvm_fmax_bf16x2)
        .Case("ftz.bf16", Intrinsic::nvvm_fmax_ftz_bf16)
        .Case("ftz.bf16x2", Intrinsic::nvvm_fmax_ftz_bf16x2)
        .Case("ftz.nan.bf16", Intrinsic::nvvm_fmax_ftz_nan_bf16)
        .Case("ftz.nan.bf16x2", Intrinsic::nvvm_fmax_ftz_nan_bf16x2)
        .Case("ftz.nan.xorsign.abs.bf16",
              Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_bf16)
        .Case("ftz.nan.xorsign.abs.bf16x2",
              Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_bf16x2)
        .Case("ftz.xorsign.abs.bf16", Intrinsic::nvvm_fmax_ftz_xorsign_abs_bf16)
        .Case("ftz.xorsign.abs.bf16x2",
              Intrinsic::nvvm_fmax_ftz_xorsign_abs_bf16x2)
        .Case("nan.bf16", Intrinsic::nvvm_fmax_nan_bf16)
        .Case("nan.bf16x2", Intrinsic::nvvm_fmax_nan_bf16x2)
        .Case("nan.xorsign.abs.bf16", Intrinsic::nvvm_fmax_nan_xorsign_abs_bf16)
        .Case("nan.xorsign.abs.bf16x2",
              Intrinsic::nvvm_fmax_nan_xorsign_abs_bf16x2)
        .Case("xorsign.abs.bf16", Intrinsic::nvvm_fmax_xorsign_abs_bf16)
        .Case("xorsign.abs.bf16x2", Intrinsic::nvvm_fmax_xorsign_abs_bf16x2)
        .Default(Intrinsic::not_intrinsic);

  if (Name.consume_front("fmin."))
    return StringSwitch<Intrinsic::ID>(Name)
        .Case("bf16", Intrinsic::nvvm_fmin_bf16)
        .Case("bf16x2", Intrinsic::nvvm_fmin_bf16x2)
        .Case("ftz.bf16", Intrinsic::nvvm_fmin_ftz_bf16)
        .Case("ftz.bf16x2", Intrinsic::nvvm_fmin_ftz_bf16x2)
        .Case("ftz.nan.bf16", Intrinsic::nvvm_fmin_ftz_nan_bf16)
        .Case("ftz.nan.bf16x2", Intrinsic::nvvm_fmin_ftz_nan_bf16x2)
        .Case("ftz.nan.xorsign.abs.bf16",
              Intrinsic::nvvm_fmin_ftz_nan_xorsign_abs_bf16)
        .Case("ftz.nan.xorsign.abs.bf16x2",
              Intrinsic::nvvm_fmin_ftz_nan_xorsign_abs_bf16x2)
        .Case("ftz.xorsign.abs.bf16", Intrinsic::nvvm_fmin_ftz_xorsign_abs_bf16)
        .Case("ftz.xorsign.abs.bf16x2",
              Intrinsic::nvvm_fmin_ftz_xorsign_abs_bf16x2)
        .Case("nan.bf16", Intrinsic::nvvm_fmin_nan_bf16)
        .Case("nan.bf16x2", Intrinsic::nvvm_fmin_nan_bf16x2)
        .Case("nan.xorsign.abs.bf16", Intrinsic::nvvm_fmin_nan_xorsign_abs_bf16)
        .Case("nan.xorsign.abs.bf16x2",
              Intrinsic::nvvm_fmin_nan_xorsign_abs_bf16x2)
        .Case("xorsign.abs.bf16", Intrinsic::nvvm_fmin_xorsign_abs_bf16)
        .Case("xorsign.abs.bf16x2", Intrinsic::nvvm_fmin_xorsign_abs_bf16x2)
        .Default(Intrinsic::not_intrinsic);

  if (Name.consume_front("neg."))
    return StringSwitch<Intrinsic::ID>(Name)
        .Case("bf16", Intrinsic::nvvm_neg_bf16)
        .Case("bf16x2", Intrinsic::nvvm_neg_bf16x2)
        .Default(Intrinsic::not_intrinsic);

  return Intrinsic::not_intrinsic;
}

static bool upgradeIntrinsicFunction1(Function *F, Function *&NewFn,
                                      bool CanUpgradeDebugIntrinsicsToRecords) {
  assert(F && "Illegal to upgrade a non-existent Function.");

  StringRef Name = F->getName();

  // Quickly eliminate it, if it's not a candidate.
  if (!Name.consume_front("llvm.") || Name.empty())
    return false;

  switch (Name[0]) {
  default: break;
  case 'a': {
    bool IsArm = Name.consume_front("arm.");
    if (IsArm || Name.consume_front("aarch64.")) {
      if (upgradeArmOrAarch64IntrinsicFunction(IsArm, F, Name, NewFn))
        return true;
      break;
    }

    if (Name.consume_front("amdgcn.")) {
      if (Name == "alignbit") {
        // Target specific intrinsic became redundant
        NewFn = Intrinsic::getOrInsertDeclaration(
            F->getParent(), Intrinsic::fshr, {F->getReturnType()});
        return true;
      }

      if (Name.consume_front("atomic.")) {
        if (Name.starts_with("inc") || Name.starts_with("dec")) {
          // These were replaced with atomicrmw uinc_wrap and udec_wrap, so
          // there's no new declaration.
          NewFn = nullptr;
          return true;
        }
        break; // No other 'amdgcn.atomic.*'
      }

      if (Name.consume_front("ds.") || Name.consume_front("global.atomic.") ||
          Name.consume_front("flat.atomic.")) {
        if (Name.starts_with("fadd") ||
            // FIXME: We should also remove fmin.num and fmax.num intrinsics.
            (Name.starts_with("fmin") && !Name.starts_with("fmin.num")) ||
            (Name.starts_with("fmax") && !Name.starts_with("fmax.num"))) {
          // Replaced with atomicrmw fadd/fmin/fmax, so there's no new
          // declaration.
          NewFn = nullptr;
          return true;
        }
      }

      if (Name.starts_with("ldexp.")) {
        // Target specific intrinsic became redundant
        NewFn = Intrinsic::getOrInsertDeclaration(
            F->getParent(), Intrinsic::ldexp,
            {F->getReturnType(), F->getArg(1)->getType()});
        return true;
      }
      break; // No other 'amdgcn.*'
    }

    break;
  }
  case 'c': {
    if (F->arg_size() == 1) {
      Intrinsic::ID ID = StringSwitch<Intrinsic::ID>(Name)
                             .StartsWith("ctlz.", Intrinsic::ctlz)
                             .StartsWith("cttz.", Intrinsic::cttz)
                             .Default(Intrinsic::not_intrinsic);
      if (ID != Intrinsic::not_intrinsic) {
        rename(F);
        NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID,
                                                  F->arg_begin()->getType());
        return true;
      }
    }

    if (F->arg_size() == 2 && Name == "coro.end") {
      rename(F);
      NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
                                                Intrinsic::coro_end);
      return true;
    }

    break;
  }
  case 'd':
    if (Name.consume_front("dbg.")) {
      // Mark debug intrinsics for upgrade to new debug format.
      if (CanUpgradeDebugIntrinsicsToRecords &&
          F->getParent()->IsNewDbgInfoFormat) {
        if (Name == "addr" || Name == "value" || Name == "assign" ||
            Name == "declare" || Name == "label") {
          // There's no function to replace these with.
          NewFn = nullptr;
          // But we do want these to get upgraded.
          return true;
        }
      }
      // Update llvm.dbg.addr intrinsics even in "new debug mode"; they'll get
      // converted to DbgVariableRecords later.
      if (Name == "addr" || (Name == "value" && F->arg_size() == 4)) {
        rename(F);
        NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
                                                  Intrinsic::dbg_value);
        return true;
      }
      break; // No other 'dbg.*'.
    }
    break;
  case 'e':
    if (Name.consume_front("experimental.vector.")) {
      Intrinsic::ID ID =
          StringSwitch<Intrinsic::ID>(Name)
              // Skip over extract.last.active, otherwise it will be 'upgraded'
              // to a regular vector extract which is a different operation.
              .StartsWith("extract.last.active.", Intrinsic::not_intrinsic)
              .StartsWith("extract.", Intrinsic::vector_extract)
              .StartsWith("insert.", Intrinsic::vector_insert)
              .StartsWith("splice.", Intrinsic::vector_splice)
              .StartsWith("reverse.", Intrinsic::vector_reverse)
              .StartsWith("interleave2.", Intrinsic::vector_interleave2)
              .StartsWith("deinterleave2.", Intrinsic::vector_deinterleave2)
              .Default(Intrinsic::not_intrinsic);
      if (ID != Intrinsic::not_intrinsic) {
        const auto *FT = F->getFunctionType();
        SmallVector<Type *, 2> Tys;
        if (ID == Intrinsic::vector_extract ||
            ID == Intrinsic::vector_interleave2)
          // Extracting overloads the return type.
          Tys.push_back(FT->getReturnType());
        if (ID != Intrinsic::vector_interleave2)
          Tys.push_back(FT->getParamType(0));
        if (ID == Intrinsic::vector_insert)
          // Inserting overloads the inserted type.
          Tys.push_back(FT->getParamType(1));
        rename(F);
        NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID, Tys);
        return true;
      }

      if (Name.consume_front("reduce.")) {
        SmallVector<StringRef, 2> Groups;
        static const Regex R("^([a-z]+)\\.[a-z][0-9]+");
        if (R.match(Name, &Groups))
          ID = StringSwitch<Intrinsic::ID>(Groups[1])
                   .Case("add", Intrinsic::vector_reduce_add)
                   .Case("mul", Intrinsic::vector_reduce_mul)
                   .Case("and", Intrinsic::vector_reduce_and)
                   .Case("or", Intrinsic::vector_reduce_or)
                   .Case("xor", Intrinsic::vector_reduce_xor)
                   .Case("smax", Intrinsic::vector_reduce_smax)
                   .Case("smin", Intrinsic::vector_reduce_smin)
                   .Case("umax", Intrinsic::vector_reduce_umax)
                   .Case("umin", Intrinsic::vector_reduce_umin)
                   .Case("fmax", Intrinsic::vector_reduce_fmax)
                   .Case("fmin", Intrinsic::vector_reduce_fmin)
                   .Default(Intrinsic::not_intrinsic);

        bool V2 = false;
        if (ID == Intrinsic::not_intrinsic) {
          static const Regex R2("^v2\\.([a-z]+)\\.[fi][0-9]+");
          Groups.clear();
          V2 = true;
          if (R2.match(Name, &Groups))
            ID = StringSwitch<Intrinsic::ID>(Groups[1])
                     .Case("fadd", Intrinsic::vector_reduce_fadd)
                     .Case("fmul", Intrinsic::vector_reduce_fmul)
                     .Default(Intrinsic::not_intrinsic);
        }
        if (ID != Intrinsic::not_intrinsic) {
          rename(F);
          auto Args = F->getFunctionType()->params();
          NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID,
                                                    {Args[V2 ? 1 : 0]});
          return true;
        }
        break; // No other 'experimental.vector.reduce.*'.
      }
      break; // No other 'experimental.vector.*'.
    }
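
    // For illustration, the renames above turn, e.g. (hypothetical IR),
    //   %v = call <4 x i32> @llvm.experimental.vector.extract.v4i32.nxv4i32(
    //            <vscale x 4 x i32> %vec, i64 0)
    // into a call to @llvm.vector.extract.v4i32.nxv4i32 with the same
    // arguments; only the "experimental." prefix is dropped.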
    if (Name.consume_front("experimental.stepvector.")) {
      Intrinsic::ID ID = Intrinsic::stepvector;
      rename(F);
      NewFn = Intrinsic::getOrInsertDeclaration(
          F->getParent(), ID, F->getFunctionType()->getReturnType());
      return true;
    }
    break; // No other 'e*'.
  case 'f':
    if (Name.starts_with("flt.rounds")) {
      rename(F);
      NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
                                                Intrinsic::get_rounding);
      return true;
    }
    break;
  case 'i':
    if (Name.starts_with("invariant.group.barrier")) {
      // Rename invariant.group.barrier to launder.invariant.group
      auto Args = F->getFunctionType()->params();
      Type *ObjectPtr[1] = {Args[0]};
      rename(F);
      NewFn = Intrinsic::getOrInsertDeclaration(
          F->getParent(), Intrinsic::launder_invariant_group, ObjectPtr);
      return true;
    }
    break;
  case 'm': {
    // Update the memory intrinsics (memcpy/memmove/memset) that used to take
    // an explicit alignment parameter; the alignment is now embedded as an
    // attribute of the pointer args.
    if (unsigned ID = StringSwitch<unsigned>(Name)
                          .StartsWith("memcpy.", Intrinsic::memcpy)
                          .StartsWith("memmove.", Intrinsic::memmove)
                          .Default(0)) {
      if (F->arg_size() == 5) {
        rename(F);
        // Get the types of dest, src, and len
        ArrayRef<Type *> ParamTypes =
            F->getFunctionType()->params().slice(0, 3);
        NewFn =
            Intrinsic::getOrInsertDeclaration(F->getParent(), ID, ParamTypes);
        return true;
      }
    }
    if (Name.starts_with("memset.") && F->arg_size() == 5) {
      rename(F);
      // Get the types of dest, and len
      const auto *FT = F->getFunctionType();
      Type *ParamTypes[2] = {
          FT->getParamType(0), // Dest
          FT->getParamType(2)  // len
      };
      NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
                                                Intrinsic::memset, ParamTypes);
      return true;
    }
    break;
  }
  case 'n': {
    if (Name.consume_front("nvvm.")) {
      // Check for nvvm intrinsics corresponding exactly to an LLVM intrinsic.
      if (F->arg_size() == 1) {
        Intrinsic::ID IID =
            StringSwitch<Intrinsic::ID>(Name)
                .Cases("brev32", "brev64", Intrinsic::bitreverse)
                .Case("clz.i", Intrinsic::ctlz)
                .Case("popc.i", Intrinsic::ctpop)
                .Default(Intrinsic::not_intrinsic);
        if (IID != Intrinsic::not_intrinsic) {
          NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID,
                                                    {F->getReturnType()});
          return true;
        }
      }

      // Check for nvvm intrinsics that need a return type adjustment.
      if (!F->getReturnType()->getScalarType()->isBFloatTy()) {
        Intrinsic::ID IID = shouldUpgradeNVPTXBF16Intrinsic(Name);
        if (IID != Intrinsic::not_intrinsic) {
          NewFn = nullptr;
          return true;
        }
      }

      // The following nvvm intrinsics correspond exactly to an LLVM idiom, but
      // not to an intrinsic alone. We expand them in UpgradeIntrinsicCall.
      //
      // TODO: We could add lohi.i2d.
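      //
      // For example, a call like (illustrative IR)
      //   %r = call i32 @llvm.nvvm.abs.i(i32 %x)
      // has no single replacement intrinsic; the call upgrade expands it
      // inline to a compare-and-select idiom along the lines of
      //   %neg = sub i32 0, %x
      //   %cmp = icmp sge i32 %x, %neg
      //   %r = select i1 %cmp, i32 %x, i32 %neg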
      bool Expand = false;
      if (Name.consume_front("abs."))
        // nvvm.abs.{i,ll}
        Expand = Name == "i" || Name == "ll";
      else if (Name == "clz.ll" || Name == "popc.ll" || Name == "h2f")
        Expand = true;
      else if (Name.consume_front("max.") || Name.consume_front("min."))
        // nvvm.{min,max}.{s,i,ll,us,ui,ull}
        Expand = Name == "s" || Name == "i" || Name == "ll" || Name == "us" ||
                 Name == "ui" || Name == "ull";
      else if (Name.consume_front("atomic.load.add."))
        // nvvm.atomic.load.add.{f32.p,f64.p}
        Expand = Name.starts_with("f32.p") || Name.starts_with("f64.p");
      else if (Name.consume_front("bitcast."))
        // nvvm.bitcast.{f2i,i2f,ll2d,d2ll}
        Expand =
            Name == "f2i" || Name == "i2f" || Name == "ll2d" || Name == "d2ll";
      else if (Name.consume_front("rotate."))
        // nvvm.rotate.{b32,b64,right.b64}
        Expand = Name == "b32" || Name == "b64" || Name == "right.b64";
      else if (Name.consume_front("ptr.gen.to."))
        // nvvm.ptr.gen.to.{local,shared,global,constant}
        Expand = Name.starts_with("local") || Name.starts_with("shared") ||
                 Name.starts_with("global") || Name.starts_with("constant");
      else if (Name.consume_front("ptr."))
        // nvvm.ptr.{local,shared,global,constant}.to.gen
        Expand =
            (Name.consume_front("local") || Name.consume_front("shared") ||
             Name.consume_front("global") || Name.consume_front("constant")) &&
            Name.starts_with(".to.gen");
      else if (Name.consume_front("ldg.global."))
        // nvvm.ldg.global.{i,p,f}
        Expand = (Name.starts_with("i.") || Name.starts_with("f.") ||
                  Name.starts_with("p."));
      else
        Expand = false;

      if (Expand) {
        NewFn = nullptr;
        return true;
      }
      break; // No other 'nvvm.*'.
    }
    break;
  }
  case 'o':
    // We only need to change the name to match the mangling including the
    // address space.
    if (Name.starts_with("objectsize.")) {
      Type *Tys[2] = {F->getReturnType(), F->arg_begin()->getType()};
      if (F->arg_size() == 2 || F->arg_size() == 3 ||
          F->getName() !=
              Intrinsic::getName(Intrinsic::objectsize, Tys, F->getParent())) {
        rename(F);
        NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
                                                  Intrinsic::objectsize, Tys);
        return true;
      }
    }
    break;

  case 'p':
    if (Name.starts_with("ptr.annotation.") && F->arg_size() == 4) {
      rename(F);
      NewFn = Intrinsic::getOrInsertDeclaration(
          F->getParent(), Intrinsic::ptr_annotation,
          {F->arg_begin()->getType(), F->getArg(1)->getType()});
      return true;
    }
    break;

  case 'r': {
    if (Name.consume_front("riscv.")) {
      Intrinsic::ID ID;
      ID = StringSwitch<Intrinsic::ID>(Name)
               .Case("aes32dsi", Intrinsic::riscv_aes32dsi)
               .Case("aes32dsmi", Intrinsic::riscv_aes32dsmi)
               .Case("aes32esi", Intrinsic::riscv_aes32esi)
               .Case("aes32esmi", Intrinsic::riscv_aes32esmi)
               .Default(Intrinsic::not_intrinsic);
      if (ID != Intrinsic::not_intrinsic) {
        if (!F->getFunctionType()->getParamType(2)->isIntegerTy(32)) {
          rename(F);
          NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID);
          return true;
        }
        break; // No other applicable upgrades.
      }

      ID = StringSwitch<Intrinsic::ID>(Name)
               .StartsWith("sm4ks", Intrinsic::riscv_sm4ks)
               .StartsWith("sm4ed", Intrinsic::riscv_sm4ed)
               .Default(Intrinsic::not_intrinsic);
      if (ID != Intrinsic::not_intrinsic) {
        if (!F->getFunctionType()->getParamType(2)->isIntegerTy(32) ||
            F->getFunctionType()->getReturnType()->isIntegerTy(64)) {
          rename(F);
          NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID);
          return true;
        }
        break; // No other applicable upgrades.
      }

      ID = StringSwitch<Intrinsic::ID>(Name)
               .StartsWith("sha256sig0", Intrinsic::riscv_sha256sig0)
               .StartsWith("sha256sig1", Intrinsic::riscv_sha256sig1)
               .StartsWith("sha256sum0", Intrinsic::riscv_sha256sum0)
               .StartsWith("sha256sum1", Intrinsic::riscv_sha256sum1)
               .StartsWith("sm3p0", Intrinsic::riscv_sm3p0)
               .StartsWith("sm3p1", Intrinsic::riscv_sm3p1)
               .Default(Intrinsic::not_intrinsic);
      if (ID != Intrinsic::not_intrinsic) {
        if (F->getFunctionType()->getReturnType()->isIntegerTy(64)) {
          rename(F);
          NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID);
          return true;
        }
        break; // No other applicable upgrades.
      }
      break; // No other 'riscv.*' intrinsics.
    }
  } break;

  case 's':
    if (Name == "stackprotectorcheck") {
      NewFn = nullptr;
      return true;
    }
    break;

  case 'v': {
    if (Name == "var.annotation" && F->arg_size() == 4) {
      rename(F);
      NewFn = Intrinsic::getOrInsertDeclaration(
          F->getParent(), Intrinsic::var_annotation,
          {{F->arg_begin()->getType(), F->getArg(1)->getType()}});
      return true;
    }
    break;
  }

  case 'w':
    if (Name.consume_front("wasm.")) {
      Intrinsic::ID ID =
          StringSwitch<Intrinsic::ID>(Name)
              .StartsWith("fma.", Intrinsic::wasm_relaxed_madd)
              .StartsWith("fms.", Intrinsic::wasm_relaxed_nmadd)
              .StartsWith("laneselect.", Intrinsic::wasm_relaxed_laneselect)
              .Default(Intrinsic::not_intrinsic);
      if (ID != Intrinsic::not_intrinsic) {
        rename(F);
        NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID,
                                                  F->getReturnType());
        return true;
      }

      if (Name.consume_front("dot.i8x16.i7x16.")) {
        ID = StringSwitch<Intrinsic::ID>(Name)
                 .Case("signed", Intrinsic::wasm_relaxed_dot_i8x16_i7x16_signed)
                 .Case("add.signed",
                       Intrinsic::wasm_relaxed_dot_i8x16_i7x16_add_signed)
                 .Default(Intrinsic::not_intrinsic);
        if (ID != Intrinsic::not_intrinsic) {
          rename(F);
          NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID);
          return true;
        }
        break; // No other 'wasm.dot.i8x16.i7x16.*'.
      }
      break; // No other 'wasm.*'.
    }
    break;

  case 'x':
    if (upgradeX86IntrinsicFunction(F, Name, NewFn))
      return true;
  }

  auto *ST = dyn_cast<StructType>(F->getReturnType());
  if (ST && (!ST->isLiteral() || ST->isPacked()) &&
      F->getIntrinsicID() != Intrinsic::not_intrinsic) {
    // Replace return type with literal non-packed struct. Only do this for
    // intrinsics declared to return a struct, not for intrinsics with
    // overloaded return type, in which case the exact struct type will be
    // mangled into the name.
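    // E.g., a declaration returning a named struct such as
    //   %pair = type { i32, i32 }
    //   declare %pair @llvm.some.intrinsic(...)
    // would be re-created to return the literal, non-packed { i32, i32 }
    // (a hypothetical declaration, for illustration only).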
    SmallVector<Intrinsic::IITDescriptor> Desc;
    Intrinsic::getIntrinsicInfoTableEntries(F->getIntrinsicID(), Desc);
    if (Desc.front().Kind == Intrinsic::IITDescriptor::Struct) {
      auto *FT = F->getFunctionType();
      auto *NewST = StructType::get(ST->getContext(), ST->elements());
      auto *NewFT = FunctionType::get(NewST, FT->params(), FT->isVarArg());
      std::string Name = F->getName().str();
      rename(F);
      NewFn = Function::Create(NewFT, F->getLinkage(), F->getAddressSpace(),
                               Name, F->getParent());

      // The new function may also need remangling.
      if (auto Result = llvm::Intrinsic::remangleIntrinsicFunction(NewFn))
        NewFn = *Result;
      return true;
    }
  }

  // Remangle our intrinsic since we upgraded the mangling.
  auto Result = llvm::Intrinsic::remangleIntrinsicFunction(F);
  if (Result != std::nullopt) {
    NewFn = *Result;
    return true;
  }

  // This may not belong here. This function is effectively being overloaded
  // to both detect an intrinsic which needs upgrading, and to provide the
  // upgraded form of the intrinsic. We should perhaps have two separate
  // functions for this.
  return false;
}

bool llvm::UpgradeIntrinsicFunction(Function *F, Function *&NewFn,
                                    bool CanUpgradeDebugIntrinsicsToRecords) {
  NewFn = nullptr;
  bool Upgraded =
      upgradeIntrinsicFunction1(F, NewFn, CanUpgradeDebugIntrinsicsToRecords);
  assert(F != NewFn && "Intrinsic function upgraded to the same function");

  // Upgrade intrinsic attributes. This does not change the function.
  if (NewFn)
    F = NewFn;
  if (Intrinsic::ID id = F->getIntrinsicID())
    F->setAttributes(Intrinsic::getAttributes(F->getContext(), id));
  return Upgraded;
}

GlobalVariable *llvm::UpgradeGlobalVariable(GlobalVariable *GV) {
  if (!(GV->hasName() && (GV->getName() == "llvm.global_ctors" ||
                          GV->getName() == "llvm.global_dtors")) ||
      !GV->hasInitializer())
    return nullptr;
  ArrayType *ATy = dyn_cast<ArrayType>(GV->getValueType());
  if (!ATy)
    return nullptr;
  StructType *STy = dyn_cast<StructType>(ATy->getElementType());
  if (!STy || STy->getNumElements() != 2)
    return nullptr;

  LLVMContext &C = GV->getContext();
  IRBuilder<> IRB(C);
  auto EltTy = StructType::get(STy->getElementType(0), STy->getElementType(1),
                               IRB.getPtrTy());
  Constant *Init = GV->getInitializer();
  unsigned N = Init->getNumOperands();
  std::vector<Constant *> NewCtors(N);
  for (unsigned i = 0; i != N; ++i) {
    auto Ctor = cast<Constant>(Init->getOperand(i));
    NewCtors[i] = ConstantStruct::get(EltTy, Ctor->getAggregateElement(0u),
                                      Ctor->getAggregateElement(1),
                                      Constant::getNullValue(IRB.getPtrTy()));
  }
  Constant *NewInit = ConstantArray::get(ArrayType::get(EltTy, N), NewCtors);

  return new GlobalVariable(NewInit->getType(), false, GV->getLinkage(),
                            NewInit, GV->getName());
}

// Handles upgrading SSE2/AVX2/AVX512BW PSLLDQ intrinsics by converting them
// to byte shuffles.
static Value *upgradeX86PSLLDQIntrinsics(IRBuilder<> &Builder, Value *Op,
                                         unsigned Shift) {
  auto *ResultTy = cast<FixedVectorType>(Op->getType());
  unsigned NumElts = ResultTy->getNumElements() * 8;

  // Bitcast from a 64-bit element type to a byte element type.
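  // (E.g. a <2 x i64> operand becomes <16 x i8>, so a whole-byte shift can be
  // expressed as a single byte shuffle that pulls zeros in from a null
  // vector.)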
  Type *VecTy = FixedVectorType::get(Builder.getInt8Ty(), NumElts);
  Op = Builder.CreateBitCast(Op, VecTy, "cast");

  // We'll be shuffling in zeroes.
  Value *Res = Constant::getNullValue(VecTy);

  // If shift is less than 16, emit a shuffle to move the bytes. Otherwise,
  // we'll just return the zero vector.
  if (Shift < 16) {
    int Idxs[64];
    // 256/512-bit version is split into 2/4 16-byte lanes.
    for (unsigned l = 0; l != NumElts; l += 16)
      for (unsigned i = 0; i != 16; ++i) {
        unsigned Idx = NumElts + i - Shift;
        if (Idx < NumElts)
          Idx -= NumElts - 16; // end of lane, switch operand.
        Idxs[l + i] = Idx + l;
      }

    Res = Builder.CreateShuffleVector(Res, Op, ArrayRef(Idxs, NumElts));
  }

  // Bitcast back to a 64-bit element type.
  return Builder.CreateBitCast(Res, ResultTy, "cast");
}

// Handles upgrading SSE2/AVX2/AVX512BW PSRLDQ intrinsics by converting them
// to byte shuffles.
static Value *upgradeX86PSRLDQIntrinsics(IRBuilder<> &Builder, Value *Op,
                                         unsigned Shift) {
  auto *ResultTy = cast<FixedVectorType>(Op->getType());
  unsigned NumElts = ResultTy->getNumElements() * 8;

  // Bitcast from a 64-bit element type to a byte element type.
  Type *VecTy = FixedVectorType::get(Builder.getInt8Ty(), NumElts);
  Op = Builder.CreateBitCast(Op, VecTy, "cast");

  // We'll be shuffling in zeroes.
  Value *Res = Constant::getNullValue(VecTy);

  // If shift is less than 16, emit a shuffle to move the bytes. Otherwise,
  // we'll just return the zero vector.
  if (Shift < 16) {
    int Idxs[64];
    // 256/512-bit version is split into 2/4 16-byte lanes.
    for (unsigned l = 0; l != NumElts; l += 16)
      for (unsigned i = 0; i != 16; ++i) {
        unsigned Idx = i + Shift;
        if (Idx >= 16)
          Idx += NumElts - 16; // end of lane, switch operand.
        Idxs[l + i] = Idx + l;
      }

    Res = Builder.CreateShuffleVector(Op, Res, ArrayRef(Idxs, NumElts));
  }

  // Bitcast back to a 64-bit element type.
  return Builder.CreateBitCast(Res, ResultTy, "cast");
}

static Value *getX86MaskVec(IRBuilder<> &Builder, Value *Mask,
                            unsigned NumElts) {
  assert(isPowerOf2_32(NumElts) && "Expected power-of-2 mask elements");
  llvm::VectorType *MaskTy = FixedVectorType::get(
      Builder.getInt1Ty(), cast<IntegerType>(Mask->getType())->getBitWidth());
  Mask = Builder.CreateBitCast(Mask, MaskTy);

  // If we have less than 8 elements (1, 2 or 4), then the starting mask was an
  // i8 and we need to extract down to the right number of elements.
  if (NumElts <= 4) {
    int Indices[4];
    for (unsigned i = 0; i != NumElts; ++i)
      Indices[i] = i;
    Mask = Builder.CreateShuffleVector(Mask, Mask, ArrayRef(Indices, NumElts),
                                       "extract");
  }

  return Mask;
}

static Value *emitX86Select(IRBuilder<> &Builder, Value *Mask, Value *Op0,
                            Value *Op1) {
  // If the mask is all ones just emit the first operation.
  if (const auto *C = dyn_cast<Constant>(Mask))
    if (C->isAllOnesValue())
      return Op0;

  Mask = getX86MaskVec(Builder, Mask,
                       cast<FixedVectorType>(Op0->getType())->getNumElements());
  return Builder.CreateSelect(Mask, Op0, Op1);
}

static Value *emitX86ScalarSelect(IRBuilder<> &Builder, Value *Mask, Value *Op0,
                                  Value *Op1) {
  // If the mask is all ones just emit the first operation.
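  // (Only the lowest mask bit matters for a scalar operation: the mask is
  // bitcast to a vector of i1 below and element 0 alone drives the select.)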
  if (const auto *C = dyn_cast<Constant>(Mask))
    if (C->isAllOnesValue())
      return Op0;

  auto *MaskTy = FixedVectorType::get(Builder.getInt1Ty(),
                                      Mask->getType()->getIntegerBitWidth());
  Mask = Builder.CreateBitCast(Mask, MaskTy);
  Mask = Builder.CreateExtractElement(Mask, (uint64_t)0);
  return Builder.CreateSelect(Mask, Op0, Op1);
}

// Handle autoupgrade for masked PALIGNR and VALIGND/Q intrinsics.
// PALIGNR handles large immediates by shifting while VALIGN masks the
// immediate, so we need to handle both cases. VALIGN also doesn't have
// 128-bit lanes.
static Value *upgradeX86ALIGNIntrinsics(IRBuilder<> &Builder, Value *Op0,
                                        Value *Op1, Value *Shift,
                                        Value *Passthru, Value *Mask,
                                        bool IsVALIGN) {
  unsigned ShiftVal = cast<llvm::ConstantInt>(Shift)->getZExtValue();

  unsigned NumElts = cast<FixedVectorType>(Op0->getType())->getNumElements();
  assert((IsVALIGN || NumElts % 16 == 0) && "Illegal NumElts for PALIGNR!");
  assert((!IsVALIGN || NumElts <= 16) && "NumElts too large for VALIGN!");
  assert(isPowerOf2_32(NumElts) && "NumElts not a power of 2!");

  // Mask the immediate for VALIGN.
  if (IsVALIGN)
    ShiftVal &= (NumElts - 1);

  // If palignr is shifting the pair of vectors more than the size of two
  // lanes, emit zero.
  if (ShiftVal >= 32)
    return llvm::Constant::getNullValue(Op0->getType());

  // If palignr is shifting the pair of input vectors more than one lane,
  // but less than two lanes, convert to shifting in zeroes.
  if (ShiftVal > 16) {
    ShiftVal -= 16;
    Op1 = Op0;
    Op0 = llvm::Constant::getNullValue(Op0->getType());
  }

  int Indices[64];
  // 256-bit palignr operates on 128-bit lanes so we need to handle that.
  for (unsigned l = 0; l < NumElts; l += 16) {
    for (unsigned i = 0; i != 16; ++i) {
      unsigned Idx = ShiftVal + i;
      if (!IsVALIGN && Idx >= 16) // Disable wrap for VALIGN.
        Idx += NumElts - 16;      // End of lane, switch operand.
      Indices[l + i] = Idx + l;
    }
  }

  Value *Align = Builder.CreateShuffleVector(
      Op1, Op0, ArrayRef(Indices, NumElts), "palignr");

  return emitX86Select(Builder, Mask, Align, Passthru);
}

static Value *upgradeX86VPERMT2Intrinsics(IRBuilder<> &Builder, CallBase &CI,
                                          bool ZeroMask, bool IndexForm) {
  Type *Ty = CI.getType();
  unsigned VecWidth = Ty->getPrimitiveSizeInBits();
  unsigned EltWidth = Ty->getScalarSizeInBits();
  bool IsFloat = Ty->isFPOrFPVectorTy();
  Intrinsic::ID IID;
  if (VecWidth == 128 && EltWidth == 32 && IsFloat)
    IID = Intrinsic::x86_avx512_vpermi2var_ps_128;
  else if (VecWidth == 128 && EltWidth == 32 && !IsFloat)
    IID = Intrinsic::x86_avx512_vpermi2var_d_128;
  else if (VecWidth == 128 && EltWidth == 64 && IsFloat)
    IID = Intrinsic::x86_avx512_vpermi2var_pd_128;
  else if (VecWidth == 128 && EltWidth == 64 && !IsFloat)
    IID = Intrinsic::x86_avx512_vpermi2var_q_128;
  else if (VecWidth == 256 && EltWidth == 32 && IsFloat)
    IID = Intrinsic::x86_avx512_vpermi2var_ps_256;
  else if (VecWidth == 256 && EltWidth == 32 && !IsFloat)
    IID = Intrinsic::x86_avx512_vpermi2var_d_256;
  else if (VecWidth == 256 && EltWidth == 64 && IsFloat)
    IID = Intrinsic::x86_avx512_vpermi2var_pd_256;
  else if (VecWidth == 256 && EltWidth == 64 && !IsFloat)
    IID = Intrinsic::x86_avx512_vpermi2var_q_256;
  else if (VecWidth == 512 && EltWidth == 32 && IsFloat)
    IID = Intrinsic::x86_avx512_vpermi2var_ps_512;
  else if (VecWidth == 512 && EltWidth == 32 && !IsFloat)
    IID = Intrinsic::x86_avx512_vpermi2var_d_512;
  else if (VecWidth == 512 && EltWidth == 64 && IsFloat)
    IID = Intrinsic::x86_avx512_vpermi2var_pd_512;
  else if (VecWidth == 512 && EltWidth == 64 && !IsFloat)
    IID = Intrinsic::x86_avx512_vpermi2var_q_512;
  else if (VecWidth == 128 && EltWidth == 16)
    IID = Intrinsic::x86_avx512_vpermi2var_hi_128;
  else if (VecWidth == 256 && EltWidth == 16)
    IID = Intrinsic::x86_avx512_vpermi2var_hi_256;
  else if (VecWidth == 512 && EltWidth == 16)
    IID = Intrinsic::x86_avx512_vpermi2var_hi_512;
  else if (VecWidth == 128 && EltWidth == 8)
    IID = Intrinsic::x86_avx512_vpermi2var_qi_128;
  else if (VecWidth == 256 && EltWidth == 8)
    IID = Intrinsic::x86_avx512_vpermi2var_qi_256;
  else if (VecWidth == 512 && EltWidth == 8)
    IID = Intrinsic::x86_avx512_vpermi2var_qi_512;
  else
    llvm_unreachable("Unexpected intrinsic");

  Value *Args[] = {CI.getArgOperand(0), CI.getArgOperand(1),
                   CI.getArgOperand(2)};

  // If this isn't index form we need to swap operand 0 and 1.
  if (!IndexForm)
    std::swap(Args[0], Args[1]);

  Value *V = Builder.CreateIntrinsic(IID, {}, Args);
  Value *PassThru = ZeroMask ? ConstantAggregateZero::get(Ty)
                             : Builder.CreateBitCast(CI.getArgOperand(1), Ty);
  return emitX86Select(Builder, CI.getArgOperand(3), V, PassThru);
}

static Value *upgradeX86BinaryIntrinsics(IRBuilder<> &Builder, CallBase &CI,
                                         Intrinsic::ID IID) {
  Type *Ty = CI.getType();
  Value *Op0 = CI.getOperand(0);
  Value *Op1 = CI.getOperand(1);
  Value *Res = Builder.CreateIntrinsic(IID, Ty, {Op0, Op1});

  if (CI.arg_size() == 4) { // For masked intrinsics.
    Value *VecSrc = CI.getOperand(2);
    Value *Mask = CI.getOperand(3);
    Res = emitX86Select(Builder, Mask, Res, VecSrc);
  }
  return Res;
}

static Value *upgradeX86Rotate(IRBuilder<> &Builder, CallBase &CI,
                               bool IsRotateRight) {
  Type *Ty = CI.getType();
  Value *Src = CI.getArgOperand(0);
  Value *Amt = CI.getArgOperand(1);

  // Amount may be a scalar immediate, in which case create a splat vector.
  // Funnel shift amounts are treated as modulo and types are all power-of-2 so
  // we only care about the lowest log2 bits anyway.
  if (Amt->getType() != Ty) {
    unsigned NumElts = cast<FixedVectorType>(Ty)->getNumElements();
    Amt = Builder.CreateIntCast(Amt, Ty->getScalarType(), false);
    Amt = Builder.CreateVectorSplat(NumElts, Amt);
  }

  Intrinsic::ID IID = IsRotateRight ? Intrinsic::fshr : Intrinsic::fshl;
  Value *Res = Builder.CreateIntrinsic(IID, Ty, {Src, Src, Amt});

  if (CI.arg_size() == 4) { // For masked intrinsics.
    Value *VecSrc = CI.getOperand(2);
    Value *Mask = CI.getOperand(3);
    Res = emitX86Select(Builder, Mask, Res, VecSrc);
  }
  return Res;
}

static Value *upgradeX86vpcom(IRBuilder<> &Builder, CallBase &CI, unsigned Imm,
                              bool IsSigned) {
  Type *Ty = CI.getType();
  Value *LHS = CI.getArgOperand(0);
  Value *RHS = CI.getArgOperand(1);

  CmpInst::Predicate Pred;
  switch (Imm) {
  case 0x0:
    Pred = IsSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT;
    break;
  case 0x1:
    Pred = IsSigned ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE;
    break;
  case 0x2:
    Pred = IsSigned ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT;
    break;
  case 0x3:
    Pred = IsSigned ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE;
    break;
  case 0x4:
    Pred = ICmpInst::ICMP_EQ;
    break;
  case 0x5:
    Pred = ICmpInst::ICMP_NE;
    break;
  case 0x6:
    return Constant::getNullValue(Ty); // FALSE
  case 0x7:
    return Constant::getAllOnesValue(Ty); // TRUE
  default:
    llvm_unreachable("Unknown XOP vpcom/vpcomu predicate");
  }

  Value *Cmp = Builder.CreateICmp(Pred, LHS, RHS);
  Value *Ext = Builder.CreateSExt(Cmp, Ty);
  return Ext;
}

static Value *upgradeX86ConcatShift(IRBuilder<> &Builder, CallBase &CI,
                                    bool IsShiftRight, bool ZeroMask) {
  Type *Ty = CI.getType();
  Value *Op0 = CI.getArgOperand(0);
  Value *Op1 = CI.getArgOperand(1);
  Value *Amt = CI.getArgOperand(2);

  if (IsShiftRight)
    std::swap(Op0, Op1);

  // Amount may be a scalar immediate, in which case create a splat vector.
  // Funnel shift amounts are treated as modulo and types are all power-of-2 so
  // we only care about the lowest log2 bits anyway.
  if (Amt->getType() != Ty) {
    unsigned NumElts = cast<FixedVectorType>(Ty)->getNumElements();
    Amt = Builder.CreateIntCast(Amt, Ty->getScalarType(), false);
    Amt = Builder.CreateVectorSplat(NumElts, Amt);
  }

  Intrinsic::ID IID = IsShiftRight ? Intrinsic::fshr : Intrinsic::fshl;
  Value *Res = Builder.CreateIntrinsic(IID, Ty, {Op0, Op1, Amt});

  unsigned NumArgs = CI.arg_size();
  if (NumArgs >= 4) { // For masked intrinsics.
    Value *VecSrc = NumArgs == 5 ? CI.getArgOperand(3)
                    : ZeroMask   ? ConstantAggregateZero::get(CI.getType())
                                 : CI.getArgOperand(0);
    Value *Mask = CI.getOperand(NumArgs - 1);
    Res = emitX86Select(Builder, Mask, Res, VecSrc);
  }
  return Res;
}

static Value *upgradeMaskedStore(IRBuilder<> &Builder, Value *Ptr, Value *Data,
                                 Value *Mask, bool Aligned) {
  const Align Alignment =
      Aligned
          ? Align(Data->getType()->getPrimitiveSizeInBits().getFixedValue() / 8)
          : Align(1);

  // If the mask is all ones just emit a regular store.
  if (const auto *C = dyn_cast<Constant>(Mask))
    if (C->isAllOnesValue())
      return Builder.CreateAlignedStore(Data, Ptr, Alignment);

  // Convert the mask from an integer type to a vector of i1.
  unsigned NumElts = cast<FixedVectorType>(Data->getType())->getNumElements();
  Mask = getX86MaskVec(Builder, Mask, NumElts);
  return Builder.CreateMaskedStore(Data, Ptr, Alignment, Mask);
}

static Value *upgradeMaskedLoad(IRBuilder<> &Builder, Value *Ptr,
                                Value *Passthru, Value *Mask, bool Aligned) {
  Type *ValTy = Passthru->getType();
  const Align Alignment =
      Aligned
          ? Align(
                Passthru->getType()->getPrimitiveSizeInBits().getFixedValue() /
                8)
          : Align(1);

  // If the mask is all ones just emit a regular load.
  if (const auto *C = dyn_cast<Constant>(Mask))
    if (C->isAllOnesValue())
      return Builder.CreateAlignedLoad(ValTy, Ptr, Alignment);

  // Convert the mask from an integer type to a vector of i1.
  unsigned NumElts = cast<FixedVectorType>(ValTy)->getNumElements();
  Mask = getX86MaskVec(Builder, Mask, NumElts);
  return Builder.CreateMaskedLoad(ValTy, Ptr, Alignment, Mask, Passthru);
}

static Value *upgradeAbs(IRBuilder<> &Builder, CallBase &CI) {
  Type *Ty = CI.getType();
  Value *Op0 = CI.getArgOperand(0);
  Value *Res = Builder.CreateIntrinsic(Intrinsic::abs, Ty,
                                       {Op0, Builder.getInt1(false)});
  if (CI.arg_size() == 3)
    Res = emitX86Select(Builder, CI.getArgOperand(2), Res, CI.getArgOperand(1));
  return Res;
}

static Value *upgradePMULDQ(IRBuilder<> &Builder, CallBase &CI, bool IsSigned) {
  Type *Ty = CI.getType();

  // Arguments have a vXi32 type so cast to vXi64.
  Value *LHS = Builder.CreateBitCast(CI.getArgOperand(0), Ty);
  Value *RHS = Builder.CreateBitCast(CI.getArgOperand(1), Ty);

  if (IsSigned) {
    // Shift left then arithmetic shift right.
    Constant *ShiftAmt = ConstantInt::get(Ty, 32);
    LHS = Builder.CreateShl(LHS, ShiftAmt);
    LHS = Builder.CreateAShr(LHS, ShiftAmt);
    RHS = Builder.CreateShl(RHS, ShiftAmt);
    RHS = Builder.CreateAShr(RHS, ShiftAmt);
  } else {
    // Clear the upper bits.
    Constant *Mask = ConstantInt::get(Ty, 0xffffffff);
    LHS = Builder.CreateAnd(LHS, Mask);
    RHS = Builder.CreateAnd(RHS, Mask);
  }

  Value *Res = Builder.CreateMul(LHS, RHS);

  if (CI.arg_size() == 4)
    Res = emitX86Select(Builder, CI.getArgOperand(3), Res, CI.getArgOperand(2));

  return Res;
}

// Apply a mask to a vector of i1s and make sure the result is at least 8 bits
// wide.
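// (For example, a <4 x i1> compare result is padded with zeros from a null
// vector up to <8 x i1> so that the final bitcast can produce an i8.)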
static Value *applyX86MaskOn1BitsVec(IRBuilder<> &Builder, Value *Vec,
                                     Value *Mask) {
  unsigned NumElts = cast<FixedVectorType>(Vec->getType())->getNumElements();
  if (Mask) {
    const auto *C = dyn_cast<Constant>(Mask);
    if (!C || !C->isAllOnesValue())
      Vec = Builder.CreateAnd(Vec, getX86MaskVec(Builder, Mask, NumElts));
  }

  if (NumElts < 8) {
    int Indices[8];
    for (unsigned i = 0; i != NumElts; ++i)
      Indices[i] = i;
    for (unsigned i = NumElts; i != 8; ++i)
      Indices[i] = NumElts + i % NumElts;
    Vec = Builder.CreateShuffleVector(Vec,
                                      Constant::getNullValue(Vec->getType()),
                                      Indices);
  }
  return Builder.CreateBitCast(Vec, Builder.getIntNTy(std::max(NumElts, 8U)));
}

static Value *upgradeMaskedCompare(IRBuilder<> &Builder, CallBase &CI,
                                   unsigned CC, bool Signed) {
  Value *Op0 = CI.getArgOperand(0);
  unsigned NumElts = cast<FixedVectorType>(Op0->getType())->getNumElements();

  Value *Cmp;
  if (CC == 3) {
    Cmp = Constant::getNullValue(
        FixedVectorType::get(Builder.getInt1Ty(), NumElts));
  } else if (CC == 7) {
    Cmp = Constant::getAllOnesValue(
        FixedVectorType::get(Builder.getInt1Ty(), NumElts));
  } else {
    ICmpInst::Predicate Pred;
    switch (CC) {
    default: llvm_unreachable("Unknown condition code");
    case 0: Pred = ICmpInst::ICMP_EQ; break;
    case 1: Pred = Signed ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT; break;
    case 2: Pred = Signed ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE; break;
    case 4: Pred = ICmpInst::ICMP_NE; break;
    case 5: Pred = Signed ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE; break;
    case 6: Pred = Signed ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT; break;
    }
    Cmp = Builder.CreateICmp(Pred, Op0, CI.getArgOperand(1));
  }

  Value *Mask = CI.getArgOperand(CI.arg_size() - 1);

  return applyX86MaskOn1BitsVec(Builder, Cmp, Mask);
}

// Replace a masked intrinsic with an older unmasked intrinsic.
static Value *upgradeX86MaskedShift(IRBuilder<> &Builder, CallBase &CI,
                                    Intrinsic::ID IID) {
  Value *Rep = Builder.CreateIntrinsic(
      IID, {}, {CI.getArgOperand(0), CI.getArgOperand(1)});
  return emitX86Select(Builder, CI.getArgOperand(3), Rep, CI.getArgOperand(2));
}

static Value *upgradeMaskedMove(IRBuilder<> &Builder, CallBase &CI) {
  Value *A = CI.getArgOperand(0);
  Value *B = CI.getArgOperand(1);
  Value *Src = CI.getArgOperand(2);
  Value *Mask = CI.getArgOperand(3);

  Value *AndNode = Builder.CreateAnd(Mask, APInt(8, 1));
  Value *Cmp = Builder.CreateIsNotNull(AndNode);
  Value *Extract1 = Builder.CreateExtractElement(B, (uint64_t)0);
  Value *Extract2 = Builder.CreateExtractElement(Src, (uint64_t)0);
  Value *Select = Builder.CreateSelect(Cmp, Extract1, Extract2);
  return Builder.CreateInsertElement(A, Select, (uint64_t)0);
}

static Value *upgradeMaskToInt(IRBuilder<> &Builder, CallBase &CI) {
  Value *Op = CI.getArgOperand(0);
  Type *ReturnOp = CI.getType();
  unsigned NumElts = cast<FixedVectorType>(CI.getType())->getNumElements();
  Value *Mask = getX86MaskVec(Builder, Op, NumElts);
  return Builder.CreateSExt(Mask, ReturnOp, "vpmovm2");
}

// Replace intrinsic with unmasked version and a select.
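// (Roughly: "avx512.mask.max.ps.128" becomes a plain x86.sse.max.ps call
// whose result is then merged with the passthru operand via emitX86Select.)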
static bool upgradeAVX512MaskToSelect(StringRef Name, IRBuilder<> &Builder,
                                      CallBase &CI, Value *&Rep) {
  Name = Name.substr(12); // Remove avx512.mask.

  unsigned VecWidth = CI.getType()->getPrimitiveSizeInBits();
  unsigned EltWidth = CI.getType()->getScalarSizeInBits();
  Intrinsic::ID IID;
  if (Name.starts_with("max.p")) {
    if (VecWidth == 128 && EltWidth == 32)
      IID = Intrinsic::x86_sse_max_ps;
    else if (VecWidth == 128 && EltWidth == 64)
      IID = Intrinsic::x86_sse2_max_pd;
    else if (VecWidth == 256 && EltWidth == 32)
      IID = Intrinsic::x86_avx_max_ps_256;
    else if (VecWidth == 256 && EltWidth == 64)
      IID = Intrinsic::x86_avx_max_pd_256;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.starts_with("min.p")) {
    if (VecWidth == 128 && EltWidth == 32)
      IID = Intrinsic::x86_sse_min_ps;
    else if (VecWidth == 128 && EltWidth == 64)
      IID = Intrinsic::x86_sse2_min_pd;
    else if (VecWidth == 256 && EltWidth == 32)
      IID = Intrinsic::x86_avx_min_ps_256;
    else if (VecWidth == 256 && EltWidth == 64)
      IID = Intrinsic::x86_avx_min_pd_256;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.starts_with("pshuf.b.")) {
    if (VecWidth == 128)
      IID = Intrinsic::x86_ssse3_pshuf_b_128;
    else if (VecWidth == 256)
      IID = Intrinsic::x86_avx2_pshuf_b;
    else if (VecWidth == 512)
      IID = Intrinsic::x86_avx512_pshuf_b_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.starts_with("pmul.hr.sw.")) {
    if (VecWidth == 128)
      IID = Intrinsic::x86_ssse3_pmul_hr_sw_128;
    else if (VecWidth == 256)
      IID = Intrinsic::x86_avx2_pmul_hr_sw;
    else if (VecWidth == 512)
      IID = Intrinsic::x86_avx512_pmul_hr_sw_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.starts_with("pmulh.w.")) {
    if (VecWidth == 128)
      IID = Intrinsic::x86_sse2_pmulh_w;
    else if (VecWidth == 256)
      IID = Intrinsic::x86_avx2_pmulh_w;
    else if (VecWidth == 512)
      IID = Intrinsic::x86_avx512_pmulh_w_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.starts_with("pmulhu.w.")) {
    if (VecWidth == 128)
      IID = Intrinsic::x86_sse2_pmulhu_w;
    else if (VecWidth == 256)
      IID = Intrinsic::x86_avx2_pmulhu_w;
    else if (VecWidth == 512)
      IID = Intrinsic::x86_avx512_pmulhu_w_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.starts_with("pmaddw.d.")) {
    if (VecWidth == 128)
      IID = Intrinsic::x86_sse2_pmadd_wd;
    else if (VecWidth == 256)
      IID = Intrinsic::x86_avx2_pmadd_wd;
    else if (VecWidth == 512)
      IID = Intrinsic::x86_avx512_pmaddw_d_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.starts_with("pmaddubs.w.")) {
    if (VecWidth == 128)
      IID = Intrinsic::x86_ssse3_pmadd_ub_sw_128;
    else if (VecWidth == 256)
      IID = Intrinsic::x86_avx2_pmadd_ub_sw;
    else if (VecWidth == 512)
      IID = Intrinsic::x86_avx512_pmaddubs_w_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.starts_with("packsswb.")) {
    if (VecWidth == 128)
      IID = Intrinsic::x86_sse2_packsswb_128;
    else if (VecWidth == 256)
      IID = Intrinsic::x86_avx2_packsswb;
    else if (VecWidth == 512)
      IID = Intrinsic::x86_avx512_packsswb_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.starts_with("packssdw.")) {
    if (VecWidth == 128)
      IID = Intrinsic::x86_sse2_packssdw_128;
    else if (VecWidth == 256)
      IID = Intrinsic::x86_avx2_packssdw;
    else if (VecWidth == 512)
      IID = Intrinsic::x86_avx512_packssdw_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.starts_with("packuswb.")) {
    if (VecWidth == 128)
      IID = Intrinsic::x86_sse2_packuswb_128;
    else if (VecWidth == 256)
      IID = Intrinsic::x86_avx2_packuswb;
    else if (VecWidth == 512)
      IID = Intrinsic::x86_avx512_packuswb_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.starts_with("packusdw.")) {
    if (VecWidth == 128)
      IID = Intrinsic::x86_sse41_packusdw;
    else if (VecWidth == 256)
      IID = Intrinsic::x86_avx2_packusdw;
    else if (VecWidth == 512)
      IID = Intrinsic::x86_avx512_packusdw_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.starts_with("vpermilvar.")) {
    if (VecWidth == 128 && EltWidth == 32)
      IID = Intrinsic::x86_avx_vpermilvar_ps;
    else if (VecWidth == 128 && EltWidth == 64)
      IID = Intrinsic::x86_avx_vpermilvar_pd;
    else if (VecWidth == 256 && EltWidth == 32)
      IID = Intrinsic::x86_avx_vpermilvar_ps_256;
    else if (VecWidth == 256 && EltWidth == 64)
      IID = Intrinsic::x86_avx_vpermilvar_pd_256;
    else if (VecWidth == 512 && EltWidth == 32)
      IID = Intrinsic::x86_avx512_vpermilvar_ps_512;
    else if (VecWidth == 512 && EltWidth == 64)
      IID = Intrinsic::x86_avx512_vpermilvar_pd_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name == "cvtpd2dq.256") {
    IID = Intrinsic::x86_avx_cvt_pd2dq_256;
  } else if (Name == "cvtpd2ps.256") {
    IID = Intrinsic::x86_avx_cvt_pd2_ps_256;
  } else if (Name == "cvttpd2dq.256") {
    IID = Intrinsic::x86_avx_cvtt_pd2dq_256;
  } else if (Name == "cvttps2dq.128") {
    IID = Intrinsic::x86_sse2_cvttps2dq;
  } else if (Name == "cvttps2dq.256") {
    IID = Intrinsic::x86_avx_cvtt_ps2dq_256;
  } else if (Name.starts_with("permvar.")) {
    bool IsFloat = CI.getType()->isFPOrFPVectorTy();
    if (VecWidth == 256 && EltWidth == 32 && IsFloat)
      IID = Intrinsic::x86_avx2_permps;
    else if (VecWidth == 256 && EltWidth == 32 && !IsFloat)
      IID = Intrinsic::x86_avx2_permd;
    else if (VecWidth == 256 && EltWidth == 64 && IsFloat)
      IID = Intrinsic::x86_avx512_permvar_df_256;
    else if (VecWidth == 256 && EltWidth == 64 && !IsFloat)
      IID = Intrinsic::x86_avx512_permvar_di_256;
    else if (VecWidth == 512 && EltWidth == 32 && IsFloat)
      IID = Intrinsic::x86_avx512_permvar_sf_512;
    else if (VecWidth == 512 && EltWidth == 32 && !IsFloat)
      IID = Intrinsic::x86_avx512_permvar_si_512;
    else if (VecWidth == 512 && EltWidth == 64 && IsFloat)
      IID = Intrinsic::x86_avx512_permvar_df_512;
    else if (VecWidth == 512 && EltWidth == 64 && !IsFloat)
      IID = Intrinsic::x86_avx512_permvar_di_512;
    else if (VecWidth == 128 && EltWidth == 16)
      IID = Intrinsic::x86_avx512_permvar_hi_128;
    else if (VecWidth == 256 && EltWidth == 16)
      IID = Intrinsic::x86_avx512_permvar_hi_256;
    else if (VecWidth == 512 && EltWidth == 16)
      IID = Intrinsic::x86_avx512_permvar_hi_512;
    else if (VecWidth == 128 && EltWidth == 8)
      IID = Intrinsic::x86_avx512_permvar_qi_128;
    else if (VecWidth == 256 && EltWidth == 8)
      IID = Intrinsic::x86_avx512_permvar_qi_256;
    else if (VecWidth == 512 && EltWidth == 8)
      IID = Intrinsic::x86_avx512_permvar_qi_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.starts_with("dbpsadbw.")) {
    if (VecWidth == 128)
      IID = Intrinsic::x86_avx512_dbpsadbw_128;
    else if (VecWidth == 256)
      IID = Intrinsic::x86_avx512_dbpsadbw_256;
    else if (VecWidth == 512)
      IID = Intrinsic::x86_avx512_dbpsadbw_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.starts_with("pmultishift.qb.")) {
    if (VecWidth == 128)
      IID = Intrinsic::x86_avx512_pmultishift_qb_128;
    else if (VecWidth == 256)
      IID = Intrinsic::x86_avx512_pmultishift_qb_256;
    else if (VecWidth == 512)
      IID = Intrinsic::x86_avx512_pmultishift_qb_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.starts_with("conflict.")) {
    if (Name[9] == 'd' && VecWidth == 128)
      IID = Intrinsic::x86_avx512_conflict_d_128;
    else if (Name[9] == 'd' && VecWidth == 256)
      IID = Intrinsic::x86_avx512_conflict_d_256;
    else if (Name[9] == 'd' && VecWidth == 512)
      IID = Intrinsic::x86_avx512_conflict_d_512;
    else if (Name[9] == 'q' && VecWidth == 128)
      IID = Intrinsic::x86_avx512_conflict_q_128;
    else if (Name[9] == 'q' && VecWidth == 256)
      IID = Intrinsic::x86_avx512_conflict_q_256;
    else if (Name[9] == 'q' && VecWidth == 512)
      IID = Intrinsic::x86_avx512_conflict_q_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else if (Name.starts_with("pavg.")) {
    if (Name[5] == 'b' && VecWidth == 128)
      IID = Intrinsic::x86_sse2_pavg_b;
    else if (Name[5] == 'b' && VecWidth == 256)
      IID = Intrinsic::x86_avx2_pavg_b;
    else if (Name[5] == 'b' && VecWidth == 512)
      IID = Intrinsic::x86_avx512_pavg_b_512;
    else if (Name[5] == 'w' && VecWidth == 128)
      IID = Intrinsic::x86_sse2_pavg_w;
    else if (Name[5] == 'w' && VecWidth == 256)
      IID = Intrinsic::x86_avx2_pavg_w;
    else if (Name[5] == 'w' && VecWidth == 512)
      IID = Intrinsic::x86_avx512_pavg_w_512;
    else
      llvm_unreachable("Unexpected intrinsic");
  } else
    return false;

  SmallVector<Value *, 4> Args(CI.args());
  Args.pop_back();
  Args.pop_back();
  Rep = Builder.CreateIntrinsic(IID, {}, Args);
  unsigned NumArgs = CI.arg_size();
  Rep = emitX86Select(Builder, CI.getArgOperand(NumArgs - 1), Rep,
                      CI.getArgOperand(NumArgs - 2));
  return true;
}

/// Upgrade a comment in a call to inline asm that represents an ObjC
/// retain/release marker.
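/// (The rewrite below only swaps the '#' that introduces the marker comment
/// for a ';', so "# marker for objc_retainAutoreleaseReturnValue" becomes
/// "; marker for objc_retainAutoreleaseReturnValue".)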
void llvm::UpgradeInlineAsmString(std::string *AsmStr) {
  size_t Pos;
  if (AsmStr->find("mov\tfp") == 0 &&
      AsmStr->find("objc_retainAutoreleaseReturnValue") != std::string::npos &&
      (Pos = AsmStr->find("# marker")) != std::string::npos) {
    AsmStr->replace(Pos, 1, ";");
  }
}

static Value *upgradeNVVMIntrinsicCall(StringRef Name, CallBase *CI,
                                       Function *F, IRBuilder<> &Builder) {
  Value *Rep = nullptr;

  if (Name == "abs.i" || Name == "abs.ll") {
    Value *Arg = CI->getArgOperand(0);
    Value *Neg = Builder.CreateNeg(Arg, "neg");
    Value *Cmp = Builder.CreateICmpSGE(
        Arg, llvm::Constant::getNullValue(Arg->getType()), "abs.cond");
    Rep = Builder.CreateSelect(Cmp, Arg, Neg, "abs");
  } else if (Name.starts_with("atomic.load.add.f32.p") ||
             Name.starts_with("atomic.load.add.f64.p")) {
    Value *Ptr = CI->getArgOperand(0);
    Value *Val = CI->getArgOperand(1);
    Rep = Builder.CreateAtomicRMW(AtomicRMWInst::FAdd, Ptr, Val, MaybeAlign(),
                                  AtomicOrdering::SequentiallyConsistent);
  } else if (Name.consume_front("max.") &&
             (Name == "s" || Name == "i" || Name == "ll" || Name == "us" ||
              Name == "ui" || Name == "ull")) {
    Value *Arg0 = CI->getArgOperand(0);
    Value *Arg1 = CI->getArgOperand(1);
    Value *Cmp = Name.starts_with("u")
                     ? Builder.CreateICmpUGE(Arg0, Arg1, "max.cond")
                     : Builder.CreateICmpSGE(Arg0, Arg1, "max.cond");
    Rep = Builder.CreateSelect(Cmp, Arg0, Arg1, "max");
  } else if (Name.consume_front("min.") &&
             (Name == "s" || Name == "i" || Name == "ll" || Name == "us" ||
              Name == "ui" || Name == "ull")) {
    Value *Arg0 = CI->getArgOperand(0);
    Value *Arg1 = CI->getArgOperand(1);
    Value *Cmp = Name.starts_with("u")
                     ? Builder.CreateICmpULE(Arg0, Arg1, "min.cond")
                     : Builder.CreateICmpSLE(Arg0, Arg1, "min.cond");
    Rep = Builder.CreateSelect(Cmp, Arg0, Arg1, "min");
  } else if (Name == "clz.ll") {
    // llvm.nvvm.clz.ll returns an i32, but llvm.ctlz.i64 returns an i64.
    Value *Arg = CI->getArgOperand(0);
    Value *Ctlz = Builder.CreateIntrinsic(Intrinsic::ctlz, {Arg->getType()},
                                          {Arg, Builder.getFalse()},
                                          /*FMFSource=*/nullptr, "ctlz");
    Rep = Builder.CreateTrunc(Ctlz, Builder.getInt32Ty(), "ctlz.trunc");
  } else if (Name == "popc.ll") {
    // llvm.nvvm.popc.ll returns an i32, but llvm.ctpop.i64 returns an i64.
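    // As with clz.ll above, the wide result is truncated back to i32.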
    Value *Arg = CI->getArgOperand(0);
    Value *Popc = Builder.CreateIntrinsic(Intrinsic::ctpop, {Arg->getType()},
                                          Arg, /*FMFSource=*/nullptr, "ctpop");
    Rep = Builder.CreateTrunc(Popc, Builder.getInt32Ty(), "ctpop.trunc");
  } else if (Name == "h2f") {
    Rep = Builder.CreateIntrinsic(Intrinsic::convert_from_fp16,
                                  {Builder.getFloatTy()}, CI->getArgOperand(0),
                                  /*FMFSource=*/nullptr, "h2f");
  } else if (Name.consume_front("bitcast.") &&
             (Name == "f2i" || Name == "i2f" || Name == "ll2d" ||
              Name == "d2ll")) {
    Rep = Builder.CreateBitCast(CI->getArgOperand(0), CI->getType());
  } else if (Name == "rotate.b32") {
    Value *Arg = CI->getOperand(0);
    Value *ShiftAmt = CI->getOperand(1);
    Rep = Builder.CreateIntrinsic(Builder.getInt32Ty(), Intrinsic::fshl,
                                  {Arg, Arg, ShiftAmt});
  } else if (Name == "rotate.b64") {
    Type *Int64Ty = Builder.getInt64Ty();
    Value *Arg = CI->getOperand(0);
    Value *ZExtShiftAmt = Builder.CreateZExt(CI->getOperand(1), Int64Ty);
    Rep = Builder.CreateIntrinsic(Int64Ty, Intrinsic::fshl,
                                  {Arg, Arg, ZExtShiftAmt});
  } else if (Name == "rotate.right.b64") {
    Type *Int64Ty = Builder.getInt64Ty();
    Value *Arg = CI->getOperand(0);
    Value *ZExtShiftAmt = Builder.CreateZExt(CI->getOperand(1), Int64Ty);
    Rep = Builder.CreateIntrinsic(Int64Ty, Intrinsic::fshr,
                                  {Arg, Arg, ZExtShiftAmt});
  } else if ((Name.consume_front("ptr.gen.to.") &&
              (Name.starts_with("local") || Name.starts_with("shared") ||
               Name.starts_with("global") || Name.starts_with("constant"))) ||
             (Name.consume_front("ptr.") &&
              (Name.consume_front("local") || Name.consume_front("shared") ||
               Name.consume_front("global") ||
               Name.consume_front("constant")) &&
              Name.starts_with(".to.gen"))) {
    Rep = Builder.CreateAddrSpaceCast(CI->getArgOperand(0), CI->getType());
  } else if (Name.consume_front("ldg.global")) {
    Value *Ptr = CI->getArgOperand(0);
    Align PtrAlign = cast<ConstantInt>(CI->getArgOperand(1))->getAlignValue();
    // Use addrspace(1) for NVPTX ADDRESS_SPACE_GLOBAL.
    Value *ASC = Builder.CreateAddrSpaceCast(Ptr, Builder.getPtrTy(1));
    Instruction *LD = Builder.CreateAlignedLoad(CI->getType(), ASC, PtrAlign);
    MDNode *MD = MDNode::get(Builder.getContext(), {});
    LD->setMetadata(LLVMContext::MD_invariant_load, MD);
    return LD;
  } else {
    Intrinsic::ID IID = shouldUpgradeNVPTXBF16Intrinsic(Name);
    if (IID != Intrinsic::not_intrinsic &&
        !F->getReturnType()->getScalarType()->isBFloatTy()) {
      rename(F);
      Function *NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
      SmallVector<Value *, 2> Args;
      for (size_t I = 0; I < NewFn->arg_size(); ++I) {
        Value *Arg = CI->getArgOperand(I);
        Type *OldType = Arg->getType();
        Type *NewType = NewFn->getArg(I)->getType();
        Args.push_back(
            (OldType->isIntegerTy() && NewType->getScalarType()->isBFloatTy())
                ? Builder.CreateBitCast(Arg, NewType)
                : Arg);
      }
      Rep = Builder.CreateCall(NewFn, Args);
      if (F->getReturnType()->isIntegerTy())
        Rep = Builder.CreateBitCast(Rep, F->getReturnType());
    }
  }

  return Rep;
}

static Value *upgradeX86IntrinsicCall(StringRef Name, CallBase *CI, Function *F,
                                      IRBuilder<> &Builder) {
  LLVMContext &C = F->getContext();
  Value *Rep = nullptr;

  if (Name.starts_with("sse4a.movnt.")) {
    SmallVector<Metadata *, 1> Elts;
    Elts.push_back(
        ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
    MDNode *Node = MDNode::get(C, Elts);

    Value *Arg0 = CI->getArgOperand(0);
    Value *Arg1 = CI->getArgOperand(1);

    // Nontemporal (unaligned) store of the 0th element of the float/double
    // vector.
    Value *Extract =
        Builder.CreateExtractElement(Arg1, (uint64_t)0, "extractelement");

    StoreInst *SI = Builder.CreateAlignedStore(Extract, Arg0, Align(1));
    SI->setMetadata(LLVMContext::MD_nontemporal, Node);
  } else if (Name.starts_with("avx.movnt.") ||
             Name.starts_with("avx512.storent.")) {
    SmallVector<Metadata *, 1> Elts;
    Elts.push_back(
        ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
    MDNode *Node = MDNode::get(C, Elts);

    Value *Arg0 = CI->getArgOperand(0);
    Value *Arg1 = CI->getArgOperand(1);

    StoreInst *SI = Builder.CreateAlignedStore(
        Arg1, Arg0,
        Align(Arg1->getType()->getPrimitiveSizeInBits().getFixedValue() / 8));
    SI->setMetadata(LLVMContext::MD_nontemporal, Node);
  } else if (Name == "sse2.storel.dq") {
    Value *Arg0 = CI->getArgOperand(0);
    Value *Arg1 = CI->getArgOperand(1);

    auto *NewVecTy = FixedVectorType::get(Type::getInt64Ty(C), 2);
    Value *BC0 = Builder.CreateBitCast(Arg1, NewVecTy, "cast");
    Value *Elt = Builder.CreateExtractElement(BC0, (uint64_t)0);
    Builder.CreateAlignedStore(Elt, Arg0, Align(1));
  } else if (Name.starts_with("sse.storeu.") ||
             Name.starts_with("sse2.storeu.") ||
             Name.starts_with("avx.storeu.")) {
    Value *Arg0 = CI->getArgOperand(0);
    Value *Arg1 = CI->getArgOperand(1);
    Builder.CreateAlignedStore(Arg1, Arg0, Align(1));
  } else if (Name == "avx512.mask.store.ss") {
    Value *Mask = Builder.CreateAnd(CI->getArgOperand(2), Builder.getInt8(1));
    upgradeMaskedStore(Builder, CI->getArgOperand(0), CI->getArgOperand(1),
                       Mask, false);
  } else if (Name.starts_with("avx512.mask.store")) {
    // "avx512.mask.storeu." or "avx512.mask.store."
    bool Aligned = Name[17] != 'u'; // "avx512.mask.storeu".
    upgradeMaskedStore(Builder, CI->getArgOperand(0), CI->getArgOperand(1),
                       CI->getArgOperand(2), Aligned);
  } else if (Name.starts_with("sse2.pcmp") || Name.starts_with("avx2.pcmp")) {
    // Upgrade packed integer vector compare intrinsics to compare
    // instructions.
    // "sse2.pcmpeq." "sse2.pcmpgt." "avx2.pcmpeq." or "avx2.pcmpgt."
    bool CmpEq = Name[9] == 'e';
    Rep = Builder.CreateICmp(CmpEq ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_SGT,
                             CI->getArgOperand(0), CI->getArgOperand(1));
    Rep = Builder.CreateSExt(Rep, CI->getType(), "");
  } else if (Name.starts_with("avx512.broadcastm")) {
    Type *ExtTy = Type::getInt32Ty(C);
    if (CI->getOperand(0)->getType()->isIntegerTy(8))
      ExtTy = Type::getInt64Ty(C);
    unsigned NumElts = CI->getType()->getPrimitiveSizeInBits() /
                       ExtTy->getPrimitiveSizeInBits();
    Rep = Builder.CreateZExt(CI->getArgOperand(0), ExtTy);
    Rep = Builder.CreateVectorSplat(NumElts, Rep);
  } else if (Name == "sse.sqrt.ss" || Name == "sse2.sqrt.sd") {
    Value *Vec = CI->getArgOperand(0);
    Value *Elt0 = Builder.CreateExtractElement(Vec, (uint64_t)0);
    Elt0 = Builder.CreateIntrinsic(Intrinsic::sqrt, Elt0->getType(), Elt0);
    Rep = Builder.CreateInsertElement(Vec, Elt0, (uint64_t)0);
  } else if (Name.starts_with("avx.sqrt.p") ||
             Name.starts_with("sse2.sqrt.p") ||
             Name.starts_with("sse.sqrt.p")) {
    Rep = Builder.CreateIntrinsic(Intrinsic::sqrt, CI->getType(),
                                  {CI->getArgOperand(0)});
  } else if (Name.starts_with("avx512.mask.sqrt.p")) {
    if (CI->arg_size() == 4 &&
        (!isa<ConstantInt>(CI->getArgOperand(3)) ||
         cast<ConstantInt>(CI->getArgOperand(3))->getZExtValue() != 4)) {
      Intrinsic::ID IID = Name[18] == 's' ? Intrinsic::x86_avx512_sqrt_ps_512
                                          : Intrinsic::x86_avx512_sqrt_pd_512;

      Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(3)};
      Rep = Builder.CreateIntrinsic(IID, {}, Args);
    } else {
      Rep = Builder.CreateIntrinsic(Intrinsic::sqrt, CI->getType(),
                                    {CI->getArgOperand(0)});
    }
    Rep =
        emitX86Select(Builder, CI->getArgOperand(2), Rep, CI->getArgOperand(1));
  } else if (Name.starts_with("avx512.ptestm") ||
             Name.starts_with("avx512.ptestnm")) {
    Value *Op0 = CI->getArgOperand(0);
    Value *Op1 = CI->getArgOperand(1);
    Value *Mask = CI->getArgOperand(2);
    Rep = Builder.CreateAnd(Op0, Op1);
    llvm::Type *Ty = Op0->getType();
    Value *Zero = llvm::Constant::getNullValue(Ty);
    ICmpInst::Predicate Pred = Name.starts_with("avx512.ptestm")
                                   ? ICmpInst::ICMP_NE
                                   : ICmpInst::ICMP_EQ;
    Rep = Builder.CreateICmp(Pred, Rep, Zero);
    Rep = applyX86MaskOn1BitsVec(Builder, Rep, Mask);
  } else if (Name.starts_with("avx512.mask.pbroadcast")) {
    unsigned NumElts = cast<FixedVectorType>(CI->getArgOperand(1)->getType())
                           ->getNumElements();
    Rep = Builder.CreateVectorSplat(NumElts, CI->getArgOperand(0));
    Rep =
        emitX86Select(Builder, CI->getArgOperand(2), Rep, CI->getArgOperand(1));
  } else if (Name.starts_with("avx512.kunpck")) {
    unsigned NumElts = CI->getType()->getScalarSizeInBits();
    Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), NumElts);
    Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), NumElts);
    int Indices[64];
    for (unsigned i = 0; i != NumElts; ++i)
      Indices[i] = i;

    // First extract half of each vector. This gives better codegen than
    // doing it in a single shuffle.
    LHS = Builder.CreateShuffleVector(LHS, LHS, ArrayRef(Indices, NumElts / 2));
    RHS = Builder.CreateShuffleVector(RHS, RHS, ArrayRef(Indices, NumElts / 2));
    // Concat the vectors.
    // NOTE: Operands have to be swapped to match intrinsic definition.
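    // (The low half of the result comes from the second source, which is why
    // RHS is the first shuffle operand below.)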
    Rep = Builder.CreateShuffleVector(RHS, LHS, ArrayRef(Indices, NumElts));
    Rep = Builder.CreateBitCast(Rep, CI->getType());
  } else if (Name == "avx512.kand.w") {
    Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
    Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
    Rep = Builder.CreateAnd(LHS, RHS);
    Rep = Builder.CreateBitCast(Rep, CI->getType());
  } else if (Name == "avx512.kandn.w") {
    Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
    Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
    LHS = Builder.CreateNot(LHS);
    Rep = Builder.CreateAnd(LHS, RHS);
    Rep = Builder.CreateBitCast(Rep, CI->getType());
  } else if (Name == "avx512.kor.w") {
    Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
    Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
    Rep = Builder.CreateOr(LHS, RHS);
    Rep = Builder.CreateBitCast(Rep, CI->getType());
  } else if (Name == "avx512.kxor.w") {
    Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
    Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
    Rep = Builder.CreateXor(LHS, RHS);
    Rep = Builder.CreateBitCast(Rep, CI->getType());
  } else if (Name == "avx512.kxnor.w") {
    Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
    Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
    LHS = Builder.CreateNot(LHS);
    Rep = Builder.CreateXor(LHS, RHS);
    Rep = Builder.CreateBitCast(Rep, CI->getType());
  } else if (Name == "avx512.knot.w") {
    Rep = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
    Rep = Builder.CreateNot(Rep);
    Rep = Builder.CreateBitCast(Rep, CI->getType());
  } else if (Name == "avx512.kortestz.w" || Name == "avx512.kortestc.w") {
    Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
    Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
    Rep = Builder.CreateOr(LHS, RHS);
    Rep = Builder.CreateBitCast(Rep, Builder.getInt16Ty());
    Value *C;
    if (Name[14] == 'c')
      C = ConstantInt::getAllOnesValue(Builder.getInt16Ty());
    else
      C = ConstantInt::getNullValue(Builder.getInt16Ty());
    Rep = Builder.CreateICmpEQ(Rep, C);
    Rep = Builder.CreateZExt(Rep, Builder.getInt32Ty());
  } else if (Name == "sse.add.ss" || Name == "sse2.add.sd" ||
             Name == "sse.sub.ss" || Name == "sse2.sub.sd" ||
             Name == "sse.mul.ss" || Name == "sse2.mul.sd" ||
             Name == "sse.div.ss" || Name == "sse2.div.sd") {
    Type *I32Ty = Type::getInt32Ty(C);
    Value *Elt0 = Builder.CreateExtractElement(CI->getArgOperand(0),
                                               ConstantInt::get(I32Ty, 0));
    Value *Elt1 = Builder.CreateExtractElement(CI->getArgOperand(1),
                                               ConstantInt::get(I32Ty, 0));
    Value *EltOp;
    if (Name.contains(".add."))
      EltOp = Builder.CreateFAdd(Elt0, Elt1);
    else if (Name.contains(".sub."))
      EltOp = Builder.CreateFSub(Elt0, Elt1);
    else if (Name.contains(".mul."))
      EltOp = Builder.CreateFMul(Elt0, Elt1);
    else
      EltOp = Builder.CreateFDiv(Elt0, Elt1);
    Rep = Builder.CreateInsertElement(CI->getArgOperand(0), EltOp,
                                      ConstantInt::get(I32Ty, 0));
  } else if (Name.starts_with("avx512.mask.pcmp")) {
    // "avx512.mask.pcmpeq." or "avx512.mask.pcmpgt."
    bool CmpEq = Name[16] == 'e';
    Rep = upgradeMaskedCompare(Builder, *CI, CmpEq ? 0 : 6, true);
  } else if (Name.starts_with("avx512.mask.vpshufbitqmb.")) {
    Type *OpTy = CI->getArgOperand(0)->getType();
    unsigned VecWidth = OpTy->getPrimitiveSizeInBits();
    Intrinsic::ID IID;
    switch (VecWidth) {
    default:
      llvm_unreachable("Unexpected intrinsic");
    case 128:
      IID = Intrinsic::x86_avx512_vpshufbitqmb_128;
      break;
    case 256:
      IID = Intrinsic::x86_avx512_vpshufbitqmb_256;
      break;
    case 512:
      IID = Intrinsic::x86_avx512_vpshufbitqmb_512;
      break;
    }

    Rep = Builder.CreateIntrinsic(IID, {},
                                  {CI->getOperand(0), CI->getArgOperand(1)});
    Rep = applyX86MaskOn1BitsVec(Builder, Rep, CI->getArgOperand(2));
  } else if (Name.starts_with("avx512.mask.fpclass.p")) {
    Type *OpTy = CI->getArgOperand(0)->getType();
    unsigned VecWidth = OpTy->getPrimitiveSizeInBits();
    unsigned EltWidth = OpTy->getScalarSizeInBits();
    Intrinsic::ID IID;
    if (VecWidth == 128 && EltWidth == 32)
      IID = Intrinsic::x86_avx512_fpclass_ps_128;
    else if (VecWidth == 256 && EltWidth == 32)
      IID = Intrinsic::x86_avx512_fpclass_ps_256;
    else if (VecWidth == 512 && EltWidth == 32)
      IID = Intrinsic::x86_avx512_fpclass_ps_512;
    else if (VecWidth == 128 && EltWidth == 64)
      IID = Intrinsic::x86_avx512_fpclass_pd_128;
    else if (VecWidth == 256 && EltWidth == 64)
      IID = Intrinsic::x86_avx512_fpclass_pd_256;
    else if (VecWidth == 512 && EltWidth == 64)
      IID = Intrinsic::x86_avx512_fpclass_pd_512;
    else
      llvm_unreachable("Unexpected intrinsic");

    Rep = Builder.CreateIntrinsic(IID, {},
                                  {CI->getOperand(0), CI->getArgOperand(1)});
    Rep = applyX86MaskOn1BitsVec(Builder, Rep, CI->getArgOperand(2));
  } else if (Name.starts_with("avx512.cmp.p")) {
    SmallVector<Value *, 4> Args(CI->args());
    Type *OpTy = Args[0]->getType();
    unsigned VecWidth = OpTy->getPrimitiveSizeInBits();
    unsigned EltWidth = OpTy->getScalarSizeInBits();
    Intrinsic::ID IID;
    if (VecWidth == 128 && EltWidth == 32)
      IID = Intrinsic::x86_avx512_mask_cmp_ps_128;
    else if (VecWidth == 256 && EltWidth == 32)
      IID = Intrinsic::x86_avx512_mask_cmp_ps_256;
    else if (VecWidth == 512 && EltWidth == 32)
      IID = Intrinsic::x86_avx512_mask_cmp_ps_512;
    else if (VecWidth == 128 && EltWidth == 64)
      IID = Intrinsic::x86_avx512_mask_cmp_pd_128;
    else if (VecWidth == 256 && EltWidth == 64)
      IID = Intrinsic::x86_avx512_mask_cmp_pd_256;
    else if (VecWidth == 512 && EltWidth == 64)
      IID = Intrinsic::x86_avx512_mask_cmp_pd_512;
    else
      llvm_unreachable("Unexpected intrinsic");

    Value *Mask = Constant::getAllOnesValue(CI->getType());
    if (VecWidth == 512)
      std::swap(Mask, Args.back());
    Args.push_back(Mask);

    Rep = Builder.CreateIntrinsic(IID, {}, Args);
  } else if (Name.starts_with("avx512.mask.cmp.")) {
    // Integer compare intrinsics.
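    // (The immediate follows the encoding handled by upgradeMaskedCompare:
    // 0=eq, 1=lt, 2=le, 4=ne, 5=ge, 6=gt, with 3 and 7 folding to all-zeros
    // and all-ones respectively.)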
2684 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue(); 2685 Rep = upgradeMaskedCompare(Builder, *CI, Imm, true); 2686 } else if (Name.starts_with("avx512.mask.ucmp.")) { 2687 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue(); 2688 Rep = upgradeMaskedCompare(Builder, *CI, Imm, false); 2689 } else if (Name.starts_with("avx512.cvtb2mask.") || 2690 Name.starts_with("avx512.cvtw2mask.") || 2691 Name.starts_with("avx512.cvtd2mask.") || 2692 Name.starts_with("avx512.cvtq2mask.")) { 2693 Value *Op = CI->getArgOperand(0); 2694 Value *Zero = llvm::Constant::getNullValue(Op->getType()); 2695 Rep = Builder.CreateICmp(ICmpInst::ICMP_SLT, Op, Zero); 2696 Rep = applyX86MaskOn1BitsVec(Builder, Rep, nullptr); 2697 } else if (Name == "ssse3.pabs.b.128" || Name == "ssse3.pabs.w.128" || 2698 Name == "ssse3.pabs.d.128" || Name.starts_with("avx2.pabs") || 2699 Name.starts_with("avx512.mask.pabs")) { 2700 Rep = upgradeAbs(Builder, *CI); 2701 } else if (Name == "sse41.pmaxsb" || Name == "sse2.pmaxs.w" || 2702 Name == "sse41.pmaxsd" || Name.starts_with("avx2.pmaxs") || 2703 Name.starts_with("avx512.mask.pmaxs")) { 2704 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::smax); 2705 } else if (Name == "sse2.pmaxu.b" || Name == "sse41.pmaxuw" || 2706 Name == "sse41.pmaxud" || Name.starts_with("avx2.pmaxu") || 2707 Name.starts_with("avx512.mask.pmaxu")) { 2708 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::umax); 2709 } else if (Name == "sse41.pminsb" || Name == "sse2.pmins.w" || 2710 Name == "sse41.pminsd" || Name.starts_with("avx2.pmins") || 2711 Name.starts_with("avx512.mask.pmins")) { 2712 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::smin); 2713 } else if (Name == "sse2.pminu.b" || Name == "sse41.pminuw" || 2714 Name == "sse41.pminud" || Name.starts_with("avx2.pminu") || 2715 Name.starts_with("avx512.mask.pminu")) { 2716 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::umin); 2717 } else if (Name == "sse2.pmulu.dq" || Name == "avx2.pmulu.dq" || 2718 Name == "avx512.pmulu.dq.512" || 2719 Name.starts_with("avx512.mask.pmulu.dq.")) { 2720 Rep = upgradePMULDQ(Builder, *CI, /*Signed*/ false); 2721 } else if (Name == "sse41.pmuldq" || Name == "avx2.pmul.dq" || 2722 Name == "avx512.pmul.dq.512" || 2723 Name.starts_with("avx512.mask.pmul.dq.")) { 2724 Rep = upgradePMULDQ(Builder, *CI, /*Signed*/ true); 2725 } else if (Name == "sse.cvtsi2ss" || Name == "sse2.cvtsi2sd" || 2726 Name == "sse.cvtsi642ss" || Name == "sse2.cvtsi642sd") { 2727 Rep = 2728 Builder.CreateSIToFP(CI->getArgOperand(1), 2729 cast<VectorType>(CI->getType())->getElementType()); 2730 Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0); 2731 } else if (Name == "avx512.cvtusi2sd") { 2732 Rep = 2733 Builder.CreateUIToFP(CI->getArgOperand(1), 2734 cast<VectorType>(CI->getType())->getElementType()); 2735 Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0); 2736 } else if (Name == "sse2.cvtss2sd") { 2737 Rep = Builder.CreateExtractElement(CI->getArgOperand(1), (uint64_t)0); 2738 Rep = Builder.CreateFPExt( 2739 Rep, cast<VectorType>(CI->getType())->getElementType()); 2740 Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0); 2741 } else if (Name == "sse2.cvtdq2pd" || Name == "sse2.cvtdq2ps" || 2742 Name == "avx.cvtdq2.pd.256" || Name == "avx.cvtdq2.ps.256" || 2743 Name.starts_with("avx512.mask.cvtdq2pd.") || 2744 Name.starts_with("avx512.mask.cvtudq2pd.") || 2745 Name.starts_with("avx512.mask.cvtdq2ps.") || 2746 
Name.starts_with("avx512.mask.cvtudq2ps.") || 2747 Name.starts_with("avx512.mask.cvtqq2pd.") || 2748 Name.starts_with("avx512.mask.cvtuqq2pd.") || 2749 Name == "avx512.mask.cvtqq2ps.256" || 2750 Name == "avx512.mask.cvtqq2ps.512" || 2751 Name == "avx512.mask.cvtuqq2ps.256" || 2752 Name == "avx512.mask.cvtuqq2ps.512" || Name == "sse2.cvtps2pd" || 2753 Name == "avx.cvt.ps2.pd.256" || 2754 Name == "avx512.mask.cvtps2pd.128" || 2755 Name == "avx512.mask.cvtps2pd.256") { 2756 auto *DstTy = cast<FixedVectorType>(CI->getType()); 2757 Rep = CI->getArgOperand(0); 2758 auto *SrcTy = cast<FixedVectorType>(Rep->getType()); 2759 2760 unsigned NumDstElts = DstTy->getNumElements(); 2761 if (NumDstElts < SrcTy->getNumElements()) { 2762 assert(NumDstElts == 2 && "Unexpected vector size"); 2763 Rep = Builder.CreateShuffleVector(Rep, Rep, ArrayRef<int>{0, 1}); 2764 } 2765 2766 bool IsPS2PD = SrcTy->getElementType()->isFloatTy(); 2767 bool IsUnsigned = Name.contains("cvtu"); 2768 if (IsPS2PD) 2769 Rep = Builder.CreateFPExt(Rep, DstTy, "cvtps2pd"); 2770 else if (CI->arg_size() == 4 && 2771 (!isa<ConstantInt>(CI->getArgOperand(3)) || 2772 cast<ConstantInt>(CI->getArgOperand(3))->getZExtValue() != 4)) { 2773 Intrinsic::ID IID = IsUnsigned ? Intrinsic::x86_avx512_uitofp_round 2774 : Intrinsic::x86_avx512_sitofp_round; 2775 Rep = Builder.CreateIntrinsic(IID, {DstTy, SrcTy}, 2776 {Rep, CI->getArgOperand(3)}); 2777 } else { 2778 Rep = IsUnsigned ? Builder.CreateUIToFP(Rep, DstTy, "cvt") 2779 : Builder.CreateSIToFP(Rep, DstTy, "cvt"); 2780 } 2781 2782 if (CI->arg_size() >= 3) 2783 Rep = emitX86Select(Builder, CI->getArgOperand(2), Rep, 2784 CI->getArgOperand(1)); 2785 } else if (Name.starts_with("avx512.mask.vcvtph2ps.") || 2786 Name.starts_with("vcvtph2ps.")) { 2787 auto *DstTy = cast<FixedVectorType>(CI->getType()); 2788 Rep = CI->getArgOperand(0); 2789 auto *SrcTy = cast<FixedVectorType>(Rep->getType()); 2790 unsigned NumDstElts = DstTy->getNumElements(); 2791 if (NumDstElts != SrcTy->getNumElements()) { 2792 assert(NumDstElts == 4 && "Unexpected vector size"); 2793 Rep = Builder.CreateShuffleVector(Rep, Rep, ArrayRef<int>{0, 1, 2, 3}); 2794 } 2795 Rep = Builder.CreateBitCast( 2796 Rep, FixedVectorType::get(Type::getHalfTy(C), NumDstElts)); 2797 Rep = Builder.CreateFPExt(Rep, DstTy, "cvtph2ps"); 2798 if (CI->arg_size() >= 3) 2799 Rep = emitX86Select(Builder, CI->getArgOperand(2), Rep, 2800 CI->getArgOperand(1)); 2801 } else if (Name.starts_with("avx512.mask.load")) { 2802 // "avx512.mask.loadu." or "avx512.mask.load." 2803 bool Aligned = Name[16] != 'u'; // "avx512.mask.loadu". 
2804 Rep = upgradeMaskedLoad(Builder, CI->getArgOperand(0), CI->getArgOperand(1), 2805 CI->getArgOperand(2), Aligned); 2806 } else if (Name.starts_with("avx512.mask.expand.load.")) { 2807 auto *ResultTy = cast<FixedVectorType>(CI->getType()); 2808 Value *MaskVec = getX86MaskVec(Builder, CI->getArgOperand(2), 2809 ResultTy->getNumElements()); 2810 2811 Rep = Builder.CreateIntrinsic( 2812 Intrinsic::masked_expandload, ResultTy, 2813 {CI->getOperand(0), MaskVec, CI->getOperand(1)}); 2814 } else if (Name.starts_with("avx512.mask.compress.store.")) { 2815 auto *ResultTy = cast<VectorType>(CI->getArgOperand(1)->getType()); 2816 Value *MaskVec = 2817 getX86MaskVec(Builder, CI->getArgOperand(2), 2818 cast<FixedVectorType>(ResultTy)->getNumElements()); 2819 2820 Rep = Builder.CreateIntrinsic( 2821 Intrinsic::masked_compressstore, ResultTy, 2822 {CI->getArgOperand(1), CI->getArgOperand(0), MaskVec}); 2823 } else if (Name.starts_with("avx512.mask.compress.") || 2824 Name.starts_with("avx512.mask.expand.")) { 2825 auto *ResultTy = cast<FixedVectorType>(CI->getType()); 2826 2827 Value *MaskVec = getX86MaskVec(Builder, CI->getArgOperand(2), 2828 ResultTy->getNumElements()); 2829 2830 bool IsCompress = Name[12] == 'c'; 2831 Intrinsic::ID IID = IsCompress ? Intrinsic::x86_avx512_mask_compress 2832 : Intrinsic::x86_avx512_mask_expand; 2833 Rep = Builder.CreateIntrinsic( 2834 IID, ResultTy, {CI->getOperand(0), CI->getOperand(1), MaskVec}); 2835 } else if (Name.starts_with("xop.vpcom")) { 2836 bool IsSigned; 2837 if (Name.ends_with("ub") || Name.ends_with("uw") || Name.ends_with("ud") || 2838 Name.ends_with("uq")) 2839 IsSigned = false; 2840 else if (Name.ends_with("b") || Name.ends_with("w") || 2841 Name.ends_with("d") || Name.ends_with("q")) 2842 IsSigned = true; 2843 else 2844 llvm_unreachable("Unknown suffix"); 2845 2846 unsigned Imm; 2847 if (CI->arg_size() == 3) { 2848 Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue(); 2849 } else { 2850 Name = Name.substr(9); // strip off "xop.vpcom" 2851 if (Name.starts_with("lt")) 2852 Imm = 0; 2853 else if (Name.starts_with("le")) 2854 Imm = 1; 2855 else if (Name.starts_with("gt")) 2856 Imm = 2; 2857 else if (Name.starts_with("ge")) 2858 Imm = 3; 2859 else if (Name.starts_with("eq")) 2860 Imm = 4; 2861 else if (Name.starts_with("ne")) 2862 Imm = 5; 2863 else if (Name.starts_with("false")) 2864 Imm = 6; 2865 else if (Name.starts_with("true")) 2866 Imm = 7; 2867 else 2868 llvm_unreachable("Unknown condition"); 2869 } 2870 2871 Rep = upgradeX86vpcom(Builder, *CI, Imm, IsSigned); 2872 } else if (Name.starts_with("xop.vpcmov")) { 2873 Value *Sel = CI->getArgOperand(2); 2874 Value *NotSel = Builder.CreateNot(Sel); 2875 Value *Sel0 = Builder.CreateAnd(CI->getArgOperand(0), Sel); 2876 Value *Sel1 = Builder.CreateAnd(CI->getArgOperand(1), NotSel); 2877 Rep = Builder.CreateOr(Sel0, Sel1); 2878 } else if (Name.starts_with("xop.vprot") || Name.starts_with("avx512.prol") || 2879 Name.starts_with("avx512.mask.prol")) { 2880 Rep = upgradeX86Rotate(Builder, *CI, false); 2881 } else if (Name.starts_with("avx512.pror") || 2882 Name.starts_with("avx512.mask.pror")) { 2883 Rep = upgradeX86Rotate(Builder, *CI, true); 2884 } else if (Name.starts_with("avx512.vpshld.") || 2885 Name.starts_with("avx512.mask.vpshld") || 2886 Name.starts_with("avx512.maskz.vpshld")) { 2887 bool ZeroMask = Name[11] == 'z'; 2888 Rep = upgradeX86ConcatShift(Builder, *CI, false, ZeroMask); 2889 } else if (Name.starts_with("avx512.vpshrd.") || 2890 Name.starts_with("avx512.mask.vpshrd") || 2891 
Name.starts_with("avx512.maskz.vpshrd")) { 2892 bool ZeroMask = Name[11] == 'z'; 2893 Rep = upgradeX86ConcatShift(Builder, *CI, true, ZeroMask); 2894 } else if (Name == "sse42.crc32.64.8") { 2895 Value *Trunc0 = 2896 Builder.CreateTrunc(CI->getArgOperand(0), Type::getInt32Ty(C)); 2897 Rep = Builder.CreateIntrinsic(Intrinsic::x86_sse42_crc32_32_8, {}, 2898 {Trunc0, CI->getArgOperand(1)}); 2899 Rep = Builder.CreateZExt(Rep, CI->getType(), ""); 2900 } else if (Name.starts_with("avx.vbroadcast.s") || 2901 Name.starts_with("avx512.vbroadcast.s")) { 2902 // Replace broadcasts with a series of insertelements. 2903 auto *VecTy = cast<FixedVectorType>(CI->getType()); 2904 Type *EltTy = VecTy->getElementType(); 2905 unsigned EltNum = VecTy->getNumElements(); 2906 Value *Load = Builder.CreateLoad(EltTy, CI->getArgOperand(0)); 2907 Type *I32Ty = Type::getInt32Ty(C); 2908 Rep = PoisonValue::get(VecTy); 2909 for (unsigned I = 0; I < EltNum; ++I) 2910 Rep = Builder.CreateInsertElement(Rep, Load, ConstantInt::get(I32Ty, I)); 2911 } else if (Name.starts_with("sse41.pmovsx") || 2912 Name.starts_with("sse41.pmovzx") || 2913 Name.starts_with("avx2.pmovsx") || 2914 Name.starts_with("avx2.pmovzx") || 2915 Name.starts_with("avx512.mask.pmovsx") || 2916 Name.starts_with("avx512.mask.pmovzx")) { 2917 auto *DstTy = cast<FixedVectorType>(CI->getType()); 2918 unsigned NumDstElts = DstTy->getNumElements(); 2919 2920 // Extract a subvector of the first NumDstElts lanes and sign/zero extend. 2921 SmallVector<int, 8> ShuffleMask(NumDstElts); 2922 for (unsigned i = 0; i != NumDstElts; ++i) 2923 ShuffleMask[i] = i; 2924 2925 Value *SV = Builder.CreateShuffleVector(CI->getArgOperand(0), ShuffleMask); 2926 2927 bool DoSext = Name.contains("pmovsx"); 2928 Rep = 2929 DoSext ? Builder.CreateSExt(SV, DstTy) : Builder.CreateZExt(SV, DstTy); 2930 // If there are 3 arguments, it's a masked intrinsic so we need a select. 2931 if (CI->arg_size() == 3) 2932 Rep = emitX86Select(Builder, CI->getArgOperand(2), Rep, 2933 CI->getArgOperand(1)); 2934 } else if (Name == "avx512.mask.pmov.qd.256" || 2935 Name == "avx512.mask.pmov.qd.512" || 2936 Name == "avx512.mask.pmov.wb.256" || 2937 Name == "avx512.mask.pmov.wb.512") { 2938 Type *Ty = CI->getArgOperand(1)->getType(); 2939 Rep = Builder.CreateTrunc(CI->getArgOperand(0), Ty); 2940 Rep = 2941 emitX86Select(Builder, CI->getArgOperand(2), Rep, CI->getArgOperand(1)); 2942 } else if (Name.starts_with("avx.vbroadcastf128") || 2943 Name == "avx2.vbroadcasti128") { 2944 // Replace vbroadcastf128/vbroadcasti128 with a vector load+shuffle. 
2945 Type *EltTy = cast<VectorType>(CI->getType())->getElementType(); 2946 unsigned NumSrcElts = 128 / EltTy->getPrimitiveSizeInBits(); 2947 auto *VT = FixedVectorType::get(EltTy, NumSrcElts); 2948 Value *Load = Builder.CreateAlignedLoad(VT, CI->getArgOperand(0), Align(1)); 2949 if (NumSrcElts == 2) 2950 Rep = Builder.CreateShuffleVector(Load, ArrayRef<int>{0, 1, 0, 1}); 2951 else 2952 Rep = Builder.CreateShuffleVector(Load, 2953 ArrayRef<int>{0, 1, 2, 3, 0, 1, 2, 3}); 2954 } else if (Name.starts_with("avx512.mask.shuf.i") || 2955 Name.starts_with("avx512.mask.shuf.f")) { 2956 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue(); 2957 Type *VT = CI->getType(); 2958 unsigned NumLanes = VT->getPrimitiveSizeInBits() / 128; 2959 unsigned NumElementsInLane = 128 / VT->getScalarSizeInBits(); 2960 unsigned ControlBitsMask = NumLanes - 1; 2961 unsigned NumControlBits = NumLanes / 2; 2962 SmallVector<int, 8> ShuffleMask(0); 2963 2964 for (unsigned l = 0; l != NumLanes; ++l) { 2965 unsigned LaneMask = (Imm >> (l * NumControlBits)) & ControlBitsMask; 2966 // We actually need the other source. 2967 if (l >= NumLanes / 2) 2968 LaneMask += NumLanes; 2969 for (unsigned i = 0; i != NumElementsInLane; ++i) 2970 ShuffleMask.push_back(LaneMask * NumElementsInLane + i); 2971 } 2972 Rep = Builder.CreateShuffleVector(CI->getArgOperand(0), 2973 CI->getArgOperand(1), ShuffleMask); 2974 Rep = 2975 emitX86Select(Builder, CI->getArgOperand(4), Rep, CI->getArgOperand(3)); 2976 } else if (Name.starts_with("avx512.mask.broadcastf") || 2977 Name.starts_with("avx512.mask.broadcasti")) { 2978 unsigned NumSrcElts = cast<FixedVectorType>(CI->getArgOperand(0)->getType()) 2979 ->getNumElements(); 2980 unsigned NumDstElts = 2981 cast<FixedVectorType>(CI->getType())->getNumElements(); 2982 2983 SmallVector<int, 8> ShuffleMask(NumDstElts); 2984 for (unsigned i = 0; i != NumDstElts; ++i) 2985 ShuffleMask[i] = i % NumSrcElts; 2986 2987 Rep = Builder.CreateShuffleVector(CI->getArgOperand(0), 2988 CI->getArgOperand(0), ShuffleMask); 2989 Rep = 2990 emitX86Select(Builder, CI->getArgOperand(2), Rep, CI->getArgOperand(1)); 2991 } else if (Name.starts_with("avx2.pbroadcast") || 2992 Name.starts_with("avx2.vbroadcast") || 2993 Name.starts_with("avx512.pbroadcast") || 2994 Name.starts_with("avx512.mask.broadcast.s")) { 2995 // Replace vp?broadcasts with a vector shuffle. 
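// A null mask constant expands to all-zero shuffle indices (e.g.
// <0, 0, 0, 0>), which replicates element 0 of the source; that is
// exactly a broadcast.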
2996 Value *Op = CI->getArgOperand(0); 2997 ElementCount EC = cast<VectorType>(CI->getType())->getElementCount(); 2998 Type *MaskTy = VectorType::get(Type::getInt32Ty(C), EC); 2999 SmallVector<int, 8> M; 3000 ShuffleVectorInst::getShuffleMask(Constant::getNullValue(MaskTy), M); 3001 Rep = Builder.CreateShuffleVector(Op, M); 3002 3003 if (CI->arg_size() == 3) 3004 Rep = emitX86Select(Builder, CI->getArgOperand(2), Rep, 3005 CI->getArgOperand(1)); 3006 } else if (Name.starts_with("sse2.padds.") || 3007 Name.starts_with("avx2.padds.") || 3008 Name.starts_with("avx512.padds.") || 3009 Name.starts_with("avx512.mask.padds.")) { 3010 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::sadd_sat); 3011 } else if (Name.starts_with("sse2.psubs.") || 3012 Name.starts_with("avx2.psubs.") || 3013 Name.starts_with("avx512.psubs.") || 3014 Name.starts_with("avx512.mask.psubs.")) { 3015 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::ssub_sat); 3016 } else if (Name.starts_with("sse2.paddus.") || 3017 Name.starts_with("avx2.paddus.") || 3018 Name.starts_with("avx512.mask.paddus.")) { 3019 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::uadd_sat); 3020 } else if (Name.starts_with("sse2.psubus.") || 3021 Name.starts_with("avx2.psubus.") || 3022 Name.starts_with("avx512.mask.psubus.")) { 3023 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::usub_sat); 3024 } else if (Name.starts_with("avx512.mask.palignr.")) { 3025 Rep = upgradeX86ALIGNIntrinsics(Builder, CI->getArgOperand(0), 3026 CI->getArgOperand(1), CI->getArgOperand(2), 3027 CI->getArgOperand(3), CI->getArgOperand(4), 3028 false); 3029 } else if (Name.starts_with("avx512.mask.valign.")) { 3030 Rep = upgradeX86ALIGNIntrinsics( 3031 Builder, CI->getArgOperand(0), CI->getArgOperand(1), 3032 CI->getArgOperand(2), CI->getArgOperand(3), CI->getArgOperand(4), true); 3033 } else if (Name == "sse2.psll.dq" || Name == "avx2.psll.dq") { 3034 // 128/256-bit shift left specified in bits. 3035 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue(); 3036 Rep = upgradeX86PSLLDQIntrinsics(Builder, CI->getArgOperand(0), 3037 Shift / 8); // Shift is in bits. 3038 } else if (Name == "sse2.psrl.dq" || Name == "avx2.psrl.dq") { 3039 // 128/256-bit shift right specified in bits. 3040 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue(); 3041 Rep = upgradeX86PSRLDQIntrinsics(Builder, CI->getArgOperand(0), 3042 Shift / 8); // Shift is in bits. 3043 } else if (Name == "sse2.psll.dq.bs" || Name == "avx2.psll.dq.bs" || 3044 Name == "avx512.psll.dq.512") { 3045 // 128/256/512-bit shift left specified in bytes. 3046 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue(); 3047 Rep = upgradeX86PSLLDQIntrinsics(Builder, CI->getArgOperand(0), Shift); 3048 } else if (Name == "sse2.psrl.dq.bs" || Name == "avx2.psrl.dq.bs" || 3049 Name == "avx512.psrl.dq.512") { 3050 // 128/256/512-bit shift right specified in bytes. 
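// Note: the ".dq.bs" immediates are already byte counts, so unlike the
// ".dq" variants above no division by 8 is needed here.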
3051 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3052 Rep = upgradeX86PSRLDQIntrinsics(Builder, CI->getArgOperand(0), Shift);
3053 } else if (Name == "sse41.pblendw" || Name.starts_with("sse41.blendp") ||
3054 Name.starts_with("avx.blend.p") || Name == "avx2.pblendw" ||
3055 Name.starts_with("avx2.pblendd.")) {
3056 Value *Op0 = CI->getArgOperand(0);
3057 Value *Op1 = CI->getArgOperand(1);
3058 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
3059 auto *VecTy = cast<FixedVectorType>(CI->getType());
3060 unsigned NumElts = VecTy->getNumElements();
3061
3062 SmallVector<int, 16> Idxs(NumElts);
3063 for (unsigned i = 0; i != NumElts; ++i)
3064 Idxs[i] = ((Imm >> (i % 8)) & 1) ? i + NumElts : i;
3065
3066 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
3067 } else if (Name.starts_with("avx.vinsertf128.") ||
3068 Name == "avx2.vinserti128" ||
3069 Name.starts_with("avx512.mask.insert")) {
3070 Value *Op0 = CI->getArgOperand(0);
3071 Value *Op1 = CI->getArgOperand(1);
3072 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
3073 unsigned DstNumElts =
3074 cast<FixedVectorType>(CI->getType())->getNumElements();
3075 unsigned SrcNumElts =
3076 cast<FixedVectorType>(Op1->getType())->getNumElements();
3077 unsigned Scale = DstNumElts / SrcNumElts;
3078
3079 // Mask off the high bits of the immediate value; hardware ignores those.
3080 Imm = Imm % Scale;
3081
3082 // Extend the second operand into a vector the size of the destination.
3083 SmallVector<int, 8> Idxs(DstNumElts);
3084 for (unsigned i = 0; i != SrcNumElts; ++i)
3085 Idxs[i] = i;
3086 for (unsigned i = SrcNumElts; i != DstNumElts; ++i)
3087 Idxs[i] = SrcNumElts;
3088 Rep = Builder.CreateShuffleVector(Op1, Idxs);
3089
3090 // Insert the second operand into the first operand.
3091
3092 // Note that there is no guarantee that instruction lowering will actually
3093 // produce a vinsertf128 instruction for the created shuffles. In
3094 // particular, the 0 immediate case involves no lane changes, so it can
3095 // be handled as a blend.
3096
3097 // Example of shuffle mask for 32-bit elements:
3098 // Imm = 1 <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
3099 // Imm = 0 <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7 >
3100
3101 // First fill with identity mask.
3102 for (unsigned i = 0; i != DstNumElts; ++i)
3103 Idxs[i] = i;
3104 // Then replace the elements where we need to insert.
3105 for (unsigned i = 0; i != SrcNumElts; ++i)
3106 Idxs[i + Imm * SrcNumElts] = i + DstNumElts;
3107 Rep = Builder.CreateShuffleVector(Op0, Rep, Idxs);
3108
3109 // If the intrinsic has a mask operand, handle that.
3110 if (CI->arg_size() == 5)
3111 Rep = emitX86Select(Builder, CI->getArgOperand(4), Rep,
3112 CI->getArgOperand(3));
3113 } else if (Name.starts_with("avx.vextractf128.") ||
3114 Name == "avx2.vextracti128" ||
3115 Name.starts_with("avx512.mask.vextract")) {
3116 Value *Op0 = CI->getArgOperand(0);
3117 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3118 unsigned DstNumElts =
3119 cast<FixedVectorType>(CI->getType())->getNumElements();
3120 unsigned SrcNumElts =
3121 cast<FixedVectorType>(Op0->getType())->getNumElements();
3122 unsigned Scale = SrcNumElts / DstNumElts;
3123
3124 // Mask off the high bits of the immediate value; hardware ignores those.
3125 Imm = Imm % Scale;
3126
3127 // Get indexes for the subvector of the input vector.
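// E.g. vextractf128 with Imm = 1 on an 8-element source selects
// elements <4, 5, 6, 7>.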
3128 SmallVector<int, 8> Idxs(DstNumElts); 3129 for (unsigned i = 0; i != DstNumElts; ++i) { 3130 Idxs[i] = i + (Imm * DstNumElts); 3131 } 3132 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs); 3133 3134 // If the intrinsic has a mask operand, handle that. 3135 if (CI->arg_size() == 4) 3136 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep, 3137 CI->getArgOperand(2)); 3138 } else if (Name.starts_with("avx512.mask.perm.df.") || 3139 Name.starts_with("avx512.mask.perm.di.")) { 3140 Value *Op0 = CI->getArgOperand(0); 3141 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue(); 3142 auto *VecTy = cast<FixedVectorType>(CI->getType()); 3143 unsigned NumElts = VecTy->getNumElements(); 3144 3145 SmallVector<int, 8> Idxs(NumElts); 3146 for (unsigned i = 0; i != NumElts; ++i) 3147 Idxs[i] = (i & ~0x3) + ((Imm >> (2 * (i & 0x3))) & 3); 3148 3149 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs); 3150 3151 if (CI->arg_size() == 4) 3152 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep, 3153 CI->getArgOperand(2)); 3154 } else if (Name.starts_with("avx.vperm2f128.") || Name == "avx2.vperm2i128") { 3155 // The immediate permute control byte looks like this: 3156 // [1:0] - select 128 bits from sources for low half of destination 3157 // [2] - ignore 3158 // [3] - zero low half of destination 3159 // [5:4] - select 128 bits from sources for high half of destination 3160 // [6] - ignore 3161 // [7] - zero high half of destination 3162 3163 uint8_t Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue(); 3164 3165 unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements(); 3166 unsigned HalfSize = NumElts / 2; 3167 SmallVector<int, 8> ShuffleMask(NumElts); 3168 3169 // Determine which operand(s) are actually in use for this instruction. 3170 Value *V0 = (Imm & 0x02) ? CI->getArgOperand(1) : CI->getArgOperand(0); 3171 Value *V1 = (Imm & 0x20) ? CI->getArgOperand(1) : CI->getArgOperand(0); 3172 3173 // If needed, replace operands based on zero mask. 3174 V0 = (Imm & 0x08) ? ConstantAggregateZero::get(CI->getType()) : V0; 3175 V1 = (Imm & 0x80) ? ConstantAggregateZero::get(CI->getType()) : V1; 3176 3177 // Permute low half of result. 3178 unsigned StartIndex = (Imm & 0x01) ? HalfSize : 0; 3179 for (unsigned i = 0; i < HalfSize; ++i) 3180 ShuffleMask[i] = StartIndex + i; 3181 3182 // Permute high half of result. 3183 StartIndex = (Imm & 0x10) ? HalfSize : 0; 3184 for (unsigned i = 0; i < HalfSize; ++i) 3185 ShuffleMask[i + HalfSize] = NumElts + StartIndex + i; 3186 3187 Rep = Builder.CreateShuffleVector(V0, V1, ShuffleMask); 3188 3189 } else if (Name.starts_with("avx.vpermil.") || Name == "sse2.pshuf.d" || 3190 Name.starts_with("avx512.mask.vpermil.p") || 3191 Name.starts_with("avx512.mask.pshuf.d.")) { 3192 Value *Op0 = CI->getArgOperand(0); 3193 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue(); 3194 auto *VecTy = cast<FixedVectorType>(CI->getType()); 3195 unsigned NumElts = VecTy->getNumElements(); 3196 // Calculate the size of each index in the immediate. 3197 unsigned IdxSize = 64 / VecTy->getScalarSizeInBits(); 3198 unsigned IdxMask = ((1 << IdxSize) - 1); 3199 3200 SmallVector<int, 8> Idxs(NumElts); 3201 // Lookup the bits for this element, wrapping around the immediate every 3202 // 8-bits. Elements are grouped into sets of 2 or 4 elements so we need 3203 // to offset by the first index of each group. 
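// E.g. sse2.pshuf.d with Imm = 0x1B yields Idxs = <3, 2, 1, 0> (a full
// reverse): each 2-bit immediate field selects one of the four elements.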
3204 for (unsigned i = 0; i != NumElts; ++i)
3205 Idxs[i] = ((Imm >> ((i * IdxSize) % 8)) & IdxMask) | (i & ~IdxMask);
3206
3207 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3208
3209 if (CI->arg_size() == 4)
3210 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
3211 CI->getArgOperand(2));
3212 } else if (Name == "sse2.pshufl.w" ||
3213 Name.starts_with("avx512.mask.pshufl.w.")) {
3214 Value *Op0 = CI->getArgOperand(0);
3215 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3216 unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3217
3218 SmallVector<int, 16> Idxs(NumElts);
3219 for (unsigned l = 0; l != NumElts; l += 8) {
3220 for (unsigned i = 0; i != 4; ++i)
3221 Idxs[i + l] = ((Imm >> (2 * i)) & 0x3) + l;
3222 for (unsigned i = 4; i != 8; ++i)
3223 Idxs[i + l] = i + l;
3224 }
3225
3226 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3227
3228 if (CI->arg_size() == 4)
3229 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
3230 CI->getArgOperand(2));
3231 } else if (Name == "sse2.pshufh.w" ||
3232 Name.starts_with("avx512.mask.pshufh.w.")) {
3233 Value *Op0 = CI->getArgOperand(0);
3234 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3235 unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3236
3237 SmallVector<int, 16> Idxs(NumElts);
3238 for (unsigned l = 0; l != NumElts; l += 8) {
3239 for (unsigned i = 0; i != 4; ++i)
3240 Idxs[i + l] = i + l;
3241 for (unsigned i = 0; i != 4; ++i)
3242 Idxs[i + l + 4] = ((Imm >> (2 * i)) & 0x3) + 4 + l;
3243 }
3244
3245 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3246
3247 if (CI->arg_size() == 4)
3248 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
3249 CI->getArgOperand(2));
3250 } else if (Name.starts_with("avx512.mask.shuf.p")) {
3251 Value *Op0 = CI->getArgOperand(0);
3252 Value *Op1 = CI->getArgOperand(1);
3253 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
3254 unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3255
3256 unsigned NumLaneElts = 128 / CI->getType()->getScalarSizeInBits();
3257 unsigned HalfLaneElts = NumLaneElts / 2;
3258
3259 SmallVector<int, 16> Idxs(NumElts);
3260 for (unsigned i = 0; i != NumElts; ++i) {
3261 // Base index is the starting element of the lane.
3262 Idxs[i] = i - (i % NumLaneElts);
3263 // If we are halfway through the lane, switch to the other source.
3264 if ((i % NumLaneElts) >= HalfLaneElts)
3265 Idxs[i] += NumElts;
3266 // Now select the specific element by adding HalfLaneElts bits from
3267 // the immediate, wrapping around the immediate every 8-bits.
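// (Illustration: for a lane of 4 floats, HalfLaneElts == 2, so each
// element consumes two immediate bits, matching the SHUFPS encoding.)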
3268 Idxs[i] += (Imm >> ((i * HalfLaneElts) % 8)) & ((1 << HalfLaneElts) - 1); 3269 } 3270 3271 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs); 3272 3273 Rep = 3274 emitX86Select(Builder, CI->getArgOperand(4), Rep, CI->getArgOperand(3)); 3275 } else if (Name.starts_with("avx512.mask.movddup") || 3276 Name.starts_with("avx512.mask.movshdup") || 3277 Name.starts_with("avx512.mask.movsldup")) { 3278 Value *Op0 = CI->getArgOperand(0); 3279 unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements(); 3280 unsigned NumLaneElts = 128 / CI->getType()->getScalarSizeInBits(); 3281 3282 unsigned Offset = 0; 3283 if (Name.starts_with("avx512.mask.movshdup.")) 3284 Offset = 1; 3285 3286 SmallVector<int, 16> Idxs(NumElts); 3287 for (unsigned l = 0; l != NumElts; l += NumLaneElts) 3288 for (unsigned i = 0; i != NumLaneElts; i += 2) { 3289 Idxs[i + l + 0] = i + l + Offset; 3290 Idxs[i + l + 1] = i + l + Offset; 3291 } 3292 3293 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs); 3294 3295 Rep = 3296 emitX86Select(Builder, CI->getArgOperand(2), Rep, CI->getArgOperand(1)); 3297 } else if (Name.starts_with("avx512.mask.punpckl") || 3298 Name.starts_with("avx512.mask.unpckl.")) { 3299 Value *Op0 = CI->getArgOperand(0); 3300 Value *Op1 = CI->getArgOperand(1); 3301 int NumElts = cast<FixedVectorType>(CI->getType())->getNumElements(); 3302 int NumLaneElts = 128 / CI->getType()->getScalarSizeInBits(); 3303 3304 SmallVector<int, 64> Idxs(NumElts); 3305 for (int l = 0; l != NumElts; l += NumLaneElts) 3306 for (int i = 0; i != NumLaneElts; ++i) 3307 Idxs[i + l] = l + (i / 2) + NumElts * (i % 2); 3308 3309 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs); 3310 3311 Rep = 3312 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2)); 3313 } else if (Name.starts_with("avx512.mask.punpckh") || 3314 Name.starts_with("avx512.mask.unpckh.")) { 3315 Value *Op0 = CI->getArgOperand(0); 3316 Value *Op1 = CI->getArgOperand(1); 3317 int NumElts = cast<FixedVectorType>(CI->getType())->getNumElements(); 3318 int NumLaneElts = 128 / CI->getType()->getScalarSizeInBits(); 3319 3320 SmallVector<int, 64> Idxs(NumElts); 3321 for (int l = 0; l != NumElts; l += NumLaneElts) 3322 for (int i = 0; i != NumLaneElts; ++i) 3323 Idxs[i + l] = (NumLaneElts / 2) + l + (i / 2) + NumElts * (i % 2); 3324 3325 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs); 3326 3327 Rep = 3328 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2)); 3329 } else if (Name.starts_with("avx512.mask.and.") || 3330 Name.starts_with("avx512.mask.pand.")) { 3331 VectorType *FTy = cast<VectorType>(CI->getType()); 3332 VectorType *ITy = VectorType::getInteger(FTy); 3333 Rep = Builder.CreateAnd(Builder.CreateBitCast(CI->getArgOperand(0), ITy), 3334 Builder.CreateBitCast(CI->getArgOperand(1), ITy)); 3335 Rep = Builder.CreateBitCast(Rep, FTy); 3336 Rep = 3337 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2)); 3338 } else if (Name.starts_with("avx512.mask.andn.") || 3339 Name.starts_with("avx512.mask.pandn.")) { 3340 VectorType *FTy = cast<VectorType>(CI->getType()); 3341 VectorType *ITy = VectorType::getInteger(FTy); 3342 Rep = Builder.CreateNot(Builder.CreateBitCast(CI->getArgOperand(0), ITy)); 3343 Rep = Builder.CreateAnd(Rep, 3344 Builder.CreateBitCast(CI->getArgOperand(1), ITy)); 3345 Rep = Builder.CreateBitCast(Rep, FTy); 3346 Rep = 3347 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2)); 3348 } else if (Name.starts_with("avx512.mask.or.") || 3349 
Name.starts_with("avx512.mask.por.")) { 3350 VectorType *FTy = cast<VectorType>(CI->getType()); 3351 VectorType *ITy = VectorType::getInteger(FTy); 3352 Rep = Builder.CreateOr(Builder.CreateBitCast(CI->getArgOperand(0), ITy), 3353 Builder.CreateBitCast(CI->getArgOperand(1), ITy)); 3354 Rep = Builder.CreateBitCast(Rep, FTy); 3355 Rep = 3356 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2)); 3357 } else if (Name.starts_with("avx512.mask.xor.") || 3358 Name.starts_with("avx512.mask.pxor.")) { 3359 VectorType *FTy = cast<VectorType>(CI->getType()); 3360 VectorType *ITy = VectorType::getInteger(FTy); 3361 Rep = Builder.CreateXor(Builder.CreateBitCast(CI->getArgOperand(0), ITy), 3362 Builder.CreateBitCast(CI->getArgOperand(1), ITy)); 3363 Rep = Builder.CreateBitCast(Rep, FTy); 3364 Rep = 3365 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2)); 3366 } else if (Name.starts_with("avx512.mask.padd.")) { 3367 Rep = Builder.CreateAdd(CI->getArgOperand(0), CI->getArgOperand(1)); 3368 Rep = 3369 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2)); 3370 } else if (Name.starts_with("avx512.mask.psub.")) { 3371 Rep = Builder.CreateSub(CI->getArgOperand(0), CI->getArgOperand(1)); 3372 Rep = 3373 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2)); 3374 } else if (Name.starts_with("avx512.mask.pmull.")) { 3375 Rep = Builder.CreateMul(CI->getArgOperand(0), CI->getArgOperand(1)); 3376 Rep = 3377 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2)); 3378 } else if (Name.starts_with("avx512.mask.add.p")) { 3379 if (Name.ends_with(".512")) { 3380 Intrinsic::ID IID; 3381 if (Name[17] == 's') 3382 IID = Intrinsic::x86_avx512_add_ps_512; 3383 else 3384 IID = Intrinsic::x86_avx512_add_pd_512; 3385 3386 Rep = Builder.CreateIntrinsic( 3387 IID, {}, 3388 {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(4)}); 3389 } else { 3390 Rep = Builder.CreateFAdd(CI->getArgOperand(0), CI->getArgOperand(1)); 3391 } 3392 Rep = 3393 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2)); 3394 } else if (Name.starts_with("avx512.mask.div.p")) { 3395 if (Name.ends_with(".512")) { 3396 Intrinsic::ID IID; 3397 if (Name[17] == 's') 3398 IID = Intrinsic::x86_avx512_div_ps_512; 3399 else 3400 IID = Intrinsic::x86_avx512_div_pd_512; 3401 3402 Rep = Builder.CreateIntrinsic( 3403 IID, {}, 3404 {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(4)}); 3405 } else { 3406 Rep = Builder.CreateFDiv(CI->getArgOperand(0), CI->getArgOperand(1)); 3407 } 3408 Rep = 3409 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2)); 3410 } else if (Name.starts_with("avx512.mask.mul.p")) { 3411 if (Name.ends_with(".512")) { 3412 Intrinsic::ID IID; 3413 if (Name[17] == 's') 3414 IID = Intrinsic::x86_avx512_mul_ps_512; 3415 else 3416 IID = Intrinsic::x86_avx512_mul_pd_512; 3417 3418 Rep = Builder.CreateIntrinsic( 3419 IID, {}, 3420 {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(4)}); 3421 } else { 3422 Rep = Builder.CreateFMul(CI->getArgOperand(0), CI->getArgOperand(1)); 3423 } 3424 Rep = 3425 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2)); 3426 } else if (Name.starts_with("avx512.mask.sub.p")) { 3427 if (Name.ends_with(".512")) { 3428 Intrinsic::ID IID; 3429 if (Name[17] == 's') 3430 IID = Intrinsic::x86_avx512_sub_ps_512; 3431 else 3432 IID = Intrinsic::x86_avx512_sub_pd_512; 3433 3434 Rep = Builder.CreateIntrinsic( 3435 IID, {}, 3436 {CI->getArgOperand(0), 
CI->getArgOperand(1), CI->getArgOperand(4)}); 3437 } else { 3438 Rep = Builder.CreateFSub(CI->getArgOperand(0), CI->getArgOperand(1)); 3439 } 3440 Rep = 3441 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2)); 3442 } else if ((Name.starts_with("avx512.mask.max.p") || 3443 Name.starts_with("avx512.mask.min.p")) && 3444 Name.drop_front(18) == ".512") { 3445 bool IsDouble = Name[17] == 'd'; 3446 bool IsMin = Name[13] == 'i'; 3447 static const Intrinsic::ID MinMaxTbl[2][2] = { 3448 {Intrinsic::x86_avx512_max_ps_512, Intrinsic::x86_avx512_max_pd_512}, 3449 {Intrinsic::x86_avx512_min_ps_512, Intrinsic::x86_avx512_min_pd_512}}; 3450 Intrinsic::ID IID = MinMaxTbl[IsMin][IsDouble]; 3451 3452 Rep = Builder.CreateIntrinsic( 3453 IID, {}, 3454 {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(4)}); 3455 Rep = 3456 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2)); 3457 } else if (Name.starts_with("avx512.mask.lzcnt.")) { 3458 Rep = 3459 Builder.CreateIntrinsic(Intrinsic::ctlz, CI->getType(), 3460 {CI->getArgOperand(0), Builder.getInt1(false)}); 3461 Rep = 3462 emitX86Select(Builder, CI->getArgOperand(2), Rep, CI->getArgOperand(1)); 3463 } else if (Name.starts_with("avx512.mask.psll")) { 3464 bool IsImmediate = Name[16] == 'i' || (Name.size() > 18 && Name[18] == 'i'); 3465 bool IsVariable = Name[16] == 'v'; 3466 char Size = Name[16] == '.' ? Name[17] 3467 : Name[17] == '.' ? Name[18] 3468 : Name[18] == '.' ? Name[19] 3469 : Name[20]; 3470 3471 Intrinsic::ID IID; 3472 if (IsVariable && Name[17] != '.') { 3473 if (Size == 'd' && Name[17] == '2') // avx512.mask.psllv2.di 3474 IID = Intrinsic::x86_avx2_psllv_q; 3475 else if (Size == 'd' && Name[17] == '4') // avx512.mask.psllv4.di 3476 IID = Intrinsic::x86_avx2_psllv_q_256; 3477 else if (Size == 's' && Name[17] == '4') // avx512.mask.psllv4.si 3478 IID = Intrinsic::x86_avx2_psllv_d; 3479 else if (Size == 's' && Name[17] == '8') // avx512.mask.psllv8.si 3480 IID = Intrinsic::x86_avx2_psllv_d_256; 3481 else if (Size == 'h' && Name[17] == '8') // avx512.mask.psllv8.hi 3482 IID = Intrinsic::x86_avx512_psllv_w_128; 3483 else if (Size == 'h' && Name[17] == '1') // avx512.mask.psllv16.hi 3484 IID = Intrinsic::x86_avx512_psllv_w_256; 3485 else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psllv32hi 3486 IID = Intrinsic::x86_avx512_psllv_w_512; 3487 else 3488 llvm_unreachable("Unexpected size"); 3489 } else if (Name.ends_with(".128")) { 3490 if (Size == 'd') // avx512.mask.psll.d.128, avx512.mask.psll.di.128 3491 IID = IsImmediate ? Intrinsic::x86_sse2_pslli_d 3492 : Intrinsic::x86_sse2_psll_d; 3493 else if (Size == 'q') // avx512.mask.psll.q.128, avx512.mask.psll.qi.128 3494 IID = IsImmediate ? Intrinsic::x86_sse2_pslli_q 3495 : Intrinsic::x86_sse2_psll_q; 3496 else if (Size == 'w') // avx512.mask.psll.w.128, avx512.mask.psll.wi.128 3497 IID = IsImmediate ? Intrinsic::x86_sse2_pslli_w 3498 : Intrinsic::x86_sse2_psll_w; 3499 else 3500 llvm_unreachable("Unexpected size"); 3501 } else if (Name.ends_with(".256")) { 3502 if (Size == 'd') // avx512.mask.psll.d.256, avx512.mask.psll.di.256 3503 IID = IsImmediate ? Intrinsic::x86_avx2_pslli_d 3504 : Intrinsic::x86_avx2_psll_d; 3505 else if (Size == 'q') // avx512.mask.psll.q.256, avx512.mask.psll.qi.256 3506 IID = IsImmediate ? Intrinsic::x86_avx2_pslli_q 3507 : Intrinsic::x86_avx2_psll_q; 3508 else if (Size == 'w') // avx512.mask.psll.w.256, avx512.mask.psll.wi.256 3509 IID = IsImmediate ? 
Intrinsic::x86_avx2_pslli_w 3510 : Intrinsic::x86_avx2_psll_w; 3511 else 3512 llvm_unreachable("Unexpected size"); 3513 } else { 3514 if (Size == 'd') // psll.di.512, pslli.d, psll.d, psllv.d.512 3515 IID = IsImmediate ? Intrinsic::x86_avx512_pslli_d_512 3516 : IsVariable ? Intrinsic::x86_avx512_psllv_d_512 3517 : Intrinsic::x86_avx512_psll_d_512; 3518 else if (Size == 'q') // psll.qi.512, pslli.q, psll.q, psllv.q.512 3519 IID = IsImmediate ? Intrinsic::x86_avx512_pslli_q_512 3520 : IsVariable ? Intrinsic::x86_avx512_psllv_q_512 3521 : Intrinsic::x86_avx512_psll_q_512; 3522 else if (Size == 'w') // psll.wi.512, pslli.w, psll.w 3523 IID = IsImmediate ? Intrinsic::x86_avx512_pslli_w_512 3524 : Intrinsic::x86_avx512_psll_w_512; 3525 else 3526 llvm_unreachable("Unexpected size"); 3527 } 3528 3529 Rep = upgradeX86MaskedShift(Builder, *CI, IID); 3530 } else if (Name.starts_with("avx512.mask.psrl")) { 3531 bool IsImmediate = Name[16] == 'i' || (Name.size() > 18 && Name[18] == 'i'); 3532 bool IsVariable = Name[16] == 'v'; 3533 char Size = Name[16] == '.' ? Name[17] 3534 : Name[17] == '.' ? Name[18] 3535 : Name[18] == '.' ? Name[19] 3536 : Name[20]; 3537 3538 Intrinsic::ID IID; 3539 if (IsVariable && Name[17] != '.') { 3540 if (Size == 'd' && Name[17] == '2') // avx512.mask.psrlv2.di 3541 IID = Intrinsic::x86_avx2_psrlv_q; 3542 else if (Size == 'd' && Name[17] == '4') // avx512.mask.psrlv4.di 3543 IID = Intrinsic::x86_avx2_psrlv_q_256; 3544 else if (Size == 's' && Name[17] == '4') // avx512.mask.psrlv4.si 3545 IID = Intrinsic::x86_avx2_psrlv_d; 3546 else if (Size == 's' && Name[17] == '8') // avx512.mask.psrlv8.si 3547 IID = Intrinsic::x86_avx2_psrlv_d_256; 3548 else if (Size == 'h' && Name[17] == '8') // avx512.mask.psrlv8.hi 3549 IID = Intrinsic::x86_avx512_psrlv_w_128; 3550 else if (Size == 'h' && Name[17] == '1') // avx512.mask.psrlv16.hi 3551 IID = Intrinsic::x86_avx512_psrlv_w_256; 3552 else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psrlv32hi 3553 IID = Intrinsic::x86_avx512_psrlv_w_512; 3554 else 3555 llvm_unreachable("Unexpected size"); 3556 } else if (Name.ends_with(".128")) { 3557 if (Size == 'd') // avx512.mask.psrl.d.128, avx512.mask.psrl.di.128 3558 IID = IsImmediate ? Intrinsic::x86_sse2_psrli_d 3559 : Intrinsic::x86_sse2_psrl_d; 3560 else if (Size == 'q') // avx512.mask.psrl.q.128, avx512.mask.psrl.qi.128 3561 IID = IsImmediate ? Intrinsic::x86_sse2_psrli_q 3562 : Intrinsic::x86_sse2_psrl_q; 3563 else if (Size == 'w') // avx512.mask.psrl.w.128, avx512.mask.psrl.wi.128 3564 IID = IsImmediate ? Intrinsic::x86_sse2_psrli_w 3565 : Intrinsic::x86_sse2_psrl_w; 3566 else 3567 llvm_unreachable("Unexpected size"); 3568 } else if (Name.ends_with(".256")) { 3569 if (Size == 'd') // avx512.mask.psrl.d.256, avx512.mask.psrl.di.256 3570 IID = IsImmediate ? Intrinsic::x86_avx2_psrli_d 3571 : Intrinsic::x86_avx2_psrl_d; 3572 else if (Size == 'q') // avx512.mask.psrl.q.256, avx512.mask.psrl.qi.256 3573 IID = IsImmediate ? Intrinsic::x86_avx2_psrli_q 3574 : Intrinsic::x86_avx2_psrl_q; 3575 else if (Size == 'w') // avx512.mask.psrl.w.256, avx512.mask.psrl.wi.256 3576 IID = IsImmediate ? Intrinsic::x86_avx2_psrli_w 3577 : Intrinsic::x86_avx2_psrl_w; 3578 else 3579 llvm_unreachable("Unexpected size"); 3580 } else { 3581 if (Size == 'd') // psrl.di.512, psrli.d, psrl.d, psrl.d.512 3582 IID = IsImmediate ? Intrinsic::x86_avx512_psrli_d_512 3583 : IsVariable ? 
Intrinsic::x86_avx512_psrlv_d_512
3584 : Intrinsic::x86_avx512_psrl_d_512;
3585 else if (Size == 'q') // psrl.qi.512, psrli.q, psrl.q, psrl.q.512
3586 IID = IsImmediate ? Intrinsic::x86_avx512_psrli_q_512
3587 : IsVariable ? Intrinsic::x86_avx512_psrlv_q_512
3588 : Intrinsic::x86_avx512_psrl_q_512;
3589 else if (Size == 'w') // psrl.wi.512, psrli.w, psrl.w
3590 IID = IsImmediate ? Intrinsic::x86_avx512_psrli_w_512
3591 : Intrinsic::x86_avx512_psrl_w_512;
3592 else
3593 llvm_unreachable("Unexpected size");
3594 }
3595
3596 Rep = upgradeX86MaskedShift(Builder, *CI, IID);
3597 } else if (Name.starts_with("avx512.mask.psra")) {
3598 bool IsImmediate = Name[16] == 'i' || (Name.size() > 18 && Name[18] == 'i');
3599 bool IsVariable = Name[16] == 'v';
3600 char Size = Name[16] == '.' ? Name[17]
3601 : Name[17] == '.' ? Name[18]
3602 : Name[18] == '.' ? Name[19]
3603 : Name[20];
3604
3605 Intrinsic::ID IID;
3606 if (IsVariable && Name[17] != '.') {
3607 if (Size == 's' && Name[17] == '4') // avx512.mask.psrav4.si
3608 IID = Intrinsic::x86_avx2_psrav_d;
3609 else if (Size == 's' && Name[17] == '8') // avx512.mask.psrav8.si
3610 IID = Intrinsic::x86_avx2_psrav_d_256;
3611 else if (Size == 'h' && Name[17] == '8') // avx512.mask.psrav8.hi
3612 IID = Intrinsic::x86_avx512_psrav_w_128;
3613 else if (Size == 'h' && Name[17] == '1') // avx512.mask.psrav16.hi
3614 IID = Intrinsic::x86_avx512_psrav_w_256;
3615 else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psrav32hi
3616 IID = Intrinsic::x86_avx512_psrav_w_512;
3617 else
3618 llvm_unreachable("Unexpected size");
3619 } else if (Name.ends_with(".128")) {
3620 if (Size == 'd') // avx512.mask.psra.d.128, avx512.mask.psra.di.128
3621 IID = IsImmediate ? Intrinsic::x86_sse2_psrai_d
3622 : Intrinsic::x86_sse2_psra_d;
3623 else if (Size == 'q') // avx512.mask.psra.q.128, avx512.mask.psra.qi.128
3624 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_128
3625 : IsVariable ? Intrinsic::x86_avx512_psrav_q_128
3626 : Intrinsic::x86_avx512_psra_q_128;
3627 else if (Size == 'w') // avx512.mask.psra.w.128, avx512.mask.psra.wi.128
3628 IID = IsImmediate ? Intrinsic::x86_sse2_psrai_w
3629 : Intrinsic::x86_sse2_psra_w;
3630 else
3631 llvm_unreachable("Unexpected size");
3632 } else if (Name.ends_with(".256")) {
3633 if (Size == 'd') // avx512.mask.psra.d.256, avx512.mask.psra.di.256
3634 IID = IsImmediate ? Intrinsic::x86_avx2_psrai_d
3635 : Intrinsic::x86_avx2_psra_d;
3636 else if (Size == 'q') // avx512.mask.psra.q.256, avx512.mask.psra.qi.256
3637 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_256
3638 : IsVariable ? Intrinsic::x86_avx512_psrav_q_256
3639 : Intrinsic::x86_avx512_psra_q_256;
3640 else if (Size == 'w') // avx512.mask.psra.w.256, avx512.mask.psra.wi.256
3641 IID = IsImmediate ? Intrinsic::x86_avx2_psrai_w
3642 : Intrinsic::x86_avx2_psra_w;
3643 else
3644 llvm_unreachable("Unexpected size");
3645 } else {
3646 if (Size == 'd') // psra.di.512, psrai.d, psra.d, psrav.d.512
3647 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_d_512
3648 : IsVariable ? Intrinsic::x86_avx512_psrav_d_512
3649 : Intrinsic::x86_avx512_psra_d_512;
3650 else if (Size == 'q') // psra.qi.512, psrai.q, psra.q
3651 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_512
3652 : IsVariable ? Intrinsic::x86_avx512_psrav_q_512
3653 : Intrinsic::x86_avx512_psra_q_512;
3654 else if (Size == 'w') // psra.wi.512, psrai.w, psra.w
3655 IID = IsImmediate ?
Intrinsic::x86_avx512_psrai_w_512 3656 : Intrinsic::x86_avx512_psra_w_512; 3657 else 3658 llvm_unreachable("Unexpected size"); 3659 } 3660 3661 Rep = upgradeX86MaskedShift(Builder, *CI, IID); 3662 } else if (Name.starts_with("avx512.mask.move.s")) { 3663 Rep = upgradeMaskedMove(Builder, *CI); 3664 } else if (Name.starts_with("avx512.cvtmask2")) { 3665 Rep = upgradeMaskToInt(Builder, *CI); 3666 } else if (Name.ends_with(".movntdqa")) { 3667 MDNode *Node = MDNode::get( 3668 C, ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1))); 3669 3670 LoadInst *LI = Builder.CreateAlignedLoad( 3671 CI->getType(), CI->getArgOperand(0), 3672 Align(CI->getType()->getPrimitiveSizeInBits().getFixedValue() / 8)); 3673 LI->setMetadata(LLVMContext::MD_nontemporal, Node); 3674 Rep = LI; 3675 } else if (Name.starts_with("fma.vfmadd.") || 3676 Name.starts_with("fma.vfmsub.") || 3677 Name.starts_with("fma.vfnmadd.") || 3678 Name.starts_with("fma.vfnmsub.")) { 3679 bool NegMul = Name[6] == 'n'; 3680 bool NegAcc = NegMul ? Name[8] == 's' : Name[7] == 's'; 3681 bool IsScalar = NegMul ? Name[12] == 's' : Name[11] == 's'; 3682 3683 Value *Ops[] = {CI->getArgOperand(0), CI->getArgOperand(1), 3684 CI->getArgOperand(2)}; 3685 3686 if (IsScalar) { 3687 Ops[0] = Builder.CreateExtractElement(Ops[0], (uint64_t)0); 3688 Ops[1] = Builder.CreateExtractElement(Ops[1], (uint64_t)0); 3689 Ops[2] = Builder.CreateExtractElement(Ops[2], (uint64_t)0); 3690 } 3691 3692 if (NegMul && !IsScalar) 3693 Ops[0] = Builder.CreateFNeg(Ops[0]); 3694 if (NegMul && IsScalar) 3695 Ops[1] = Builder.CreateFNeg(Ops[1]); 3696 if (NegAcc) 3697 Ops[2] = Builder.CreateFNeg(Ops[2]); 3698 3699 Rep = Builder.CreateIntrinsic(Intrinsic::fma, Ops[0]->getType(), Ops); 3700 3701 if (IsScalar) 3702 Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0); 3703 } else if (Name.starts_with("fma4.vfmadd.s")) { 3704 Value *Ops[] = {CI->getArgOperand(0), CI->getArgOperand(1), 3705 CI->getArgOperand(2)}; 3706 3707 Ops[0] = Builder.CreateExtractElement(Ops[0], (uint64_t)0); 3708 Ops[1] = Builder.CreateExtractElement(Ops[1], (uint64_t)0); 3709 Ops[2] = Builder.CreateExtractElement(Ops[2], (uint64_t)0); 3710 3711 Rep = Builder.CreateIntrinsic(Intrinsic::fma, Ops[0]->getType(), Ops); 3712 3713 Rep = Builder.CreateInsertElement(Constant::getNullValue(CI->getType()), 3714 Rep, (uint64_t)0); 3715 } else if (Name.starts_with("avx512.mask.vfmadd.s") || 3716 Name.starts_with("avx512.maskz.vfmadd.s") || 3717 Name.starts_with("avx512.mask3.vfmadd.s") || 3718 Name.starts_with("avx512.mask3.vfmsub.s") || 3719 Name.starts_with("avx512.mask3.vfnmsub.s")) { 3720 bool IsMask3 = Name[11] == '3'; 3721 bool IsMaskZ = Name[11] == 'z'; 3722 // Drop the "avx512.mask." to make it easier. 3723 Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12); 3724 bool NegMul = Name[2] == 'n'; 3725 bool NegAcc = NegMul ? 
Name[4] == 's' : Name[3] == 's'; 3726 3727 Value *A = CI->getArgOperand(0); 3728 Value *B = CI->getArgOperand(1); 3729 Value *C = CI->getArgOperand(2); 3730 3731 if (NegMul && (IsMask3 || IsMaskZ)) 3732 A = Builder.CreateFNeg(A); 3733 if (NegMul && !(IsMask3 || IsMaskZ)) 3734 B = Builder.CreateFNeg(B); 3735 if (NegAcc) 3736 C = Builder.CreateFNeg(C); 3737 3738 A = Builder.CreateExtractElement(A, (uint64_t)0); 3739 B = Builder.CreateExtractElement(B, (uint64_t)0); 3740 C = Builder.CreateExtractElement(C, (uint64_t)0); 3741 3742 if (!isa<ConstantInt>(CI->getArgOperand(4)) || 3743 cast<ConstantInt>(CI->getArgOperand(4))->getZExtValue() != 4) { 3744 Value *Ops[] = {A, B, C, CI->getArgOperand(4)}; 3745 3746 Intrinsic::ID IID; 3747 if (Name.back() == 'd') 3748 IID = Intrinsic::x86_avx512_vfmadd_f64; 3749 else 3750 IID = Intrinsic::x86_avx512_vfmadd_f32; 3751 Rep = Builder.CreateIntrinsic(IID, {}, Ops); 3752 } else { 3753 Rep = Builder.CreateIntrinsic(Intrinsic::fma, A->getType(), {A, B, C}); 3754 } 3755 3756 Value *PassThru = IsMaskZ ? Constant::getNullValue(Rep->getType()) 3757 : IsMask3 ? C 3758 : A; 3759 3760 // For Mask3 with NegAcc, we need to create a new extractelement that 3761 // avoids the negation above. 3762 if (NegAcc && IsMask3) 3763 PassThru = 3764 Builder.CreateExtractElement(CI->getArgOperand(2), (uint64_t)0); 3765 3766 Rep = emitX86ScalarSelect(Builder, CI->getArgOperand(3), Rep, PassThru); 3767 Rep = Builder.CreateInsertElement(CI->getArgOperand(IsMask3 ? 2 : 0), Rep, 3768 (uint64_t)0); 3769 } else if (Name.starts_with("avx512.mask.vfmadd.p") || 3770 Name.starts_with("avx512.mask.vfnmadd.p") || 3771 Name.starts_with("avx512.mask.vfnmsub.p") || 3772 Name.starts_with("avx512.mask3.vfmadd.p") || 3773 Name.starts_with("avx512.mask3.vfmsub.p") || 3774 Name.starts_with("avx512.mask3.vfnmsub.p") || 3775 Name.starts_with("avx512.maskz.vfmadd.p")) { 3776 bool IsMask3 = Name[11] == '3'; 3777 bool IsMaskZ = Name[11] == 'z'; 3778 // Drop the "avx512.mask." to make it easier. 3779 Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12); 3780 bool NegMul = Name[2] == 'n'; 3781 bool NegAcc = NegMul ? Name[4] == 's' : Name[3] == 's'; 3782 3783 Value *A = CI->getArgOperand(0); 3784 Value *B = CI->getArgOperand(1); 3785 Value *C = CI->getArgOperand(2); 3786 3787 if (NegMul && (IsMask3 || IsMaskZ)) 3788 A = Builder.CreateFNeg(A); 3789 if (NegMul && !(IsMask3 || IsMaskZ)) 3790 B = Builder.CreateFNeg(B); 3791 if (NegAcc) 3792 C = Builder.CreateFNeg(C); 3793 3794 if (CI->arg_size() == 5 && 3795 (!isa<ConstantInt>(CI->getArgOperand(4)) || 3796 cast<ConstantInt>(CI->getArgOperand(4))->getZExtValue() != 4)) { 3797 Intrinsic::ID IID; 3798 // Check the character before ".512" in string. 3799 if (Name[Name.size() - 5] == 's') 3800 IID = Intrinsic::x86_avx512_vfmadd_ps_512; 3801 else 3802 IID = Intrinsic::x86_avx512_vfmadd_pd_512; 3803 3804 Rep = Builder.CreateIntrinsic(IID, {}, {A, B, C, CI->getArgOperand(4)}); 3805 } else { 3806 Rep = Builder.CreateIntrinsic(Intrinsic::fma, A->getType(), {A, B, C}); 3807 } 3808 3809 Value *PassThru = IsMaskZ ? llvm::Constant::getNullValue(CI->getType()) 3810 : IsMask3 ? 
CI->getArgOperand(2) 3811 : CI->getArgOperand(0); 3812 3813 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru); 3814 } else if (Name.starts_with("fma.vfmsubadd.p")) { 3815 unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits(); 3816 unsigned EltWidth = CI->getType()->getScalarSizeInBits(); 3817 Intrinsic::ID IID; 3818 if (VecWidth == 128 && EltWidth == 32) 3819 IID = Intrinsic::x86_fma_vfmaddsub_ps; 3820 else if (VecWidth == 256 && EltWidth == 32) 3821 IID = Intrinsic::x86_fma_vfmaddsub_ps_256; 3822 else if (VecWidth == 128 && EltWidth == 64) 3823 IID = Intrinsic::x86_fma_vfmaddsub_pd; 3824 else if (VecWidth == 256 && EltWidth == 64) 3825 IID = Intrinsic::x86_fma_vfmaddsub_pd_256; 3826 else 3827 llvm_unreachable("Unexpected intrinsic"); 3828 3829 Value *Ops[] = {CI->getArgOperand(0), CI->getArgOperand(1), 3830 CI->getArgOperand(2)}; 3831 Ops[2] = Builder.CreateFNeg(Ops[2]); 3832 Rep = Builder.CreateIntrinsic(IID, {}, Ops); 3833 } else if (Name.starts_with("avx512.mask.vfmaddsub.p") || 3834 Name.starts_with("avx512.mask3.vfmaddsub.p") || 3835 Name.starts_with("avx512.maskz.vfmaddsub.p") || 3836 Name.starts_with("avx512.mask3.vfmsubadd.p")) { 3837 bool IsMask3 = Name[11] == '3'; 3838 bool IsMaskZ = Name[11] == 'z'; 3839 // Drop the "avx512.mask." to make it easier. 3840 Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12); 3841 bool IsSubAdd = Name[3] == 's'; 3842 if (CI->arg_size() == 5) { 3843 Intrinsic::ID IID; 3844 // Check the character before ".512" in string. 3845 if (Name[Name.size() - 5] == 's') 3846 IID = Intrinsic::x86_avx512_vfmaddsub_ps_512; 3847 else 3848 IID = Intrinsic::x86_avx512_vfmaddsub_pd_512; 3849 3850 Value *Ops[] = {CI->getArgOperand(0), CI->getArgOperand(1), 3851 CI->getArgOperand(2), CI->getArgOperand(4)}; 3852 if (IsSubAdd) 3853 Ops[2] = Builder.CreateFNeg(Ops[2]); 3854 3855 Rep = Builder.CreateIntrinsic(IID, {}, Ops); 3856 } else { 3857 int NumElts = cast<FixedVectorType>(CI->getType())->getNumElements(); 3858 3859 Value *Ops[] = {CI->getArgOperand(0), CI->getArgOperand(1), 3860 CI->getArgOperand(2)}; 3861 3862 Function *FMA = Intrinsic::getOrInsertDeclaration( 3863 CI->getModule(), Intrinsic::fma, Ops[0]->getType()); 3864 Value *Odd = Builder.CreateCall(FMA, Ops); 3865 Ops[2] = Builder.CreateFNeg(Ops[2]); 3866 Value *Even = Builder.CreateCall(FMA, Ops); 3867 3868 if (IsSubAdd) 3869 std::swap(Even, Odd); 3870 3871 SmallVector<int, 32> Idxs(NumElts); 3872 for (int i = 0; i != NumElts; ++i) 3873 Idxs[i] = i + (i % 2) * NumElts; 3874 3875 Rep = Builder.CreateShuffleVector(Even, Odd, Idxs); 3876 } 3877 3878 Value *PassThru = IsMaskZ ? llvm::Constant::getNullValue(CI->getType()) 3879 : IsMask3 ? 
CI->getArgOperand(2) 3880 : CI->getArgOperand(0); 3881 3882 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru); 3883 } else if (Name.starts_with("avx512.mask.pternlog.") || 3884 Name.starts_with("avx512.maskz.pternlog.")) { 3885 bool ZeroMask = Name[11] == 'z'; 3886 unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits(); 3887 unsigned EltWidth = CI->getType()->getScalarSizeInBits(); 3888 Intrinsic::ID IID; 3889 if (VecWidth == 128 && EltWidth == 32) 3890 IID = Intrinsic::x86_avx512_pternlog_d_128; 3891 else if (VecWidth == 256 && EltWidth == 32) 3892 IID = Intrinsic::x86_avx512_pternlog_d_256; 3893 else if (VecWidth == 512 && EltWidth == 32) 3894 IID = Intrinsic::x86_avx512_pternlog_d_512; 3895 else if (VecWidth == 128 && EltWidth == 64) 3896 IID = Intrinsic::x86_avx512_pternlog_q_128; 3897 else if (VecWidth == 256 && EltWidth == 64) 3898 IID = Intrinsic::x86_avx512_pternlog_q_256; 3899 else if (VecWidth == 512 && EltWidth == 64) 3900 IID = Intrinsic::x86_avx512_pternlog_q_512; 3901 else 3902 llvm_unreachable("Unexpected intrinsic"); 3903 3904 Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(1), 3905 CI->getArgOperand(2), CI->getArgOperand(3)}; 3906 Rep = Builder.CreateIntrinsic(IID, {}, Args); 3907 Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType()) 3908 : CI->getArgOperand(0); 3909 Rep = emitX86Select(Builder, CI->getArgOperand(4), Rep, PassThru); 3910 } else if (Name.starts_with("avx512.mask.vpmadd52") || 3911 Name.starts_with("avx512.maskz.vpmadd52")) { 3912 bool ZeroMask = Name[11] == 'z'; 3913 bool High = Name[20] == 'h' || Name[21] == 'h'; 3914 unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits(); 3915 Intrinsic::ID IID; 3916 if (VecWidth == 128 && !High) 3917 IID = Intrinsic::x86_avx512_vpmadd52l_uq_128; 3918 else if (VecWidth == 256 && !High) 3919 IID = Intrinsic::x86_avx512_vpmadd52l_uq_256; 3920 else if (VecWidth == 512 && !High) 3921 IID = Intrinsic::x86_avx512_vpmadd52l_uq_512; 3922 else if (VecWidth == 128 && High) 3923 IID = Intrinsic::x86_avx512_vpmadd52h_uq_128; 3924 else if (VecWidth == 256 && High) 3925 IID = Intrinsic::x86_avx512_vpmadd52h_uq_256; 3926 else if (VecWidth == 512 && High) 3927 IID = Intrinsic::x86_avx512_vpmadd52h_uq_512; 3928 else 3929 llvm_unreachable("Unexpected intrinsic"); 3930 3931 Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(1), 3932 CI->getArgOperand(2)}; 3933 Rep = Builder.CreateIntrinsic(IID, {}, Args); 3934 Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType()) 3935 : CI->getArgOperand(0); 3936 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru); 3937 } else if (Name.starts_with("avx512.mask.vpermi2var.") || 3938 Name.starts_with("avx512.mask.vpermt2var.") || 3939 Name.starts_with("avx512.maskz.vpermt2var.")) { 3940 bool ZeroMask = Name[11] == 'z'; 3941 bool IndexForm = Name[17] == 'i'; 3942 Rep = upgradeX86VPERMT2Intrinsics(Builder, *CI, ZeroMask, IndexForm); 3943 } else if (Name.starts_with("avx512.mask.vpdpbusd.") || 3944 Name.starts_with("avx512.maskz.vpdpbusd.") || 3945 Name.starts_with("avx512.mask.vpdpbusds.") || 3946 Name.starts_with("avx512.maskz.vpdpbusds.")) { 3947 bool ZeroMask = Name[11] == 'z'; 3948 bool IsSaturating = Name[ZeroMask ? 
21 : 20] == 's'; 3949 unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits(); 3950 Intrinsic::ID IID; 3951 if (VecWidth == 128 && !IsSaturating) 3952 IID = Intrinsic::x86_avx512_vpdpbusd_128; 3953 else if (VecWidth == 256 && !IsSaturating) 3954 IID = Intrinsic::x86_avx512_vpdpbusd_256; 3955 else if (VecWidth == 512 && !IsSaturating) 3956 IID = Intrinsic::x86_avx512_vpdpbusd_512; 3957 else if (VecWidth == 128 && IsSaturating) 3958 IID = Intrinsic::x86_avx512_vpdpbusds_128; 3959 else if (VecWidth == 256 && IsSaturating) 3960 IID = Intrinsic::x86_avx512_vpdpbusds_256; 3961 else if (VecWidth == 512 && IsSaturating) 3962 IID = Intrinsic::x86_avx512_vpdpbusds_512; 3963 else 3964 llvm_unreachable("Unexpected intrinsic"); 3965 3966 Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(1), 3967 CI->getArgOperand(2)}; 3968 Rep = Builder.CreateIntrinsic(IID, {}, Args); 3969 Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType()) 3970 : CI->getArgOperand(0); 3971 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru); 3972 } else if (Name.starts_with("avx512.mask.vpdpwssd.") || 3973 Name.starts_with("avx512.maskz.vpdpwssd.") || 3974 Name.starts_with("avx512.mask.vpdpwssds.") || 3975 Name.starts_with("avx512.maskz.vpdpwssds.")) { 3976 bool ZeroMask = Name[11] == 'z'; 3977 bool IsSaturating = Name[ZeroMask ? 21 : 20] == 's'; 3978 unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits(); 3979 Intrinsic::ID IID; 3980 if (VecWidth == 128 && !IsSaturating) 3981 IID = Intrinsic::x86_avx512_vpdpwssd_128; 3982 else if (VecWidth == 256 && !IsSaturating) 3983 IID = Intrinsic::x86_avx512_vpdpwssd_256; 3984 else if (VecWidth == 512 && !IsSaturating) 3985 IID = Intrinsic::x86_avx512_vpdpwssd_512; 3986 else if (VecWidth == 128 && IsSaturating) 3987 IID = Intrinsic::x86_avx512_vpdpwssds_128; 3988 else if (VecWidth == 256 && IsSaturating) 3989 IID = Intrinsic::x86_avx512_vpdpwssds_256; 3990 else if (VecWidth == 512 && IsSaturating) 3991 IID = Intrinsic::x86_avx512_vpdpwssds_512; 3992 else 3993 llvm_unreachable("Unexpected intrinsic"); 3994 3995 Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(1), 3996 CI->getArgOperand(2)}; 3997 Rep = Builder.CreateIntrinsic(IID, {}, Args); 3998 Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType()) 3999 : CI->getArgOperand(0); 4000 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru); 4001 } else if (Name == "addcarryx.u32" || Name == "addcarryx.u64" || 4002 Name == "addcarry.u32" || Name == "addcarry.u64" || 4003 Name == "subborrow.u32" || Name == "subborrow.u64") { 4004 Intrinsic::ID IID; 4005 if (Name[0] == 'a' && Name.back() == '2') 4006 IID = Intrinsic::x86_addcarry_32; 4007 else if (Name[0] == 'a' && Name.back() == '4') 4008 IID = Intrinsic::x86_addcarry_64; 4009 else if (Name[0] == 's' && Name.back() == '2') 4010 IID = Intrinsic::x86_subborrow_32; 4011 else if (Name[0] == 's' && Name.back() == '4') 4012 IID = Intrinsic::x86_subborrow_64; 4013 else 4014 llvm_unreachable("Unexpected intrinsic"); 4015 4016 // Make a call with 3 operands. 4017 Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(1), 4018 CI->getArgOperand(2)}; 4019 Value *NewCall = Builder.CreateIntrinsic(IID, {}, Args); 4020 4021 // Extract the second result and store it. 4022 Value *Data = Builder.CreateExtractValue(NewCall, 1); 4023 Builder.CreateAlignedStore(Data, CI->getArgOperand(3), Align(1)); 4024 // Replace the original call result with the first result of the new call. 
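// The new intrinsics return a { i8 carry, iN result } pair, e.g. { i8, i32 }
// for addcarry.u32; the old forms returned only the carry and stored the
// result through the pointer operand.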
4025 Value *CF = Builder.CreateExtractValue(NewCall, 0);
4026
4027 CI->replaceAllUsesWith(CF);
4028 Rep = nullptr;
4029 } else if (Name.starts_with("avx512.mask.") &&
4030 upgradeAVX512MaskToSelect(Name, Builder, *CI, Rep)) {
4031 // Rep will be updated by the call in the condition.
4032 }
4033
4034 return Rep;
4035 }
4036
4037 static Value *upgradeAArch64IntrinsicCall(StringRef Name, CallBase *CI,
4038 Function *F, IRBuilder<> &Builder) {
4039 if (Name.starts_with("neon.bfcvt")) {
4040 if (Name.starts_with("neon.bfcvtn2")) {
4041 SmallVector<int, 32> LoMask(4);
4042 std::iota(LoMask.begin(), LoMask.end(), 0);
4043 SmallVector<int, 32> ConcatMask(8);
4044 std::iota(ConcatMask.begin(), ConcatMask.end(), 0);
4045 Value *Inactive = Builder.CreateShuffleVector(CI->getOperand(0), LoMask);
4046 Value *Trunc =
4047 Builder.CreateFPTrunc(CI->getOperand(1), Inactive->getType());
4048 return Builder.CreateShuffleVector(Inactive, Trunc, ConcatMask);
4049 } else if (Name.starts_with("neon.bfcvtn")) {
4050 SmallVector<int, 32> ConcatMask(8);
4051 std::iota(ConcatMask.begin(), ConcatMask.end(), 0);
4052 Type *V4BF16 =
4053 FixedVectorType::get(Type::getBFloatTy(F->getContext()), 4);
4054 Value *Trunc = Builder.CreateFPTrunc(CI->getOperand(0), V4BF16);
4056 return Builder.CreateShuffleVector(
4057 Trunc, ConstantAggregateZero::get(V4BF16), ConcatMask);
4058 } else {
4059 return Builder.CreateFPTrunc(CI->getOperand(0),
4060 Type::getBFloatTy(F->getContext()));
4061 }
4062 } else if (Name.starts_with("sve.fcvt")) {
4063 Intrinsic::ID NewID =
4064 StringSwitch<Intrinsic::ID>(Name)
4065 .Case("sve.fcvt.bf16f32", Intrinsic::aarch64_sve_fcvt_bf16f32_v2)
4066 .Case("sve.fcvtnt.bf16f32",
4067 Intrinsic::aarch64_sve_fcvtnt_bf16f32_v2)
4068 .Default(Intrinsic::not_intrinsic);
4069 if (NewID == Intrinsic::not_intrinsic)
4070 llvm_unreachable("Unhandled Intrinsic!");
4071
4072 SmallVector<Value *, 3> Args(CI->args());
4073
4074 // The original intrinsics incorrectly used a predicate based on the
4075 // smallest element type rather than the largest.
4076 Type *BadPredTy = ScalableVectorType::get(Builder.getInt1Ty(), 8);
4077 Type *GoodPredTy = ScalableVectorType::get(Builder.getInt1Ty(), 4);
4078
4079 if (Args[1]->getType() != BadPredTy)
4080 llvm_unreachable("Unexpected predicate type!");
4081
4082 Args[1] = Builder.CreateIntrinsic(Intrinsic::aarch64_sve_convert_to_svbool,
4083 BadPredTy, Args[1]);
4084 Args[1] = Builder.CreateIntrinsic(
4085 Intrinsic::aarch64_sve_convert_from_svbool, GoodPredTy, Args[1]);
4086
4087 return Builder.CreateIntrinsic(NewID, {}, Args, /*FMFSource=*/nullptr,
4088 CI->getName());
4089 }
4090
4091 llvm_unreachable("Unhandled Intrinsic!");
4092 }
4093
4094 static Value *upgradeARMIntrinsicCall(StringRef Name, CallBase *CI, Function *F,
4095 IRBuilder<> &Builder) {
4096 if (Name == "mve.vctp64.old") {
4097 // Replace the old v4i1 vctp64 with a v2i1 vctp and predicate-casts to the
4098 // correct type.
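// Roughly: %p = <2 x i1> @llvm.arm.mve.vctp64(%n) is converted to an i32
// predicate value with arm.mve.pred.v2i, then rebuilt as the old <4 x i1>
// type with arm.mve.pred.i2v.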
4099 Value *VCTP = Builder.CreateIntrinsic(Intrinsic::arm_mve_vctp64, {}, 4100 CI->getArgOperand(0), 4101 /*FMFSource=*/nullptr, CI->getName()); 4102 Value *C1 = Builder.CreateIntrinsic( 4103 Intrinsic::arm_mve_pred_v2i, 4104 {VectorType::get(Builder.getInt1Ty(), 2, false)}, VCTP); 4105 return Builder.CreateIntrinsic( 4106 Intrinsic::arm_mve_pred_i2v, 4107 {VectorType::get(Builder.getInt1Ty(), 4, false)}, C1); 4108 } else if (Name == "mve.mull.int.predicated.v2i64.v4i32.v4i1" || 4109 Name == "mve.vqdmull.predicated.v2i64.v4i32.v4i1" || 4110 Name == "mve.vldr.gather.base.predicated.v2i64.v2i64.v4i1" || 4111 Name == "mve.vldr.gather.base.wb.predicated.v2i64.v2i64.v4i1" || 4112 Name == 4113 "mve.vldr.gather.offset.predicated.v2i64.p0i64.v2i64.v4i1" || 4114 Name == "mve.vldr.gather.offset.predicated.v2i64.p0.v2i64.v4i1" || 4115 Name == "mve.vstr.scatter.base.predicated.v2i64.v2i64.v4i1" || 4116 Name == "mve.vstr.scatter.base.wb.predicated.v2i64.v2i64.v4i1" || 4117 Name == 4118 "mve.vstr.scatter.offset.predicated.p0i64.v2i64.v2i64.v4i1" || 4119 Name == "mve.vstr.scatter.offset.predicated.p0.v2i64.v2i64.v4i1" || 4120 Name == "cde.vcx1q.predicated.v2i64.v4i1" || 4121 Name == "cde.vcx1qa.predicated.v2i64.v4i1" || 4122 Name == "cde.vcx2q.predicated.v2i64.v4i1" || 4123 Name == "cde.vcx2qa.predicated.v2i64.v4i1" || 4124 Name == "cde.vcx3q.predicated.v2i64.v4i1" || 4125 Name == "cde.vcx3qa.predicated.v2i64.v4i1") { 4126 std::vector<Type *> Tys; 4127 unsigned ID = CI->getIntrinsicID(); 4128 Type *V2I1Ty = FixedVectorType::get(Builder.getInt1Ty(), 2); 4129 switch (ID) { 4130 case Intrinsic::arm_mve_mull_int_predicated: 4131 case Intrinsic::arm_mve_vqdmull_predicated: 4132 case Intrinsic::arm_mve_vldr_gather_base_predicated: 4133 Tys = {CI->getType(), CI->getOperand(0)->getType(), V2I1Ty}; 4134 break; 4135 case Intrinsic::arm_mve_vldr_gather_base_wb_predicated: 4136 case Intrinsic::arm_mve_vstr_scatter_base_predicated: 4137 case Intrinsic::arm_mve_vstr_scatter_base_wb_predicated: 4138 Tys = {CI->getOperand(0)->getType(), CI->getOperand(0)->getType(), 4139 V2I1Ty}; 4140 break; 4141 case Intrinsic::arm_mve_vldr_gather_offset_predicated: 4142 Tys = {CI->getType(), CI->getOperand(0)->getType(), 4143 CI->getOperand(1)->getType(), V2I1Ty}; 4144 break; 4145 case Intrinsic::arm_mve_vstr_scatter_offset_predicated: 4146 Tys = {CI->getOperand(0)->getType(), CI->getOperand(1)->getType(), 4147 CI->getOperand(2)->getType(), V2I1Ty}; 4148 break; 4149 case Intrinsic::arm_cde_vcx1q_predicated: 4150 case Intrinsic::arm_cde_vcx1qa_predicated: 4151 case Intrinsic::arm_cde_vcx2q_predicated: 4152 case Intrinsic::arm_cde_vcx2qa_predicated: 4153 case Intrinsic::arm_cde_vcx3q_predicated: 4154 case Intrinsic::arm_cde_vcx3qa_predicated: 4155 Tys = {CI->getOperand(1)->getType(), V2I1Ty}; 4156 break; 4157 default: 4158 llvm_unreachable("Unhandled Intrinsic!"); 4159 } 4160 4161 std::vector<Value *> Ops; 4162 for (Value *Op : CI->args()) { 4163 Type *Ty = Op->getType(); 4164 if (Ty->getScalarSizeInBits() == 1) { 4165 Value *C1 = Builder.CreateIntrinsic( 4166 Intrinsic::arm_mve_pred_v2i, 4167 {VectorType::get(Builder.getInt1Ty(), 4, false)}, Op); 4168 Op = Builder.CreateIntrinsic(Intrinsic::arm_mve_pred_i2v, {V2I1Ty}, C1); 4169 } 4170 Ops.push_back(Op); 4171 } 4172 4173 return Builder.CreateIntrinsic(ID, Tys, Ops, /*FMFSource=*/nullptr, 4174 CI->getName()); 4175 } 4176 llvm_unreachable("Unknown function for ARM CallBase upgrade."); 4177 } 4178 4179 // These are expected to have the arguments: 4180 // atomic.intrin (ptr, rmw_value, 
ordering, scope, isVolatile) 4181 // 4182 // Except for int_amdgcn_ds_fadd_v2bf16 which only has (ptr, rmw_value). 4183 // 4184 static Value *upgradeAMDGCNIntrinsicCall(StringRef Name, CallBase *CI, 4185 Function *F, IRBuilder<> &Builder) { 4186 AtomicRMWInst::BinOp RMWOp = 4187 StringSwitch<AtomicRMWInst::BinOp>(Name) 4188 .StartsWith("ds.fadd", AtomicRMWInst::FAdd) 4189 .StartsWith("ds.fmin", AtomicRMWInst::FMin) 4190 .StartsWith("ds.fmax", AtomicRMWInst::FMax) 4191 .StartsWith("atomic.inc.", AtomicRMWInst::UIncWrap) 4192 .StartsWith("atomic.dec.", AtomicRMWInst::UDecWrap) 4193 .StartsWith("global.atomic.fadd", AtomicRMWInst::FAdd) 4194 .StartsWith("flat.atomic.fadd", AtomicRMWInst::FAdd) 4195 .StartsWith("global.atomic.fmin", AtomicRMWInst::FMin) 4196 .StartsWith("flat.atomic.fmin", AtomicRMWInst::FMin) 4197 .StartsWith("global.atomic.fmax", AtomicRMWInst::FMax) 4198 .StartsWith("flat.atomic.fmax", AtomicRMWInst::FMax); 4199 4200 unsigned NumOperands = CI->getNumOperands(); 4201 if (NumOperands < 3) // Malformed bitcode. 4202 return nullptr; 4203 4204 Value *Ptr = CI->getArgOperand(0); 4205 PointerType *PtrTy = dyn_cast<PointerType>(Ptr->getType()); 4206 if (!PtrTy) // Malformed. 4207 return nullptr; 4208 4209 Value *Val = CI->getArgOperand(1); 4210 if (Val->getType() != CI->getType()) // Malformed. 4211 return nullptr; 4212 4213 ConstantInt *OrderArg = nullptr; 4214 bool IsVolatile = false; 4215 4216 // These should have 5 arguments (plus the callee). A separate version of the 4217 // ds_fadd intrinsic was defined for bf16 which was missing arguments. 4218 if (NumOperands > 3) 4219 OrderArg = dyn_cast<ConstantInt>(CI->getArgOperand(2)); 4220 4221 // Ignore scope argument at 3 4222 4223 if (NumOperands > 5) { 4224 ConstantInt *VolatileArg = dyn_cast<ConstantInt>(CI->getArgOperand(4)); 4225 IsVolatile = !VolatileArg || !VolatileArg->isZero(); 4226 } 4227 4228 AtomicOrdering Order = AtomicOrdering::SequentiallyConsistent; 4229 if (OrderArg && isValidAtomicOrdering(OrderArg->getZExtValue())) 4230 Order = static_cast<AtomicOrdering>(OrderArg->getZExtValue()); 4231 if (Order == AtomicOrdering::NotAtomic || Order == AtomicOrdering::Unordered) 4232 Order = AtomicOrdering::SequentiallyConsistent; 4233 4234 LLVMContext &Ctx = F->getContext(); 4235 4236 // Handle the v2bf16 intrinsic which used <2 x i16> instead of <2 x bfloat> 4237 Type *RetTy = CI->getType(); 4238 if (VectorType *VT = dyn_cast<VectorType>(RetTy)) { 4239 if (VT->getElementType()->isIntegerTy(16)) { 4240 VectorType *AsBF16 = 4241 VectorType::get(Type::getBFloatTy(Ctx), VT->getElementCount()); 4242 Val = Builder.CreateBitCast(Val, AsBF16); 4243 } 4244 } 4245 4246 // The scope argument never really worked correctly. Use agent as the most 4247 // conservative option which should still always produce the instruction. 
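// E.g. an old ds.fadd call becomes roughly:
//   %old = atomicrmw fadd ptr %ptr, <ty> %val syncscope("agent") seq_cst
// with seq_cst being the fallback when no valid ordering operand is present.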
4248 SyncScope::ID SSID = Ctx.getOrInsertSyncScopeID("agent"); 4249 AtomicRMWInst *RMW = 4250 Builder.CreateAtomicRMW(RMWOp, Ptr, Val, std::nullopt, Order, SSID); 4251 4252 unsigned AddrSpace = PtrTy->getAddressSpace(); 4253 if (AddrSpace != AMDGPUAS::LOCAL_ADDRESS) { 4254 MDNode *EmptyMD = MDNode::get(F->getContext(), {}); 4255 RMW->setMetadata("amdgpu.no.fine.grained.memory", EmptyMD); 4256 if (RMWOp == AtomicRMWInst::FAdd && RetTy->isFloatTy()) 4257 RMW->setMetadata("amdgpu.ignore.denormal.mode", EmptyMD); 4258 } 4259 4260 if (AddrSpace == AMDGPUAS::FLAT_ADDRESS) { 4261 MDBuilder MDB(F->getContext()); 4262 MDNode *RangeNotPrivate = 4263 MDB.createRange(APInt(32, AMDGPUAS::PRIVATE_ADDRESS), 4264 APInt(32, AMDGPUAS::PRIVATE_ADDRESS + 1)); 4265 RMW->setMetadata(LLVMContext::MD_noalias_addrspace, RangeNotPrivate); 4266 } 4267 4268 if (IsVolatile) 4269 RMW->setVolatile(true); 4270 4271 return Builder.CreateBitCast(RMW, RetTy); 4272 } 4273 4274 /// Helper to unwrap intrinsic call MetadataAsValue operands. 4275 template <typename MDType> 4276 static MDType *unwrapMAVOp(CallBase *CI, unsigned Op) { 4277 if (MetadataAsValue *MAV = dyn_cast<MetadataAsValue>(CI->getArgOperand(Op))) 4278 return dyn_cast<MDType>(MAV->getMetadata()); 4279 return nullptr; 4280 } 4281 4282 /// Convert debug intrinsic calls to non-instruction debug records. 4283 /// \p Name - Final part of the intrinsic name, e.g. 'value' in llvm.dbg.value. 4284 /// \p CI - The debug intrinsic call. 4285 static void upgradeDbgIntrinsicToDbgRecord(StringRef Name, CallBase *CI) { 4286 DbgRecord *DR = nullptr; 4287 if (Name == "label") { 4288 DR = new DbgLabelRecord(unwrapMAVOp<DILabel>(CI, 0), CI->getDebugLoc()); 4289 } else if (Name == "assign") { 4290 DR = new DbgVariableRecord( 4291 unwrapMAVOp<Metadata>(CI, 0), unwrapMAVOp<DILocalVariable>(CI, 1), 4292 unwrapMAVOp<DIExpression>(CI, 2), unwrapMAVOp<DIAssignID>(CI, 3), 4293 unwrapMAVOp<Metadata>(CI, 4), unwrapMAVOp<DIExpression>(CI, 5), 4294 CI->getDebugLoc()); 4295 } else if (Name == "declare") { 4296 DR = new DbgVariableRecord( 4297 unwrapMAVOp<Metadata>(CI, 0), unwrapMAVOp<DILocalVariable>(CI, 1), 4298 unwrapMAVOp<DIExpression>(CI, 2), CI->getDebugLoc(), 4299 DbgVariableRecord::LocationType::Declare); 4300 } else if (Name == "addr") { 4301 // Upgrade dbg.addr to dbg.value with DW_OP_deref. 4302 DIExpression *Expr = unwrapMAVOp<DIExpression>(CI, 2); 4303 Expr = DIExpression::append(Expr, dwarf::DW_OP_deref); 4304 DR = new DbgVariableRecord(unwrapMAVOp<Metadata>(CI, 0), 4305 unwrapMAVOp<DILocalVariable>(CI, 1), Expr, 4306 CI->getDebugLoc()); 4307 } else if (Name == "value") { 4308 // An old version of dbg.value had an extra offset argument. 4309 unsigned VarOp = 1; 4310 unsigned ExprOp = 2; 4311 if (CI->arg_size() == 4) { 4312 auto *Offset = dyn_cast_or_null<Constant>(CI->getArgOperand(1)); 4313 // Nonzero offset dbg.values get dropped without a replacement. 4314 if (!Offset || !Offset->isZeroValue()) 4315 return; 4316 VarOp = 2; 4317 ExprOp = 3; 4318 } 4319 DR = new DbgVariableRecord( 4320 unwrapMAVOp<Metadata>(CI, 0), unwrapMAVOp<DILocalVariable>(CI, VarOp), 4321 unwrapMAVOp<DIExpression>(CI, ExprOp), CI->getDebugLoc()); 4322 } 4323 assert(DR && "Unhandled intrinsic kind in upgrade to DbgRecord"); 4324 CI->getParent()->insertDbgRecordBefore(DR, CI->getIterator()); 4325 } 4326 4327 /// Upgrade a call to an old intrinsic. All argument and return casting must be 4328 /// provided to seamlessly integrate with existing context. 
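///
/// For example (illustrative), an old SSE4.1 ptest call
///   %r = call i32 @llvm.x86.sse41.ptestc(<4 x float> %a, <4 x float> %b)
/// is re-emitted against the new declaration with both operands bitcast to
/// <2 x i64>, after which the original call is RAUW'd and erased.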
4329 void llvm::UpgradeIntrinsicCall(CallBase *CI, Function *NewFn) { 4330 // Note dyn_cast to Function is not quite the same as getCalledFunction, which 4331 // checks the callee's function type matches. It's likely we need to handle 4332 // type changes here. 4333 Function *F = dyn_cast<Function>(CI->getCalledOperand()); 4334 if (!F) 4335 return; 4336 4337 LLVMContext &C = CI->getContext(); 4338 IRBuilder<> Builder(C); 4339 Builder.SetInsertPoint(CI->getParent(), CI->getIterator()); 4340 4341 if (!NewFn) { 4342 bool FallthroughToDefaultUpgrade = false; 4343 // Get the Function's name. 4344 StringRef Name = F->getName(); 4345 4346 assert(Name.starts_with("llvm.") && "Intrinsic doesn't start with 'llvm.'"); 4347 Name = Name.substr(5); 4348 4349 bool IsX86 = Name.consume_front("x86."); 4350 bool IsNVVM = Name.consume_front("nvvm."); 4351 bool IsAArch64 = Name.consume_front("aarch64."); 4352 bool IsARM = Name.consume_front("arm."); 4353 bool IsAMDGCN = Name.consume_front("amdgcn."); 4354 bool IsDbg = Name.consume_front("dbg."); 4355 Value *Rep = nullptr; 4356 4357 if (!IsX86 && Name == "stackprotectorcheck") { 4358 Rep = nullptr; 4359 } else if (IsNVVM) { 4360 Rep = upgradeNVVMIntrinsicCall(Name, CI, F, Builder); 4361 } else if (IsX86) { 4362 Rep = upgradeX86IntrinsicCall(Name, CI, F, Builder); 4363 } else if (IsAArch64) { 4364 Rep = upgradeAArch64IntrinsicCall(Name, CI, F, Builder); 4365 } else if (IsARM) { 4366 Rep = upgradeARMIntrinsicCall(Name, CI, F, Builder); 4367 } else if (IsAMDGCN) { 4368 Rep = upgradeAMDGCNIntrinsicCall(Name, CI, F, Builder); 4369 } else if (IsDbg) { 4370 // We might have decided we don't want the new format after all between 4371 // first requesting the upgrade and now; skip the conversion if that is 4372 // the case, and check here to see if the intrinsic needs to be upgraded 4373 // normally. 4374 if (!CI->getModule()->IsNewDbgInfoFormat) { 4375 bool NeedsUpgrade = 4376 upgradeIntrinsicFunction1(CI->getCalledFunction(), NewFn, false); 4377 if (!NeedsUpgrade) 4378 return; 4379 FallthroughToDefaultUpgrade = true; 4380 } else { 4381 upgradeDbgIntrinsicToDbgRecord(Name, CI); 4382 } 4383 } else { 4384 llvm_unreachable("Unknown function for CallBase upgrade."); 4385 } 4386 4387 if (!FallthroughToDefaultUpgrade) { 4388 if (Rep) 4389 CI->replaceAllUsesWith(Rep); 4390 CI->eraseFromParent(); 4391 return; 4392 } 4393 } 4394 4395 const auto &DefaultCase = [&]() -> void { 4396 if (CI->getFunctionType() == NewFn->getFunctionType()) { 4397 // Handle generic mangling change. 4398 assert( 4399 (CI->getCalledFunction()->getName() != NewFn->getName()) && 4400 "Unknown function for CallBase upgrade and isn't just a name change"); 4401 CI->setCalledFunction(NewFn); 4402 return; 4403 } 4404 4405 // This must be an upgrade from a named to a literal struct. 
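// For example (illustrative; %pair and @llvm.some.intrinsic are hypothetical):
//   %pair = type { i32, i32 }
//   %r = call %pair @llvm.some.intrinsic(...)
// where the new declaration returns the literal { i32, i32 }. The loop below
// rebuilds the named-struct value element by element with
// extractvalue/insertvalue.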
4406 if (auto *OldST = dyn_cast<StructType>(CI->getType())) { 4407 assert(OldST != NewFn->getReturnType() && 4408 "Return type must have changed"); 4409 assert(OldST->getNumElements() == 4410 cast<StructType>(NewFn->getReturnType())->getNumElements() && 4411 "Must have same number of elements"); 4412 4413 SmallVector<Value *> Args(CI->args()); 4414 CallInst *NewCI = Builder.CreateCall(NewFn, Args); 4415 NewCI->setAttributes(CI->getAttributes()); 4416 Value *Res = PoisonValue::get(OldST); 4417 for (unsigned Idx = 0; Idx < OldST->getNumElements(); ++Idx) { 4418 Value *Elem = Builder.CreateExtractValue(NewCI, Idx); 4419 Res = Builder.CreateInsertValue(Res, Elem, Idx); 4420 } 4421 CI->replaceAllUsesWith(Res); 4422 CI->eraseFromParent(); 4423 return; 4424 } 4425 4426 // We're probably about to produce something invalid. Let the verifier catch 4427 // it instead of dying here. 4428 CI->setCalledOperand( 4429 ConstantExpr::getPointerCast(NewFn, CI->getCalledOperand()->getType())); 4430 return; 4431 }; 4432 CallInst *NewCall = nullptr; 4433 switch (NewFn->getIntrinsicID()) { 4434 default: { 4435 DefaultCase(); 4436 return; 4437 } 4438 case Intrinsic::arm_neon_vst1: 4439 case Intrinsic::arm_neon_vst2: 4440 case Intrinsic::arm_neon_vst3: 4441 case Intrinsic::arm_neon_vst4: 4442 case Intrinsic::arm_neon_vst2lane: 4443 case Intrinsic::arm_neon_vst3lane: 4444 case Intrinsic::arm_neon_vst4lane: { 4445 SmallVector<Value *, 4> Args(CI->args()); 4446 NewCall = Builder.CreateCall(NewFn, Args); 4447 break; 4448 } 4449 case Intrinsic::aarch64_sve_bfmlalb_lane_v2: 4450 case Intrinsic::aarch64_sve_bfmlalt_lane_v2: 4451 case Intrinsic::aarch64_sve_bfdot_lane_v2: { 4452 LLVMContext &Ctx = F->getParent()->getContext(); 4453 SmallVector<Value *, 4> Args(CI->args()); 4454 Args[3] = ConstantInt::get(Type::getInt32Ty(Ctx), 4455 cast<ConstantInt>(Args[3])->getZExtValue()); 4456 NewCall = Builder.CreateCall(NewFn, Args); 4457 break; 4458 } 4459 case Intrinsic::aarch64_sve_ld3_sret: 4460 case Intrinsic::aarch64_sve_ld4_sret: 4461 case Intrinsic::aarch64_sve_ld2_sret: { 4462 StringRef Name = F->getName(); 4463 Name = Name.substr(5); 4464 unsigned N = StringSwitch<unsigned>(Name) 4465 .StartsWith("aarch64.sve.ld2", 2) 4466 .StartsWith("aarch64.sve.ld3", 3) 4467 .StartsWith("aarch64.sve.ld4", 4) 4468 .Default(0); 4469 auto *RetTy = cast<ScalableVectorType>(F->getReturnType()); 4470 unsigned MinElts = RetTy->getMinNumElements() / N; 4471 SmallVector<Value *, 2> Args(CI->args()); 4472 Value *NewLdCall = Builder.CreateCall(NewFn, Args); 4473 Value *Ret = llvm::PoisonValue::get(RetTy); 4474 for (unsigned I = 0; I < N; I++) { 4475 Value *Idx = ConstantInt::get(Type::getInt64Ty(C), I * MinElts); 4476 Value *SRet = Builder.CreateExtractValue(NewLdCall, I); 4477 Ret = Builder.CreateInsertVector(RetTy, Ret, SRet, Idx); 4478 } 4479 NewCall = dyn_cast<CallInst>(Ret); 4480 break; 4481 } 4482 4483 case Intrinsic::coro_end: { 4484 SmallVector<Value *, 3> Args(CI->args()); 4485 Args.push_back(ConstantTokenNone::get(CI->getContext())); 4486 NewCall = Builder.CreateCall(NewFn, Args); 4487 break; 4488 } 4489 4490 case Intrinsic::vector_extract: { 4491 StringRef Name = F->getName(); 4492 Name = Name.substr(5); // Strip llvm 4493 if (!Name.starts_with("aarch64.sve.tuple.get")) { 4494 DefaultCase(); 4495 return; 4496 } 4497 auto *RetTy = cast<ScalableVectorType>(F->getReturnType()); 4498 unsigned MinElts = RetTy->getMinNumElements(); 4499 unsigned I = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue(); 4500 Value *NewIdx = 
ConstantInt::get(Type::getInt64Ty(C), I * MinElts); 4501 NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0), NewIdx}); 4502 break; 4503 } 4504 4505 case Intrinsic::vector_insert: { 4506 StringRef Name = F->getName(); 4507 Name = Name.substr(5); 4508 if (!Name.starts_with("aarch64.sve.tuple")) { 4509 DefaultCase(); 4510 return; 4511 } 4512 if (Name.starts_with("aarch64.sve.tuple.set")) { 4513 unsigned I = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue(); 4514 auto *Ty = cast<ScalableVectorType>(CI->getArgOperand(2)->getType()); 4515 Value *NewIdx = 4516 ConstantInt::get(Type::getInt64Ty(C), I * Ty->getMinNumElements()); 4517 NewCall = Builder.CreateCall( 4518 NewFn, {CI->getArgOperand(0), CI->getArgOperand(2), NewIdx}); 4519 break; 4520 } 4521 if (Name.starts_with("aarch64.sve.tuple.create")) { 4522 unsigned N = StringSwitch<unsigned>(Name) 4523 .StartsWith("aarch64.sve.tuple.create2", 2) 4524 .StartsWith("aarch64.sve.tuple.create3", 3) 4525 .StartsWith("aarch64.sve.tuple.create4", 4) 4526 .Default(0); 4527 assert(N > 1 && "Create is expected to be between 2-4"); 4528 auto *RetTy = cast<ScalableVectorType>(F->getReturnType()); 4529 Value *Ret = llvm::PoisonValue::get(RetTy); 4530 unsigned MinElts = RetTy->getMinNumElements() / N; 4531 for (unsigned I = 0; I < N; I++) { 4532 Value *Idx = ConstantInt::get(Type::getInt64Ty(C), I * MinElts); 4533 Value *V = CI->getArgOperand(I); 4534 Ret = Builder.CreateInsertVector(RetTy, Ret, V, Idx); 4535 } 4536 NewCall = dyn_cast<CallInst>(Ret); 4537 } 4538 break; 4539 } 4540 4541 case Intrinsic::arm_neon_bfdot: 4542 case Intrinsic::arm_neon_bfmmla: 4543 case Intrinsic::arm_neon_bfmlalb: 4544 case Intrinsic::arm_neon_bfmlalt: 4545 case Intrinsic::aarch64_neon_bfdot: 4546 case Intrinsic::aarch64_neon_bfmmla: 4547 case Intrinsic::aarch64_neon_bfmlalb: 4548 case Intrinsic::aarch64_neon_bfmlalt: { 4549 SmallVector<Value *, 3> Args; 4550 assert(CI->arg_size() == 3 && 4551 "Mismatch between function args and call args"); 4552 size_t OperandWidth = 4553 CI->getArgOperand(1)->getType()->getPrimitiveSizeInBits(); 4554 assert((OperandWidth == 64 || OperandWidth == 128) && 4555 "Unexpected operand width"); 4556 Type *NewTy = FixedVectorType::get(Type::getBFloatTy(C), OperandWidth / 16); 4557 auto Iter = CI->args().begin(); 4558 Args.push_back(*Iter++); 4559 Args.push_back(Builder.CreateBitCast(*Iter++, NewTy)); 4560 Args.push_back(Builder.CreateBitCast(*Iter++, NewTy)); 4561 NewCall = Builder.CreateCall(NewFn, Args); 4562 break; 4563 } 4564 4565 case Intrinsic::bitreverse: 4566 NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)}); 4567 break; 4568 4569 case Intrinsic::ctlz: 4570 case Intrinsic::cttz: 4571 assert(CI->arg_size() == 1 && 4572 "Mismatch between function args and call args"); 4573 NewCall = 4574 Builder.CreateCall(NewFn, {CI->getArgOperand(0), Builder.getFalse()}); 4575 break; 4576 4577 case Intrinsic::objectsize: { 4578 Value *NullIsUnknownSize = 4579 CI->arg_size() == 2 ? Builder.getFalse() : CI->getArgOperand(2); 4580 Value *Dynamic = 4581 CI->arg_size() < 4 ? 
Builder.getFalse() : CI->getArgOperand(3); 4582 NewCall = Builder.CreateCall( 4583 NewFn, {CI->getArgOperand(0), CI->getArgOperand(1), NullIsUnknownSize, Dynamic}); 4584 break; 4585 } 4586 4587 case Intrinsic::ctpop: 4588 NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)}); 4589 break; 4590 4591 case Intrinsic::convert_from_fp16: 4592 NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)}); 4593 break; 4594 4595 case Intrinsic::dbg_value: { 4596 StringRef Name = F->getName(); 4597 Name = Name.substr(5); // Strip llvm. 4598 // Upgrade `dbg.addr` to `dbg.value` with `DW_OP_deref`. 4599 if (Name.starts_with("dbg.addr")) { 4600 DIExpression *Expr = cast<DIExpression>( 4601 cast<MetadataAsValue>(CI->getArgOperand(2))->getMetadata()); 4602 Expr = DIExpression::append(Expr, dwarf::DW_OP_deref); 4603 NewCall = 4604 Builder.CreateCall(NewFn, {CI->getArgOperand(0), CI->getArgOperand(1), 4605 MetadataAsValue::get(C, Expr)}); 4606 break; 4607 } 4608 4609 // Upgrade from the old version that had an extra offset argument. 4610 assert(CI->arg_size() == 4); 4611 // Drop nonzero offsets instead of attempting to upgrade them. 4612 if (auto *Offset = dyn_cast_or_null<Constant>(CI->getArgOperand(1))) 4613 if (Offset->isZeroValue()) { 4614 NewCall = Builder.CreateCall( 4615 NewFn, 4616 {CI->getArgOperand(0), CI->getArgOperand(2), CI->getArgOperand(3)}); 4617 break; 4618 } 4619 CI->eraseFromParent(); 4620 return; 4621 } 4622 4623 case Intrinsic::ptr_annotation: 4624 // Upgrade from versions that lacked the annotation attribute argument. 4625 if (CI->arg_size() != 4) { 4626 DefaultCase(); 4627 return; 4628 } 4629 4630 // Create a new call with an added null annotation attribute argument. 4631 NewCall = 4632 Builder.CreateCall(NewFn, {CI->getArgOperand(0), CI->getArgOperand(1), 4633 CI->getArgOperand(2), CI->getArgOperand(3), 4634 Constant::getNullValue(Builder.getPtrTy())}); 4635 NewCall->takeName(CI); 4636 CI->replaceAllUsesWith(NewCall); 4637 CI->eraseFromParent(); 4638 return; 4639 4640 case Intrinsic::var_annotation: 4641 // Upgrade from versions that lacked the annotation attribute argument. 4642 if (CI->arg_size() != 4) { 4643 DefaultCase(); 4644 return; 4645 } 4646 // Create a new call with an added null annotation attribute argument. 4647 NewCall = 4648 Builder.CreateCall(NewFn, {CI->getArgOperand(0), CI->getArgOperand(1), 4649 CI->getArgOperand(2), CI->getArgOperand(3), 4650 Constant::getNullValue(Builder.getPtrTy())}); 4651 NewCall->takeName(CI); 4652 CI->replaceAllUsesWith(NewCall); 4653 CI->eraseFromParent(); 4654 return; 4655 4656 case Intrinsic::riscv_aes32dsi: 4657 case Intrinsic::riscv_aes32dsmi: 4658 case Intrinsic::riscv_aes32esi: 4659 case Intrinsic::riscv_aes32esmi: 4660 case Intrinsic::riscv_sm4ks: 4661 case Intrinsic::riscv_sm4ed: { 4662 // The last argument to these intrinsics used to be i8 and changed to i32. 4663 // The type overload for sm4ks and sm4ed was removed. 
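// For example (illustrative), on RV32 the old overloaded declaration
//   declare i32 @llvm.riscv.sm4ks.i32(i32, i32, i8)
// becomes the non-overloaded
//   declare i32 @llvm.riscv.sm4ks(i32, i32, i32)
// while the old RV64 i64 overload additionally needs its first two operands
// truncated to i32 and its result sign-extended back to i64, as done below.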
4664 Value *Arg2 = CI->getArgOperand(2);
4665 if (Arg2->getType()->isIntegerTy(32) && !CI->getType()->isIntegerTy(64))
4666 return;
4667
4668 Value *Arg0 = CI->getArgOperand(0);
4669 Value *Arg1 = CI->getArgOperand(1);
4670 if (CI->getType()->isIntegerTy(64)) {
4671 Arg0 = Builder.CreateTrunc(Arg0, Builder.getInt32Ty());
4672 Arg1 = Builder.CreateTrunc(Arg1, Builder.getInt32Ty());
4673 }
4674
4675 Arg2 = ConstantInt::get(Type::getInt32Ty(C),
4676 cast<ConstantInt>(Arg2)->getZExtValue());
4677
4678 NewCall = Builder.CreateCall(NewFn, {Arg0, Arg1, Arg2});
4679 Value *Res = NewCall;
4680 if (Res->getType() != CI->getType())
4681 Res = Builder.CreateIntCast(NewCall, CI->getType(), /*isSigned*/ true);
4682 NewCall->takeName(CI);
4683 CI->replaceAllUsesWith(Res);
4684 CI->eraseFromParent();
4685 return;
4686 }
4687 case Intrinsic::riscv_sha256sig0:
4688 case Intrinsic::riscv_sha256sig1:
4689 case Intrinsic::riscv_sha256sum0:
4690 case Intrinsic::riscv_sha256sum1:
4691 case Intrinsic::riscv_sm3p0:
4692 case Intrinsic::riscv_sm3p1: {
4693 // These intrinsics were overloaded on XLen. The overload was removed, so on
4694 // RV64 truncate the i64 argument to i32 and sign-extend the i32 result.
4695 if (!CI->getType()->isIntegerTy(64))
4696 return;
4697
4698 Value *Arg =
4699 Builder.CreateTrunc(CI->getArgOperand(0), Builder.getInt32Ty());
4700
4701 NewCall = Builder.CreateCall(NewFn, Arg);
4702 Value *Res =
4703 Builder.CreateIntCast(NewCall, CI->getType(), /*isSigned*/ true);
4704 NewCall->takeName(CI);
4705 CI->replaceAllUsesWith(Res);
4706 CI->eraseFromParent();
4707 return;
4708 }
4709
4710 case Intrinsic::x86_xop_vfrcz_ss:
4711 case Intrinsic::x86_xop_vfrcz_sd:
4712 NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(1)});
4713 break;
4714
4715 case Intrinsic::x86_xop_vpermil2pd:
4716 case Intrinsic::x86_xop_vpermil2ps:
4717 case Intrinsic::x86_xop_vpermil2pd_256:
4718 case Intrinsic::x86_xop_vpermil2ps_256: {
4719 SmallVector<Value *, 4> Args(CI->args());
4720 VectorType *FltIdxTy = cast<VectorType>(Args[2]->getType());
4721 VectorType *IntIdxTy = VectorType::getInteger(FltIdxTy);
4722 Args[2] = Builder.CreateBitCast(Args[2], IntIdxTy);
4723 NewCall = Builder.CreateCall(NewFn, Args);
4724 break;
4725 }
4726
4727 case Intrinsic::x86_sse41_ptestc:
4728 case Intrinsic::x86_sse41_ptestz:
4729 case Intrinsic::x86_sse41_ptestnzc: {
4730 // The arguments for these intrinsics used to be v4f32, and changed
4731 // to v2i64. This is purely a nop, since those are bitwise intrinsics.
4732 // So, the only thing required is a bitcast for both arguments.
4733 // First, check that the arguments have the old type.
4734 Value *Arg0 = CI->getArgOperand(0);
4735 if (Arg0->getType() != FixedVectorType::get(Type::getFloatTy(C), 4))
4736 return;
4737
4738 // Old intrinsic: add bitcasts.
4739 Value *Arg1 = CI->getArgOperand(1);
4740
4741 auto *NewVecTy = FixedVectorType::get(Type::getInt64Ty(C), 2);
4742
4743 Value *BC0 = Builder.CreateBitCast(Arg0, NewVecTy, "cast");
4744 Value *BC1 = Builder.CreateBitCast(Arg1, NewVecTy, "cast");
4745
4746 NewCall = Builder.CreateCall(NewFn, {BC0, BC1});
4747 break;
4748 }
4749
4750 case Intrinsic::x86_rdtscp: {
4751 // This used to take one argument. If we have no arguments, it is already
4752 // upgraded.
4753 if (CI->getNumOperands() == 0)
4754 return;
4755
4756 NewCall = Builder.CreateCall(NewFn);
4757 // Extract the second result and store it.
4758 Value *Data = Builder.CreateExtractValue(NewCall, 1); 4759 Builder.CreateAlignedStore(Data, CI->getArgOperand(0), Align(1)); 4760 // Replace the original call result with the first result of the new call. 4761 Value *TSC = Builder.CreateExtractValue(NewCall, 0); 4762 4763 NewCall->takeName(CI); 4764 CI->replaceAllUsesWith(TSC); 4765 CI->eraseFromParent(); 4766 return; 4767 } 4768 4769 case Intrinsic::x86_sse41_insertps: 4770 case Intrinsic::x86_sse41_dppd: 4771 case Intrinsic::x86_sse41_dpps: 4772 case Intrinsic::x86_sse41_mpsadbw: 4773 case Intrinsic::x86_avx_dp_ps_256: 4774 case Intrinsic::x86_avx2_mpsadbw: { 4775 // Need to truncate the last argument from i32 to i8 -- this argument models 4776 // an inherently 8-bit immediate operand to these x86 instructions. 4777 SmallVector<Value *, 4> Args(CI->args()); 4778 4779 // Replace the last argument with a trunc. 4780 Args.back() = Builder.CreateTrunc(Args.back(), Type::getInt8Ty(C), "trunc"); 4781 NewCall = Builder.CreateCall(NewFn, Args); 4782 break; 4783 } 4784 4785 case Intrinsic::x86_avx512_mask_cmp_pd_128: 4786 case Intrinsic::x86_avx512_mask_cmp_pd_256: 4787 case Intrinsic::x86_avx512_mask_cmp_pd_512: 4788 case Intrinsic::x86_avx512_mask_cmp_ps_128: 4789 case Intrinsic::x86_avx512_mask_cmp_ps_256: 4790 case Intrinsic::x86_avx512_mask_cmp_ps_512: { 4791 SmallVector<Value *, 4> Args(CI->args()); 4792 unsigned NumElts = 4793 cast<FixedVectorType>(Args[0]->getType())->getNumElements(); 4794 Args[3] = getX86MaskVec(Builder, Args[3], NumElts); 4795 4796 NewCall = Builder.CreateCall(NewFn, Args); 4797 Value *Res = applyX86MaskOn1BitsVec(Builder, NewCall, nullptr); 4798 4799 NewCall->takeName(CI); 4800 CI->replaceAllUsesWith(Res); 4801 CI->eraseFromParent(); 4802 return; 4803 } 4804 4805 case Intrinsic::x86_avx512bf16_cvtne2ps2bf16_128: 4806 case Intrinsic::x86_avx512bf16_cvtne2ps2bf16_256: 4807 case Intrinsic::x86_avx512bf16_cvtne2ps2bf16_512: 4808 case Intrinsic::x86_avx512bf16_mask_cvtneps2bf16_128: 4809 case Intrinsic::x86_avx512bf16_cvtneps2bf16_256: 4810 case Intrinsic::x86_avx512bf16_cvtneps2bf16_512: { 4811 SmallVector<Value *, 4> Args(CI->args()); 4812 unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements(); 4813 if (NewFn->getIntrinsicID() == 4814 Intrinsic::x86_avx512bf16_mask_cvtneps2bf16_128) 4815 Args[1] = Builder.CreateBitCast( 4816 Args[1], FixedVectorType::get(Builder.getBFloatTy(), NumElts)); 4817 4818 NewCall = Builder.CreateCall(NewFn, Args); 4819 Value *Res = Builder.CreateBitCast( 4820 NewCall, FixedVectorType::get(Builder.getInt16Ty(), NumElts)); 4821 4822 NewCall->takeName(CI); 4823 CI->replaceAllUsesWith(Res); 4824 CI->eraseFromParent(); 4825 return; 4826 } 4827 case Intrinsic::x86_avx512bf16_dpbf16ps_128: 4828 case Intrinsic::x86_avx512bf16_dpbf16ps_256: 4829 case Intrinsic::x86_avx512bf16_dpbf16ps_512:{ 4830 SmallVector<Value *, 4> Args(CI->args()); 4831 unsigned NumElts = 4832 cast<FixedVectorType>(CI->getType())->getNumElements() * 2; 4833 Args[1] = Builder.CreateBitCast( 4834 Args[1], FixedVectorType::get(Builder.getBFloatTy(), NumElts)); 4835 Args[2] = Builder.CreateBitCast( 4836 Args[2], FixedVectorType::get(Builder.getBFloatTy(), NumElts)); 4837 4838 NewCall = Builder.CreateCall(NewFn, Args); 4839 break; 4840 } 4841 4842 case Intrinsic::thread_pointer: { 4843 NewCall = Builder.CreateCall(NewFn, {}); 4844 break; 4845 } 4846 4847 case Intrinsic::memcpy: 4848 case Intrinsic::memmove: 4849 case Intrinsic::memset: { 4850 // We have to make sure that the call signature is what we're expecting. 
4851 // We only want to change the old signatures by removing the alignment arg: 4852 // @llvm.mem[cpy|move]...(i8*, i8*, i[32|i64], i32, i1) 4853 // -> @llvm.mem[cpy|move]...(i8*, i8*, i[32|i64], i1) 4854 // @llvm.memset...(i8*, i8, i[32|64], i32, i1) 4855 // -> @llvm.memset...(i8*, i8, i[32|64], i1) 4856 // Note: i8*'s in the above can be any pointer type 4857 if (CI->arg_size() != 5) { 4858 DefaultCase(); 4859 return; 4860 } 4861 // Remove alignment argument (3), and add alignment attributes to the 4862 // dest/src pointers. 4863 Value *Args[4] = {CI->getArgOperand(0), CI->getArgOperand(1), 4864 CI->getArgOperand(2), CI->getArgOperand(4)}; 4865 NewCall = Builder.CreateCall(NewFn, Args); 4866 AttributeList OldAttrs = CI->getAttributes(); 4867 AttributeList NewAttrs = AttributeList::get( 4868 C, OldAttrs.getFnAttrs(), OldAttrs.getRetAttrs(), 4869 {OldAttrs.getParamAttrs(0), OldAttrs.getParamAttrs(1), 4870 OldAttrs.getParamAttrs(2), OldAttrs.getParamAttrs(4)}); 4871 NewCall->setAttributes(NewAttrs); 4872 auto *MemCI = cast<MemIntrinsic>(NewCall); 4873 // All mem intrinsics support dest alignment. 4874 const ConstantInt *Align = cast<ConstantInt>(CI->getArgOperand(3)); 4875 MemCI->setDestAlignment(Align->getMaybeAlignValue()); 4876 // Memcpy/Memmove also support source alignment. 4877 if (auto *MTI = dyn_cast<MemTransferInst>(MemCI)) 4878 MTI->setSourceAlignment(Align->getMaybeAlignValue()); 4879 break; 4880 } 4881 } 4882 assert(NewCall && "Should have either set this variable or returned through " 4883 "the default case"); 4884 NewCall->takeName(CI); 4885 CI->replaceAllUsesWith(NewCall); 4886 CI->eraseFromParent(); 4887 } 4888 4889 void llvm::UpgradeCallsToIntrinsic(Function *F) { 4890 assert(F && "Illegal attempt to upgrade a non-existent intrinsic."); 4891 4892 // Check if this function should be upgraded and get the replacement function 4893 // if there is one. 4894 Function *NewFn; 4895 if (UpgradeIntrinsicFunction(F, NewFn)) { 4896 // Replace all users of the old function with the new function or new 4897 // instructions. This is not a range loop because the call is deleted. 4898 for (User *U : make_early_inc_range(F->users())) 4899 if (CallBase *CB = dyn_cast<CallBase>(U)) 4900 UpgradeIntrinsicCall(CB, NewFn); 4901 4902 // Remove old function, no longer used, from the module. 4903 F->eraseFromParent(); 4904 } 4905 } 4906 4907 MDNode *llvm::UpgradeTBAANode(MDNode &MD) { 4908 const unsigned NumOperands = MD.getNumOperands(); 4909 if (NumOperands == 0) 4910 return &MD; // Invalid, punt to a verifier error. 4911 4912 // Check if the tag uses struct-path aware TBAA format. 
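// For example (illustrative): an old scalar tag
//   !1 = !{!"int", !0}
// does not, and is rewritten below into the struct-path form
//   !2 = !{!1, !1, i64 0}
// (base type, access type, offset 0); an old three-operand tag also carries
// its immutability operand over.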
4913 if (isa<MDNode>(MD.getOperand(0)) && NumOperands >= 3)
4914 return &MD;
4915
4916 auto &Context = MD.getContext();
4917 if (NumOperands == 3) {
4918 Metadata *Elts[] = {MD.getOperand(0), MD.getOperand(1)};
4919 MDNode *ScalarType = MDNode::get(Context, Elts);
4920 // Create an MDNode <ScalarType, ScalarType, offset 0, const>
4921 Metadata *Elts2[] = {ScalarType, ScalarType,
4922 ConstantAsMetadata::get(
4923 Constant::getNullValue(Type::getInt64Ty(Context))),
4924 MD.getOperand(2)};
4925 return MDNode::get(Context, Elts2);
4926 }
4927 // Create an MDNode <MD, MD, offset 0>
4928 Metadata *Elts[] = {&MD, &MD, ConstantAsMetadata::get(Constant::getNullValue(
4929 Type::getInt64Ty(Context)))};
4930 return MDNode::get(Context, Elts);
4931 }
4932
4933 Instruction *llvm::UpgradeBitCastInst(unsigned Opc, Value *V, Type *DestTy,
4934 Instruction *&Temp) {
4935 if (Opc != Instruction::BitCast)
4936 return nullptr;
4937
4938 Temp = nullptr;
4939 Type *SrcTy = V->getType();
4940 if (SrcTy->isPtrOrPtrVectorTy() && DestTy->isPtrOrPtrVectorTy() &&
4941 SrcTy->getPointerAddressSpace() != DestTy->getPointerAddressSpace()) {
4942 LLVMContext &Context = V->getContext();
4943
4944 // We have no information about target data layout, so we assume that
4945 // the maximum pointer size is 64 bits.
4946 Type *MidTy = Type::getInt64Ty(Context);
4947 Temp = CastInst::Create(Instruction::PtrToInt, V, MidTy);
4948
4949 return CastInst::Create(Instruction::IntToPtr, Temp, DestTy);
4950 }
4951
4952 return nullptr;
4953 }
4954
4955 Constant *llvm::UpgradeBitCastExpr(unsigned Opc, Constant *C, Type *DestTy) {
4956 if (Opc != Instruction::BitCast)
4957 return nullptr;
4958
4959 Type *SrcTy = C->getType();
4960 if (SrcTy->isPtrOrPtrVectorTy() && DestTy->isPtrOrPtrVectorTy() &&
4961 SrcTy->getPointerAddressSpace() != DestTy->getPointerAddressSpace()) {
4962 LLVMContext &Context = C->getContext();
4963
4964 // We have no information about target data layout, so we assume that
4965 // the maximum pointer size is 64 bits.
4966 Type *MidTy = Type::getInt64Ty(Context);
4967
4968 return ConstantExpr::getIntToPtr(ConstantExpr::getPtrToInt(C, MidTy),
4969 DestTy);
4970 }
4971
4972 return nullptr;
4973 }
4974
4975 /// Check the debug info version number; if it is outdated, drop the debug
4976 /// info. Return true if the module is modified.
4977 bool llvm::UpgradeDebugInfo(Module &M) {
4978 if (DisableAutoUpgradeDebugInfo)
4979 return false;
4980
4981 // We need to get metadata before the module is verified (i.e., getModuleFlag
4982 // makes assumptions that we haven't verified yet). Carefully extract the flag
4983 // from the metadata.
4984 unsigned Version = 0;
4985 if (NamedMDNode *ModFlags = M.getModuleFlagsMetadata()) {
4986 auto OpIt = find_if(ModFlags->operands(), [](const MDNode *Flag) {
4987 if (Flag->getNumOperands() < 3)
4988 return false;
4989 if (MDString *K = dyn_cast_or_null<MDString>(Flag->getOperand(1)))
4990 return K->getString() == "Debug Info Version";
4991 return false;
4992 });
4993 if (OpIt != ModFlags->op_end()) {
4994 const MDOperand &ValOp = (*OpIt)->getOperand(2);
4995 if (auto *CI = mdconst::dyn_extract_or_null<ConstantInt>(ValOp))
4996 Version = CI->getZExtValue();
4997 }
4998 }
4999
5000 if (Version == DEBUG_METADATA_VERSION) {
5001 bool BrokenDebugInfo = false;
5002 if (verifyModule(M, &llvm::errs(), &BrokenDebugInfo))
5003 report_fatal_error("Broken module found, compilation aborted!");
5004 if (!BrokenDebugInfo)
5005 // Everything is ok.
5006 return false;
5007 else {
5008 // Diagnose malformed debug info.
5009 DiagnosticInfoIgnoringInvalidDebugMetadata Diag(M); 5010 M.getContext().diagnose(Diag); 5011 } 5012 } 5013 bool Modified = StripDebugInfo(M); 5014 if (Modified && Version != DEBUG_METADATA_VERSION) { 5015 // Diagnose a version mismatch. 5016 DiagnosticInfoDebugMetadataVersion DiagVersion(M, Version); 5017 M.getContext().diagnose(DiagVersion); 5018 } 5019 return Modified; 5020 } 5021 5022 /// This checks for objc retain release marker which should be upgraded. It 5023 /// returns true if module is modified. 5024 static bool upgradeRetainReleaseMarker(Module &M) { 5025 bool Changed = false; 5026 const char *MarkerKey = "clang.arc.retainAutoreleasedReturnValueMarker"; 5027 NamedMDNode *ModRetainReleaseMarker = M.getNamedMetadata(MarkerKey); 5028 if (ModRetainReleaseMarker) { 5029 MDNode *Op = ModRetainReleaseMarker->getOperand(0); 5030 if (Op) { 5031 MDString *ID = dyn_cast_or_null<MDString>(Op->getOperand(0)); 5032 if (ID) { 5033 SmallVector<StringRef, 4> ValueComp; 5034 ID->getString().split(ValueComp, "#"); 5035 if (ValueComp.size() == 2) { 5036 std::string NewValue = ValueComp[0].str() + ";" + ValueComp[1].str(); 5037 ID = MDString::get(M.getContext(), NewValue); 5038 } 5039 M.addModuleFlag(Module::Error, MarkerKey, ID); 5040 M.eraseNamedMetadata(ModRetainReleaseMarker); 5041 Changed = true; 5042 } 5043 } 5044 } 5045 return Changed; 5046 } 5047 5048 void llvm::UpgradeARCRuntime(Module &M) { 5049 // This lambda converts normal function calls to ARC runtime functions to 5050 // intrinsic calls. 5051 auto UpgradeToIntrinsic = [&](const char *OldFunc, 5052 llvm::Intrinsic::ID IntrinsicFunc) { 5053 Function *Fn = M.getFunction(OldFunc); 5054 5055 if (!Fn) 5056 return; 5057 5058 Function *NewFn = 5059 llvm::Intrinsic::getOrInsertDeclaration(&M, IntrinsicFunc); 5060 5061 for (User *U : make_early_inc_range(Fn->users())) { 5062 CallInst *CI = dyn_cast<CallInst>(U); 5063 if (!CI || CI->getCalledFunction() != Fn) 5064 continue; 5065 5066 IRBuilder<> Builder(CI->getParent(), CI->getIterator()); 5067 FunctionType *NewFuncTy = NewFn->getFunctionType(); 5068 SmallVector<Value *, 2> Args; 5069 5070 // Don't upgrade the intrinsic if it's not valid to bitcast the return 5071 // value to the return type of the old function. 5072 if (NewFuncTy->getReturnType() != CI->getType() && 5073 !CastInst::castIsValid(Instruction::BitCast, CI, 5074 NewFuncTy->getReturnType())) 5075 continue; 5076 5077 bool InvalidCast = false; 5078 5079 for (unsigned I = 0, E = CI->arg_size(); I != E; ++I) { 5080 Value *Arg = CI->getArgOperand(I); 5081 5082 // Bitcast argument to the parameter type of the new function if it's 5083 // not a variadic argument. 5084 if (I < NewFuncTy->getNumParams()) { 5085 // Don't upgrade the intrinsic if it's not valid to bitcast the argument 5086 // to the parameter type of the new function. 5087 if (!CastInst::castIsValid(Instruction::BitCast, Arg, 5088 NewFuncTy->getParamType(I))) { 5089 InvalidCast = true; 5090 break; 5091 } 5092 Arg = Builder.CreateBitCast(Arg, NewFuncTy->getParamType(I)); 5093 } 5094 Args.push_back(Arg); 5095 } 5096 5097 if (InvalidCast) 5098 continue; 5099 5100 // Create a call instruction that calls the new function. 5101 CallInst *NewCall = Builder.CreateCall(NewFuncTy, NewFn, Args); 5102 NewCall->setTailCallKind(cast<CallInst>(CI)->getTailCallKind()); 5103 NewCall->takeName(CI); 5104 5105 // Bitcast the return value back to the type of the old call. 
5106 Value *NewRetVal = Builder.CreateBitCast(NewCall, CI->getType());
5107
5108 if (!CI->use_empty())
5109 CI->replaceAllUsesWith(NewRetVal);
5110 CI->eraseFromParent();
5111 }
5112
5113 if (Fn->use_empty())
5114 Fn->eraseFromParent();
5115 };
5116
5117 // Unconditionally convert a call to "clang.arc.use" to a call to
5118 // "llvm.objc.clang.arc.use".
5119 UpgradeToIntrinsic("clang.arc.use", llvm::Intrinsic::objc_clang_arc_use);
5120
5121 // Upgrade the retain/release marker. If there is no need to upgrade
5122 // the marker, that means either the module is already new enough to contain
5123 // new intrinsics or it is not ARC. There is no need to upgrade the runtime calls.
5124 if (!upgradeRetainReleaseMarker(M))
5125 return;
5126
5127 std::pair<const char *, llvm::Intrinsic::ID> RuntimeFuncs[] = {
5128 {"objc_autorelease", llvm::Intrinsic::objc_autorelease},
5129 {"objc_autoreleasePoolPop", llvm::Intrinsic::objc_autoreleasePoolPop},
5130 {"objc_autoreleasePoolPush", llvm::Intrinsic::objc_autoreleasePoolPush},
5131 {"objc_autoreleaseReturnValue",
5132 llvm::Intrinsic::objc_autoreleaseReturnValue},
5133 {"objc_copyWeak", llvm::Intrinsic::objc_copyWeak},
5134 {"objc_destroyWeak", llvm::Intrinsic::objc_destroyWeak},
5135 {"objc_initWeak", llvm::Intrinsic::objc_initWeak},
5136 {"objc_loadWeak", llvm::Intrinsic::objc_loadWeak},
5137 {"objc_loadWeakRetained", llvm::Intrinsic::objc_loadWeakRetained},
5138 {"objc_moveWeak", llvm::Intrinsic::objc_moveWeak},
5139 {"objc_release", llvm::Intrinsic::objc_release},
5140 {"objc_retain", llvm::Intrinsic::objc_retain},
5141 {"objc_retainAutorelease", llvm::Intrinsic::objc_retainAutorelease},
5142 {"objc_retainAutoreleaseReturnValue",
5143 llvm::Intrinsic::objc_retainAutoreleaseReturnValue},
5144 {"objc_retainAutoreleasedReturnValue",
5145 llvm::Intrinsic::objc_retainAutoreleasedReturnValue},
5146 {"objc_retainBlock", llvm::Intrinsic::objc_retainBlock},
5147 {"objc_storeStrong", llvm::Intrinsic::objc_storeStrong},
5148 {"objc_storeWeak", llvm::Intrinsic::objc_storeWeak},
5149 {"objc_unsafeClaimAutoreleasedReturnValue",
5150 llvm::Intrinsic::objc_unsafeClaimAutoreleasedReturnValue},
5151 {"objc_retainedObject", llvm::Intrinsic::objc_retainedObject},
5152 {"objc_unretainedObject", llvm::Intrinsic::objc_unretainedObject},
5153 {"objc_unretainedPointer", llvm::Intrinsic::objc_unretainedPointer},
5154 {"objc_retain_autorelease", llvm::Intrinsic::objc_retain_autorelease},
5155 {"objc_sync_enter", llvm::Intrinsic::objc_sync_enter},
5156 {"objc_sync_exit", llvm::Intrinsic::objc_sync_exit},
5157 {"objc_arc_annotation_topdown_bbstart",
5158 llvm::Intrinsic::objc_arc_annotation_topdown_bbstart},
5159 {"objc_arc_annotation_topdown_bbend",
5160 llvm::Intrinsic::objc_arc_annotation_topdown_bbend},
5161 {"objc_arc_annotation_bottomup_bbstart",
5162 llvm::Intrinsic::objc_arc_annotation_bottomup_bbstart},
5163 {"objc_arc_annotation_bottomup_bbend",
5164 llvm::Intrinsic::objc_arc_annotation_bottomup_bbend}};
5165
5166 for (auto &I : RuntimeFuncs)
5167 UpgradeToIntrinsic(I.first, I.second);
5168 }
5169
5170 bool llvm::UpgradeModuleFlags(Module &M) {
5171 NamedMDNode *ModFlags = M.getModuleFlagsMetadata();
5172 if (!ModFlags)
5173 return false;
5174
5175 bool HasObjCFlag = false, HasClassProperties = false, Changed = false;
5176 bool HasSwiftVersionFlag = false;
5177 uint8_t SwiftMajorVersion, SwiftMinorVersion;
5178 uint32_t SwiftABIVersion;
5179 auto Int8Ty = Type::getInt8Ty(M.getContext());
5180 auto Int32Ty = Type::getInt32Ty(M.getContext());
5181
5182 for (unsigned I = 0, E = ModFlags->getNumOperands(); I != E; ++I) {
5183 MDNode *Op = ModFlags->getOperand(I);
5184 if (Op->getNumOperands() != 3)
5185 continue;
5186 MDString *ID = dyn_cast_or_null<MDString>(Op->getOperand(1));
5187 if (!ID)
5188 continue;
5189 auto SetBehavior = [&](Module::ModFlagBehavior B) {
5190 Metadata *Ops[3] = {ConstantAsMetadata::get(ConstantInt::get(
5191 Type::getInt32Ty(M.getContext()), B)),
5192 MDString::get(M.getContext(), ID->getString()),
5193 Op->getOperand(2)};
5194 ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
5195 Changed = true;
5196 };
5197
5198 if (ID->getString() == "Objective-C Image Info Version")
5199 HasObjCFlag = true;
5200 if (ID->getString() == "Objective-C Class Properties")
5201 HasClassProperties = true;
5202 // Upgrade PIC from Error/Max to Min.
5203 if (ID->getString() == "PIC Level") {
5204 if (auto *Behavior =
5205 mdconst::dyn_extract_or_null<ConstantInt>(Op->getOperand(0))) {
5206 uint64_t V = Behavior->getLimitedValue();
5207 if (V == Module::Error || V == Module::Max)
5208 SetBehavior(Module::Min);
5209 }
5210 }
5211 // Upgrade "PIE Level" from Error to Max.
5212 if (ID->getString() == "PIE Level")
5213 if (auto *Behavior =
5214 mdconst::dyn_extract_or_null<ConstantInt>(Op->getOperand(0)))
5215 if (Behavior->getLimitedValue() == Module::Error)
5216 SetBehavior(Module::Max);
5217
5218 // Upgrade branch protection and return address signing module flags. The
5219 // module flag behavior for these flags was Error and is now Min.
5220 if (ID->getString() == "branch-target-enforcement" ||
5221 ID->getString().starts_with("sign-return-address")) {
5222 if (auto *Behavior =
5223 mdconst::dyn_extract_or_null<ConstantInt>(Op->getOperand(0))) {
5224 if (Behavior->getLimitedValue() == Module::Error) {
5225 Type *Int32Ty = Type::getInt32Ty(M.getContext());
5226 Metadata *Ops[3] = {
5227 ConstantAsMetadata::get(ConstantInt::get(Int32Ty, Module::Min)),
5228 Op->getOperand(1), Op->getOperand(2)};
5229 ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
5230 Changed = true;
5231 }
5232 }
5233 }
5234
5235 // Upgrade the Objective-C Image Info Section. Remove the whitespace in the
5236 // section name so that llvm-lto will not complain about mismatching
5237 // module flags that are functionally the same.
5238 if (ID->getString() == "Objective-C Image Info Section") {
5239 if (auto *Value = dyn_cast_or_null<MDString>(Op->getOperand(2))) {
5240 SmallVector<StringRef, 4> ValueComp;
5241 Value->getString().split(ValueComp, " ");
5242 if (ValueComp.size() != 1) {
5243 std::string NewValue;
5244 for (auto &S : ValueComp)
5245 NewValue += S.str();
5246 Metadata *Ops[3] = {Op->getOperand(0), Op->getOperand(1),
5247 MDString::get(M.getContext(), NewValue)};
5248 ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
5249 Changed = true;
5250 }
5251 }
5252 }
5253
5254 // The IR upgrader turns an i32 "Objective-C Garbage Collection" flag into
5255 // an i8 value; if the higher bits are set, it adds a module flag for Swift.
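// The packed i32 layout, as decoded below:
//   bits 0-7:   Objective-C GC setting (kept as the new i8 flag value)
//   bits 8-15:  Swift ABI version
//   bits 16-23: Swift minor version
//   bits 24-31: Swift major version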
5256 if (ID->getString() == "Objective-C Garbage Collection") {
5257 auto Md = dyn_cast<ConstantAsMetadata>(Op->getOperand(2));
5258 if (Md) {
5259 assert(Md->getValue() && "Expected non-empty metadata");
5260 auto Type = Md->getValue()->getType();
5261 if (Type == Int8Ty)
5262 continue;
5263 unsigned Val = Md->getValue()->getUniqueInteger().getZExtValue();
5264 if ((Val & 0xff) != Val) {
5265 HasSwiftVersionFlag = true;
5266 SwiftABIVersion = (Val & 0xff00) >> 8;
5267 SwiftMajorVersion = (Val & 0xff000000) >> 24;
5268 SwiftMinorVersion = (Val & 0xff0000) >> 16;
5269 }
5270 Metadata *Ops[3] = {
5271 ConstantAsMetadata::get(ConstantInt::get(Int32Ty, Module::Error)),
5272 Op->getOperand(1),
5273 ConstantAsMetadata::get(ConstantInt::get(Int8Ty, Val & 0xff))};
5274 ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
5275 Changed = true;
5276 }
5277 }
5278
5279 if (ID->getString() == "amdgpu_code_object_version") {
5280 Metadata *Ops[3] = {
5281 Op->getOperand(0),
5282 MDString::get(M.getContext(), "amdhsa_code_object_version"),
5283 Op->getOperand(2)};
5284 ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
5285 Changed = true;
5286 }
5287 }
5288
5289 // "Objective-C Class Properties" was recently added for Objective-C. We
5290 // upgrade ObjC bitcodes to contain an "Objective-C Class Properties" module
5291 // flag of value 0, so we can correctly downgrade this flag when trying to
5292 // link an ObjC bitcode without this module flag with an ObjC bitcode with
5293 // this module flag.
5294 if (HasObjCFlag && !HasClassProperties) {
5295 M.addModuleFlag(llvm::Module::Override, "Objective-C Class Properties",
5296 (uint32_t)0);
5297 Changed = true;
5298 }
5299
5300 if (HasSwiftVersionFlag) {
5301 M.addModuleFlag(Module::Error, "Swift ABI Version",
5302 SwiftABIVersion);
5303 M.addModuleFlag(Module::Error, "Swift Major Version",
5304 ConstantInt::get(Int8Ty, SwiftMajorVersion));
5305 M.addModuleFlag(Module::Error, "Swift Minor Version",
5306 ConstantInt::get(Int8Ty, SwiftMinorVersion));
5307 Changed = true;
5308 }
5309
5310 return Changed;
5311 }
5312
5313 void llvm::UpgradeSectionAttributes(Module &M) {
5314 auto TrimSpaces = [](StringRef Section) -> std::string {
5315 SmallVector<StringRef, 5> Components;
5316 Section.split(Components, ',');
5317
5318 SmallString<32> Buffer;
5319 raw_svector_ostream OS(Buffer);
5320
5321 for (auto Component : Components)
5322 OS << ',' << Component.trim();
5323
5324 return std::string(OS.str().substr(1));
5325 };
5326
5327 for (auto &GV : M.globals()) {
5328 if (!GV.hasSection())
5329 continue;
5330
5331 StringRef Section = GV.getSection();
5332
5333 if (!Section.starts_with("__DATA, __objc_catlist"))
5334 continue;
5335
5336 // e.g. "__DATA, __objc_catlist, regular, no_dead_strip" becomes
5337 // "__DATA,__objc_catlist,regular,no_dead_strip".
5338 GV.setSection(TrimSpaces(Section));
5339 }
5340 }
5341
5342 namespace {
5343 // Prior to LLVM 10.0, the strictfp attribute could be used on individual
5344 // callsites within a function that did not also have the strictfp attribute.
5345 // Since 10.0, if strict FP semantics are needed within a function, the
5346 // function must have the strictfp attribute and all calls within the function
5347 // must also have the strictfp attribute. This latter restriction is
5348 // necessary to prevent unwanted libcall simplification when a function is
5349 // being cloned (such as for inlining).
5350 //
5351 // The "dangling" strictfp attribute usage was only used to prevent constant
5352 // folding and other libcall simplification. The nobuiltin attribute on the
5353 // callsite has the same effect.
5354 struct StrictFPUpgradeVisitor : public InstVisitor<StrictFPUpgradeVisitor> {
5355 StrictFPUpgradeVisitor() = default;
5356
5357 void visitCallBase(CallBase &Call) {
5358 if (!Call.isStrictFP())
5359 return;
5360 if (isa<ConstrainedFPIntrinsic>(&Call))
5361 return;
5362 // If we get here, the caller doesn't have the strictfp attribute
5363 // but this callsite does. Replace the strictfp attribute with nobuiltin.
5364 Call.removeFnAttr(Attribute::StrictFP);
5365 Call.addFnAttr(Attribute::NoBuiltin);
5366 }
5367 };
5368
5369 /// Replace the "amdgpu-unsafe-fp-atomics" attribute with atomicrmw metadata.
5370 struct AMDGPUUnsafeFPAtomicsUpgradeVisitor
5371 : public InstVisitor<AMDGPUUnsafeFPAtomicsUpgradeVisitor> {
5372 AMDGPUUnsafeFPAtomicsUpgradeVisitor() = default;
5373
5374 void visitAtomicRMWInst(AtomicRMWInst &RMW) {
5375 if (!RMW.isFloatingPointOperation())
5376 return;
5377
5378 MDNode *Empty = MDNode::get(RMW.getContext(), {});
5379 RMW.setMetadata("amdgpu.no.fine.grained.host.memory", Empty);
5380 RMW.setMetadata("amdgpu.no.remote.memory.access", Empty);
5381 RMW.setMetadata("amdgpu.ignore.denormal.mode", Empty);
5382 }
5383 };
5384 } // namespace
5385
5386 void llvm::UpgradeFunctionAttributes(Function &F) {
5387 // If a function definition doesn't have the strictfp attribute,
5388 // convert any callsite strictfp attributes to nobuiltin.
5389 if (!F.isDeclaration() && !F.hasFnAttribute(Attribute::StrictFP)) {
5390 StrictFPUpgradeVisitor SFPV;
5391 SFPV.visit(F);
5392 }
5393
5394 // Remove all incompatible attributes from the function.
5395 F.removeRetAttrs(AttributeFuncs::typeIncompatible(
5396 F.getReturnType(), F.getAttributes().getRetAttrs()));
5397 for (auto &Arg : F.args())
5398 Arg.removeAttrs(
5399 AttributeFuncs::typeIncompatible(Arg.getType(), Arg.getAttributes()));
5400
5401 // Older versions of LLVM treated an "implicit-section-name" attribute
5402 // similarly to directly setting the section on a Function.
5403 if (Attribute A = F.getFnAttribute("implicit-section-name");
5404 A.isValid() && A.isStringAttribute()) {
5405 F.setSection(A.getValueAsString());
5406 F.removeFnAttr("implicit-section-name");
5407 }
5408
5409 if (!F.empty()) {
5410 // For some reason this is called twice, and the first time is before any
5411 // instructions are loaded into the body.
5412
5413 if (Attribute A = F.getFnAttribute("amdgpu-unsafe-fp-atomics");
5414 A.isValid()) {
5415
5416 if (A.getValueAsBool()) {
5417 AMDGPUUnsafeFPAtomicsUpgradeVisitor Visitor;
5418 Visitor.visit(F);
5419 }
5420
5421 // We will leave behind dead attribute uses on external declarations, but
5422 // clang never added these to declarations anyway.
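// For example (illustrative), a function defined with
//   attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" }
// has each floating-point atomicrmw in its body tagged with the
// !amdgpu.no.fine.grained.host.memory, !amdgpu.no.remote.memory.access and
// !amdgpu.ignore.denormal.mode metadata above, before the attribute is
// dropped below.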
5423 F.removeFnAttr("amdgpu-unsafe-fp-atomics"); 5424 } 5425 } 5426 } 5427 5428 static bool isOldLoopArgument(Metadata *MD) { 5429 auto *T = dyn_cast_or_null<MDTuple>(MD); 5430 if (!T) 5431 return false; 5432 if (T->getNumOperands() < 1) 5433 return false; 5434 auto *S = dyn_cast_or_null<MDString>(T->getOperand(0)); 5435 if (!S) 5436 return false; 5437 return S->getString().starts_with("llvm.vectorizer."); 5438 } 5439 5440 static MDString *upgradeLoopTag(LLVMContext &C, StringRef OldTag) { 5441 StringRef OldPrefix = "llvm.vectorizer."; 5442 assert(OldTag.starts_with(OldPrefix) && "Expected old prefix"); 5443 5444 if (OldTag == "llvm.vectorizer.unroll") 5445 return MDString::get(C, "llvm.loop.interleave.count"); 5446 5447 return MDString::get( 5448 C, (Twine("llvm.loop.vectorize.") + OldTag.drop_front(OldPrefix.size())) 5449 .str()); 5450 } 5451 5452 static Metadata *upgradeLoopArgument(Metadata *MD) { 5453 auto *T = dyn_cast_or_null<MDTuple>(MD); 5454 if (!T) 5455 return MD; 5456 if (T->getNumOperands() < 1) 5457 return MD; 5458 auto *OldTag = dyn_cast_or_null<MDString>(T->getOperand(0)); 5459 if (!OldTag) 5460 return MD; 5461 if (!OldTag->getString().starts_with("llvm.vectorizer.")) 5462 return MD; 5463 5464 // This has an old tag. Upgrade it. 5465 SmallVector<Metadata *, 8> Ops; 5466 Ops.reserve(T->getNumOperands()); 5467 Ops.push_back(upgradeLoopTag(T->getContext(), OldTag->getString())); 5468 for (unsigned I = 1, E = T->getNumOperands(); I != E; ++I) 5469 Ops.push_back(T->getOperand(I)); 5470 5471 return MDTuple::get(T->getContext(), Ops); 5472 } 5473 5474 MDNode *llvm::upgradeInstructionLoopAttachment(MDNode &N) { 5475 auto *T = dyn_cast<MDTuple>(&N); 5476 if (!T) 5477 return &N; 5478 5479 if (none_of(T->operands(), isOldLoopArgument)) 5480 return &N; 5481 5482 SmallVector<Metadata *, 8> Ops; 5483 Ops.reserve(T->getNumOperands()); 5484 for (Metadata *MD : T->operands()) 5485 Ops.push_back(upgradeLoopArgument(MD)); 5486 5487 return MDTuple::get(T->getContext(), Ops); 5488 } 5489 5490 std::string llvm::UpgradeDataLayoutString(StringRef DL, StringRef TT) { 5491 Triple T(TT); 5492 // The only data layout upgrades needed for pre-GCN, SPIR or SPIRV are setting 5493 // the address space of globals to 1. This does not apply to SPIRV Logical. 5494 if (((T.isAMDGPU() && !T.isAMDGCN()) || 5495 (T.isSPIR() || (T.isSPIRV() && !T.isSPIRVLogical()))) && 5496 !DL.contains("-G") && !DL.starts_with("G")) { 5497 return DL.empty() ? std::string("G1") : (DL + "-G1").str(); 5498 } 5499 5500 if (T.isLoongArch64() || T.isRISCV64()) { 5501 // Make i32 a native type for 64-bit LoongArch and RISC-V. 5502 auto I = DL.find("-n64-"); 5503 if (I != StringRef::npos) 5504 return (DL.take_front(I) + "-n32:64-" + DL.drop_front(I + 5)).str(); 5505 return DL.str(); 5506 } 5507 5508 std::string Res = DL.str(); 5509 // AMDGCN data layout upgrades. 5510 if (T.isAMDGCN()) { 5511 // Define address spaces for constants. 5512 if (!DL.contains("-G") && !DL.starts_with("G")) 5513 Res.append(Res.empty() ? "G1" : "-G1"); 5514 5515 // Add missing non-integral declarations. 5516 // This goes before adding new address spaces to prevent incoherent string 5517 // values. 5518 if (!DL.contains("-ni") && !DL.starts_with("ni")) 5519 Res.append("-ni:7:8:9"); 5520 // Update ni:7 to ni:7:8:9. 
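// For example (illustrative): "...-ni:7" becomes "...-ni:7:8:9" and
// "...-ni:7:8" becomes "...-ni:7:8:9".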
5521 if (DL.ends_with("ni:7"))
5522 Res.append(":8:9");
5523 if (DL.ends_with("ni:7:8"))
5524 Res.append(":9");
5525
5526 // Add sizing for address spaces 7, 8 and 9 (fat raw buffers, buffer
5527 // resources and buffer strided pointers). An empty data layout has already been upgraded to G1 by now.
5528 if (!DL.contains("-p7") && !DL.starts_with("p7"))
5529 Res.append("-p7:160:256:256:32");
5530 if (!DL.contains("-p8") && !DL.starts_with("p8"))
5531 Res.append("-p8:128:128");
5532 if (!DL.contains("-p9") && !DL.starts_with("p9"))
5533 Res.append("-p9:192:256:256:32");
5534
5535 return Res;
5536 }
5537
5538 auto AddPtr32Ptr64AddrSpaces = [&DL, &Res]() {
5539 // If the data layout matches the expected format, add pointer-size address
5540 // spaces to the data layout.
5541 StringRef AddrSpaces{"-p270:32:32-p271:32:32-p272:64:64"};
5542 if (!DL.contains(AddrSpaces)) {
5543 SmallVector<StringRef, 4> Groups;
5544 Regex R("^([Ee]-m:[a-z](-p:32:32)?)(-.*)$");
5545 if (R.match(Res, &Groups))
5546 Res = (Groups[1] + AddrSpaces + Groups[3]).str();
5547 }
5548 };
5549
5550 // AArch64 data layout upgrades.
5551 if (T.isAArch64()) {
5552 // Add "-Fn32".
5553 if (!DL.empty() && !DL.contains("-Fn32"))
5554 Res.append("-Fn32");
5555 AddPtr32Ptr64AddrSpaces();
5556 return Res;
5557 }
5558
5559 if (T.isSPARC() || (T.isMIPS64() && !DL.contains("m:m")) || T.isPPC64() ||
5560 T.isWasm()) {
5561 // Mips64 with the o32 ABI did not add "-i128:128".
5562 // Add "-i128:128" after "-i64:64" if it is missing.
5563 std::string I64 = "-i64:64";
5564 std::string I128 = "-i128:128";
5565 if (!StringRef(Res).contains(I128)) {
5566 size_t Pos = Res.find(I64);
5567 if (Pos != size_t(-1))
5568 Res.insert(Pos + I64.size(), I128);
5569 }
5570 return Res;
5571 }
5572
5573 if (!T.isX86())
5574 return Res;
5575
5576 AddPtr32Ptr64AddrSpaces();
5577
5578 // i128 values need to be 16-byte-aligned. LLVM already called into libgcc
5579 // for i128 operations prior to this being reflected in the data layout, and
5580 // clang mostly produced LLVM IR that already aligned i128 to 16-byte
5581 // boundaries, so although this is a breaking change, the upgrade is expected
5582 // to fix more IR than it breaks.
5583 // Intel MCU is an exception and uses 4-byte alignment.
5584 if (!T.isOSIAMCU()) {
5585 std::string I128 = "-i128:128";
5586 if (StringRef Ref = Res; !Ref.contains(I128)) {
5587 SmallVector<StringRef, 4> Groups;
5588 Regex R("^(e(-[mpi][^-]*)*)((-[^mpi][^-]*)*)$");
5589 if (R.match(Res, &Groups))
5590 Res = (Groups[1] + I128 + Groups[3]).str();
5591 }
5592 }
5593
5594 // For 32-bit MSVC targets, raise the alignment of f80 values to 16 bytes.
5595 // Raising the alignment is safe because Clang did not produce f80 values in
5596 // the MSVC environment before this upgrade was added.
5597 if (T.isWindowsMSVCEnvironment() && !T.isArch64Bit()) {
5598 StringRef Ref = Res;
5599 auto I = Ref.find("-f80:32-");
5600 if (I != StringRef::npos)
5601 Res = (Ref.take_front(I) + "-f80:128-" + Ref.drop_front(I + 8)).str();
5602 }
5603
5604 return Res;
5605 }
5606
5607 void llvm::UpgradeAttributes(AttrBuilder &B) {
5608 StringRef FramePointer;
5609 Attribute A = B.getAttribute("no-frame-pointer-elim");
5610 if (A.isValid()) {
5611 // The value can be "true" or "false".
5612 FramePointer = A.getValueAsString() == "true" ? "all" : "none";
5613 B.removeAttribute("no-frame-pointer-elim");
5614 }
5615 if (B.contains("no-frame-pointer-elim-non-leaf")) {
5616 // The value is ignored. "no-frame-pointer-elim"="true" takes priority.
5617 if (FramePointer != "all") 5618 FramePointer = "non-leaf"; 5619 B.removeAttribute("no-frame-pointer-elim-non-leaf"); 5620 } 5621 if (!FramePointer.empty()) 5622 B.addAttribute("frame-pointer", FramePointer); 5623 5624 A = B.getAttribute("null-pointer-is-valid"); 5625 if (A.isValid()) { 5626 // The value can be "true" or "false". 5627 bool NullPointerIsValid = A.getValueAsString() == "true"; 5628 B.removeAttribute("null-pointer-is-valid"); 5629 if (NullPointerIsValid) 5630 B.addAttribute(Attribute::NullPointerIsValid); 5631 } 5632 } 5633 5634 void llvm::UpgradeOperandBundles(std::vector<OperandBundleDef> &Bundles) { 5635 // clang.arc.attachedcall bundles are now required to have an operand. 5636 // If they don't, it's okay to drop them entirely: when there is an operand, 5637 // the "attachedcall" is meaningful and required, but without an operand, 5638 // it's just a marker NOP. Dropping it merely prevents an optimization. 5639 erase_if(Bundles, [&](OperandBundleDef &OBD) { 5640 return OBD.getTag() == "clang.arc.attachedcall" && 5641 OBD.inputs().empty(); 5642 }); 5643 } 5644
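// For example (illustrative): an operand-less bundle on
//   call void @foo() [ "clang.arc.attachedcall"() ]
// is dropped entirely, while a bundle that names its runtime function, e.g.
//   [ "clang.arc.attachedcall"(ptr @llvm.objc.retainAutoreleasedReturnValue) ]
// is kept, since only the operand-carrying form is meaningful.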