1 //===-- PPCIntrinsicCall.cpp ----------------------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // Helper routines for constructing the FIR dialect of MLIR for PowerPC 10 // intrinsics. Extensive use of MLIR interfaces and MLIR's coding style 11 // (https://mlir.llvm.org/getting_started/DeveloperGuide/) is used in this 12 // module. 13 // 14 //===----------------------------------------------------------------------===// 15 16 #include "flang/Optimizer/Builder/PPCIntrinsicCall.h" 17 #include "flang/Evaluate/common.h" 18 #include "flang/Optimizer/Builder/FIRBuilder.h" 19 #include "flang/Optimizer/Builder/MutableBox.h" 20 #include "mlir/Dialect/Vector/IR/VectorOps.h" 21 22 namespace fir { 23 24 using PI = PPCIntrinsicLibrary; 25 26 // PPC specific intrinsic handlers. 27 static constexpr IntrinsicHandler ppcHandlers[]{ 28 {"__ppc_mma_assemble_acc", 29 static_cast<IntrinsicLibrary::SubroutineGenerator>( 30 &PI::genMmaIntr<MMAOp::AssembleAcc, MMAHandlerOp::SubToFunc>), 31 {{{"acc", asAddr}, 32 {"arg1", asValue}, 33 {"arg2", asValue}, 34 {"arg3", asValue}, 35 {"arg4", asValue}}}, 36 /*isElemental=*/true}, 37 {"__ppc_mma_assemble_pair", 38 static_cast<IntrinsicLibrary::SubroutineGenerator>( 39 &PI::genMmaIntr<MMAOp::AssemblePair, MMAHandlerOp::SubToFunc>), 40 {{{"pair", asAddr}, {"arg1", asValue}, {"arg2", asValue}}}, 41 /*isElemental=*/true}, 42 {"__ppc_mma_build_acc", 43 static_cast<IntrinsicLibrary::SubroutineGenerator>( 44 &PI::genMmaIntr<MMAOp::AssembleAcc, 45 MMAHandlerOp::SubToFuncReverseArgOnLE>), 46 {{{"acc", asAddr}, 47 {"arg1", asValue}, 48 {"arg2", asValue}, 49 {"arg3", asValue}, 50 {"arg4", asValue}}}, 51 /*isElemental=*/true}, 52 {"__ppc_mma_disassemble_acc", 53 static_cast<IntrinsicLibrary::SubroutineGenerator>( 54 &PI::genMmaIntr<MMAOp::DisassembleAcc, MMAHandlerOp::SubToFunc>), 55 {{{"data", asAddr}, {"acc", asValue}}}, 56 /*isElemental=*/true}, 57 {"__ppc_mma_disassemble_pair", 58 static_cast<IntrinsicLibrary::SubroutineGenerator>( 59 &PI::genMmaIntr<MMAOp::DisassemblePair, MMAHandlerOp::SubToFunc>), 60 {{{"data", asAddr}, {"pair", asValue}}}, 61 /*isElemental=*/true}, 62 {"__ppc_mma_pmxvbf16ger2_", 63 static_cast<IntrinsicLibrary::SubroutineGenerator>( 64 &PI::genMmaIntr<MMAOp::Pmxvbf16ger2, MMAHandlerOp::SubToFunc>), 65 {{{"acc", asAddr}, 66 {"a", asValue}, 67 {"b", asValue}, 68 {"xmask", asValue}, 69 {"ymask", asValue}, 70 {"pmask", asValue}}}, 71 /*isElemental=*/true}, 72 {"__ppc_mma_pmxvbf16ger2nn", 73 static_cast<IntrinsicLibrary::SubroutineGenerator>( 74 &PI::genMmaIntr<MMAOp::Pmxvbf16ger2nn, 75 MMAHandlerOp::FirstArgIsResult>), 76 {{{"acc", asAddr}, 77 {"a", asValue}, 78 {"b", asValue}, 79 {"xmask", asValue}, 80 {"ymask", asValue}, 81 {"pmask", asValue}}}, 82 /*isElemental=*/true}, 83 {"__ppc_mma_pmxvbf16ger2np", 84 static_cast<IntrinsicLibrary::SubroutineGenerator>( 85 &PI::genMmaIntr<MMAOp::Pmxvbf16ger2np, 86 MMAHandlerOp::FirstArgIsResult>), 87 {{{"acc", asAddr}, 88 {"a", asValue}, 89 {"b", asValue}, 90 {"xmask", asValue}, 91 {"ymask", asValue}, 92 {"pmask", asValue}}}, 93 /*isElemental=*/true}, 94 {"__ppc_mma_pmxvbf16ger2pn", 95 static_cast<IntrinsicLibrary::SubroutineGenerator>( 96 &PI::genMmaIntr<MMAOp::Pmxvbf16ger2pn, 97 MMAHandlerOp::FirstArgIsResult>), 98 {{{"acc", asAddr}, 99 {"a", asValue}, 100 {"b", asValue}, 101 {"xmask", asValue}, 102 {"ymask", asValue}, 103 {"pmask", asValue}}}, 104 /*isElemental=*/true}, 105 {"__ppc_mma_pmxvbf16ger2pp", 106 static_cast<IntrinsicLibrary::SubroutineGenerator>( 107 &PI::genMmaIntr<MMAOp::Pmxvbf16ger2pp, 108 MMAHandlerOp::FirstArgIsResult>), 109 {{{"acc", asAddr}, 110 {"a", asValue}, 111 {"b", asValue}, 112 {"xmask", asValue}, 113 {"ymask", asValue}, 114 {"pmask", asValue}}}, 115 /*isElemental=*/true}, 116 {"__ppc_mma_pmxvf16ger2_", 117 static_cast<IntrinsicLibrary::SubroutineGenerator>( 118 &PI::genMmaIntr<MMAOp::Pmxvf16ger2, MMAHandlerOp::SubToFunc>), 119 {{{"acc", asAddr}, 120 {"a", asValue}, 121 {"b", asValue}, 122 {"xmask", asValue}, 123 {"ymask", asValue}, 124 {"pmask", asValue}}}, 125 /*isElemental=*/true}, 126 {"__ppc_mma_pmxvf16ger2nn", 127 static_cast<IntrinsicLibrary::SubroutineGenerator>( 128 &PI::genMmaIntr<MMAOp::Pmxvf16ger2nn, MMAHandlerOp::FirstArgIsResult>), 129 {{{"acc", asAddr}, 130 {"a", asValue}, 131 {"b", asValue}, 132 {"xmask", asValue}, 133 {"ymask", asValue}, 134 {"pmask", asValue}}}, 135 /*isElemental=*/true}, 136 {"__ppc_mma_pmxvf16ger2np", 137 static_cast<IntrinsicLibrary::SubroutineGenerator>( 138 &PI::genMmaIntr<MMAOp::Pmxvf16ger2np, MMAHandlerOp::FirstArgIsResult>), 139 {{{"acc", asAddr}, 140 {"a", asValue}, 141 {"b", asValue}, 142 {"xmask", asValue}, 143 {"ymask", asValue}, 144 {"pmask", asValue}}}, 145 /*isElemental=*/true}, 146 {"__ppc_mma_pmxvf16ger2pn", 147 static_cast<IntrinsicLibrary::SubroutineGenerator>( 148 &PI::genMmaIntr<MMAOp::Pmxvf16ger2pn, MMAHandlerOp::FirstArgIsResult>), 149 {{{"acc", asAddr}, 150 {"a", asValue}, 151 {"b", asValue}, 152 {"xmask", asValue}, 153 {"ymask", asValue}, 154 {"pmask", asValue}}}, 155 /*isElemental=*/true}, 156 {"__ppc_mma_pmxvf16ger2pp", 157 static_cast<IntrinsicLibrary::SubroutineGenerator>( 158 &PI::genMmaIntr<MMAOp::Pmxvf16ger2pp, MMAHandlerOp::FirstArgIsResult>), 159 {{{"acc", asAddr}, 160 {"a", asValue}, 161 {"b", asValue}, 162 {"xmask", asValue}, 163 {"ymask", asValue}, 164 {"pmask", asValue}}}, 165 /*isElemental=*/true}, 166 {"__ppc_mma_pmxvf32ger", 167 static_cast<IntrinsicLibrary::SubroutineGenerator>( 168 &PI::genMmaIntr<MMAOp::Pmxvf32ger, MMAHandlerOp::SubToFunc>), 169 {{{"acc", asAddr}, 170 {"a", asValue}, 171 {"b", asValue}, 172 {"xmask", asValue}, 173 {"ymask", asValue}}}, 174 /*isElemental=*/true}, 175 {"__ppc_mma_pmxvf32gernn", 176 static_cast<IntrinsicLibrary::SubroutineGenerator>( 177 &PI::genMmaIntr<MMAOp::Pmxvf32gernn, MMAHandlerOp::FirstArgIsResult>), 178 {{{"acc", asAddr}, 179 {"a", asValue}, 180 {"b", asValue}, 181 {"xmask", asValue}, 182 {"ymask", asValue}}}, 183 /*isElemental=*/true}, 184 {"__ppc_mma_pmxvf32gernp", 185 static_cast<IntrinsicLibrary::SubroutineGenerator>( 186 &PI::genMmaIntr<MMAOp::Pmxvf32gernp, MMAHandlerOp::FirstArgIsResult>), 187 {{{"acc", asAddr}, 188 {"a", asValue}, 189 {"b", asValue}, 190 {"xmask", asValue}, 191 {"ymask", asValue}}}, 192 /*isElemental=*/true}, 193 {"__ppc_mma_pmxvf32gerpn", 194 static_cast<IntrinsicLibrary::SubroutineGenerator>( 195 &PI::genMmaIntr<MMAOp::Pmxvf32gerpn, MMAHandlerOp::FirstArgIsResult>), 196 {{{"acc", asAddr}, 197 {"a", asValue}, 198 {"b", asValue}, 199 {"xmask", asValue}, 200 {"ymask", asValue}}}, 201 /*isElemental=*/true}, 202 {"__ppc_mma_pmxvf32gerpp", 203 static_cast<IntrinsicLibrary::SubroutineGenerator>( 204 &PI::genMmaIntr<MMAOp::Pmxvf32gerpp, MMAHandlerOp::FirstArgIsResult>), 205 {{{"acc", asAddr}, 206 {"a", asValue}, 207 {"b", asValue}, 208 {"xmask", asValue}, 209 {"ymask", asValue}}}, 210 /*isElemental=*/true}, 211 {"__ppc_mma_pmxvf64ger", 212 static_cast<IntrinsicLibrary::SubroutineGenerator>( 213 &PI::genMmaIntr<MMAOp::Pmxvf64ger, MMAHandlerOp::SubToFunc>), 214 {{{"acc", asAddr}, 215 {"a", asValue}, 216 {"b", asValue}, 217 {"xmask", asValue}, 218 {"ymask", asValue}}}, 219 /*isElemental=*/true}, 220 {"__ppc_mma_pmxvf64gernn", 221 static_cast<IntrinsicLibrary::SubroutineGenerator>( 222 &PI::genMmaIntr<MMAOp::Pmxvf64gernn, MMAHandlerOp::FirstArgIsResult>), 223 {{{"acc", asAddr}, 224 {"a", asValue}, 225 {"b", asValue}, 226 {"xmask", asValue}, 227 {"ymask", asValue}}}, 228 /*isElemental=*/true}, 229 {"__ppc_mma_pmxvf64gernp", 230 static_cast<IntrinsicLibrary::SubroutineGenerator>( 231 &PI::genMmaIntr<MMAOp::Pmxvf64gernp, MMAHandlerOp::FirstArgIsResult>), 232 {{{"acc", asAddr}, 233 {"a", asValue}, 234 {"b", asValue}, 235 {"xmask", asValue}, 236 {"ymask", asValue}}}, 237 /*isElemental=*/true}, 238 {"__ppc_mma_pmxvf64gerpn", 239 static_cast<IntrinsicLibrary::SubroutineGenerator>( 240 &PI::genMmaIntr<MMAOp::Pmxvf64gerpn, MMAHandlerOp::FirstArgIsResult>), 241 {{{"acc", asAddr}, 242 {"a", asValue}, 243 {"b", asValue}, 244 {"xmask", asValue}, 245 {"ymask", asValue}}}, 246 /*isElemental=*/true}, 247 {"__ppc_mma_pmxvf64gerpp", 248 static_cast<IntrinsicLibrary::SubroutineGenerator>( 249 &PI::genMmaIntr<MMAOp::Pmxvf64gerpp, MMAHandlerOp::FirstArgIsResult>), 250 {{{"acc", asAddr}, 251 {"a", asValue}, 252 {"b", asValue}, 253 {"xmask", asValue}, 254 {"ymask", asValue}}}, 255 /*isElemental=*/true}, 256 {"__ppc_mma_pmxvi16ger2_", 257 static_cast<IntrinsicLibrary::SubroutineGenerator>( 258 &PI::genMmaIntr<MMAOp::Pmxvi16ger2, MMAHandlerOp::SubToFunc>), 259 {{{"acc", asAddr}, 260 {"a", asValue}, 261 {"b", asValue}, 262 {"xmask", asValue}, 263 {"ymask", asValue}, 264 {"pmask", asValue}}}, 265 /*isElemental=*/true}, 266 {"__ppc_mma_pmxvi16ger2pp", 267 static_cast<IntrinsicLibrary::SubroutineGenerator>( 268 &PI::genMmaIntr<MMAOp::Pmxvi16ger2pp, MMAHandlerOp::FirstArgIsResult>), 269 {{{"acc", asAddr}, 270 {"a", asValue}, 271 {"b", asValue}, 272 {"xmask", asValue}, 273 {"ymask", asValue}, 274 {"pmask", asValue}}}, 275 /*isElemental=*/true}, 276 {"__ppc_mma_pmxvi16ger2s", 277 static_cast<IntrinsicLibrary::SubroutineGenerator>( 278 &PI::genMmaIntr<MMAOp::Pmxvi16ger2s, MMAHandlerOp::SubToFunc>), 279 {{{"acc", asAddr}, 280 {"a", asValue}, 281 {"b", asValue}, 282 {"xmask", asValue}, 283 {"ymask", asValue}, 284 {"pmask", asValue}}}, 285 /*isElemental=*/true}, 286 {"__ppc_mma_pmxvi16ger2spp", 287 static_cast<IntrinsicLibrary::SubroutineGenerator>( 288 &PI::genMmaIntr<MMAOp::Pmxvi16ger2spp, 289 MMAHandlerOp::FirstArgIsResult>), 290 {{{"acc", asAddr}, 291 {"a", asValue}, 292 {"b", asValue}, 293 {"xmask", asValue}, 294 {"ymask", asValue}, 295 {"pmask", asValue}}}, 296 /*isElemental=*/true}, 297 {"__ppc_mma_pmxvi4ger8_", 298 static_cast<IntrinsicLibrary::SubroutineGenerator>( 299 &PI::genMmaIntr<MMAOp::Pmxvi4ger8, MMAHandlerOp::SubToFunc>), 300 {{{"acc", asAddr}, 301 {"a", asValue}, 302 {"b", asValue}, 303 {"xmask", asValue}, 304 {"ymask", asValue}, 305 {"pmask", asValue}}}, 306 /*isElemental=*/true}, 307 {"__ppc_mma_pmxvi4ger8pp", 308 static_cast<IntrinsicLibrary::SubroutineGenerator>( 309 &PI::genMmaIntr<MMAOp::Pmxvi4ger8pp, MMAHandlerOp::FirstArgIsResult>), 310 {{{"acc", asAddr}, 311 {"a", asValue}, 312 {"b", asValue}, 313 {"xmask", asValue}, 314 {"ymask", asValue}, 315 {"pmask", asValue}}}, 316 /*isElemental=*/true}, 317 {"__ppc_mma_pmxvi8ger4_", 318 static_cast<IntrinsicLibrary::SubroutineGenerator>( 319 &PI::genMmaIntr<MMAOp::Pmxvi8ger4, MMAHandlerOp::SubToFunc>), 320 {{{"acc", asAddr}, 321 {"a", asValue}, 322 {"b", asValue}, 323 {"xmask", asValue}, 324 {"ymask", asValue}, 325 {"pmask", asValue}}}, 326 /*isElemental=*/true}, 327 {"__ppc_mma_pmxvi8ger4pp", 328 static_cast<IntrinsicLibrary::SubroutineGenerator>( 329 &PI::genMmaIntr<MMAOp::Pmxvi8ger4pp, MMAHandlerOp::FirstArgIsResult>), 330 {{{"acc", asAddr}, 331 {"a", asValue}, 332 {"b", asValue}, 333 {"xmask", asValue}, 334 {"ymask", asValue}, 335 {"pmask", asValue}}}, 336 /*isElemental=*/true}, 337 {"__ppc_mma_pmxvi8ger4spp", 338 static_cast<IntrinsicLibrary::SubroutineGenerator>( 339 &PI::genMmaIntr<MMAOp::Pmxvi8ger4spp, MMAHandlerOp::FirstArgIsResult>), 340 {{{"acc", asAddr}, 341 {"a", asValue}, 342 {"b", asValue}, 343 {"xmask", asValue}, 344 {"ymask", asValue}, 345 {"pmask", asValue}}}, 346 /*isElemental=*/true}, 347 {"__ppc_mma_xvbf16ger2_", 348 static_cast<IntrinsicLibrary::SubroutineGenerator>( 349 &PI::genMmaIntr<MMAOp::Xvbf16ger2, MMAHandlerOp::SubToFunc>), 350 {{{"acc", asAddr}, {"a", asValue}, {"b", asValue}}}, 351 /*isElemental=*/true}, 352 {"__ppc_mma_xvbf16ger2nn", 353 static_cast<IntrinsicLibrary::SubroutineGenerator>( 354 &PI::genMmaIntr<MMAOp::Xvbf16ger2nn, MMAHandlerOp::FirstArgIsResult>), 355 {{{"acc", asAddr}, {"a", asValue}, {"b", asValue}}}, 356 /*isElemental=*/true}, 357 {"__ppc_mma_xvbf16ger2np", 358 static_cast<IntrinsicLibrary::SubroutineGenerator>( 359 &PI::genMmaIntr<MMAOp::Xvbf16ger2np, MMAHandlerOp::FirstArgIsResult>), 360 {{{"acc", asAddr}, {"a", asValue}, {"b", asValue}}}, 361 /*isElemental=*/true}, 362 {"__ppc_mma_xvbf16ger2pn", 363 static_cast<IntrinsicLibrary::SubroutineGenerator>( 364 &PI::genMmaIntr<MMAOp::Xvbf16ger2pn, MMAHandlerOp::FirstArgIsResult>), 365 {{{"acc", asAddr}, {"a", asValue}, {"b", asValue}}}, 366 /*isElemental=*/true}, 367 {"__ppc_mma_xvbf16ger2pp", 368 static_cast<IntrinsicLibrary::SubroutineGenerator>( 369 &PI::genMmaIntr<MMAOp::Xvbf16ger2pp, MMAHandlerOp::FirstArgIsResult>), 370 {{{"acc", asAddr}, {"a", asValue}, {"b", asValue}}}, 371 /*isElemental=*/true}, 372 {"__ppc_mma_xvf16ger2_", 373 static_cast<IntrinsicLibrary::SubroutineGenerator>( 374 &PI::genMmaIntr<MMAOp::Xvf16ger2, MMAHandlerOp::SubToFunc>), 375 {{{"acc", asAddr}, {"a", asValue}, {"b", asValue}}}, 376 /*isElemental=*/true}, 377 {"__ppc_mma_xvf16ger2nn", 378 static_cast<IntrinsicLibrary::SubroutineGenerator>( 379 &PI::genMmaIntr<MMAOp::Xvf16ger2nn, MMAHandlerOp::FirstArgIsResult>), 380 {{{"acc", asAddr}, {"a", asValue}, {"b", asValue}}}, 381 /*isElemental=*/true}, 382 {"__ppc_mma_xvf16ger2np", 383 static_cast<IntrinsicLibrary::SubroutineGenerator>( 384 &PI::genMmaIntr<MMAOp::Xvf16ger2np, MMAHandlerOp::FirstArgIsResult>), 385 {{{"acc", asAddr}, {"a", asValue}, {"b", asValue}}}, 386 /*isElemental=*/true}, 387 {"__ppc_mma_xvf16ger2pn", 388 static_cast<IntrinsicLibrary::SubroutineGenerator>( 389 &PI::genMmaIntr<MMAOp::Xvf16ger2pn, MMAHandlerOp::FirstArgIsResult>), 390 {{{"acc", asAddr}, {"a", asValue}, {"b", asValue}}}, 391 /*isElemental=*/true}, 392 {"__ppc_mma_xvf16ger2pp", 393 static_cast<IntrinsicLibrary::SubroutineGenerator>( 394 &PI::genMmaIntr<MMAOp::Xvf16ger2pp, MMAHandlerOp::FirstArgIsResult>), 395 {{{"acc", asAddr}, {"a", asValue}, {"b", asValue}}}, 396 /*isElemental=*/true}, 397 {"__ppc_mma_xvf32ger", 398 static_cast<IntrinsicLibrary::SubroutineGenerator>( 399 &PI::genMmaIntr<MMAOp::Xvf32ger, MMAHandlerOp::SubToFunc>), 400 {{{"acc", asAddr}, {"a", asValue}, {"b", asValue}}}, 401 /*isElemental=*/true}, 402 {"__ppc_mma_xvf32gernn", 403 static_cast<IntrinsicLibrary::SubroutineGenerator>( 404 &PI::genMmaIntr<MMAOp::Xvf32gernn, MMAHandlerOp::FirstArgIsResult>), 405 {{{"acc", asAddr}, {"a", asValue}, {"b", asValue}}}, 406 /*isElemental=*/true}, 407 {"__ppc_mma_xvf32gernp", 408 static_cast<IntrinsicLibrary::SubroutineGenerator>( 409 &PI::genMmaIntr<MMAOp::Xvf32gernp, MMAHandlerOp::FirstArgIsResult>), 410 {{{"acc", asAddr}, {"a", asValue}, {"b", asValue}}}, 411 /*isElemental=*/true}, 412 {"__ppc_mma_xvf32gerpn", 413 static_cast<IntrinsicLibrary::SubroutineGenerator>( 414 &PI::genMmaIntr<MMAOp::Xvf32gerpn, MMAHandlerOp::FirstArgIsResult>), 415 {{{"acc", asAddr}, {"a", asValue}, {"b", asValue}}}, 416 /*isElemental=*/true}, 417 {"__ppc_mma_xvf32gerpp", 418 static_cast<IntrinsicLibrary::SubroutineGenerator>( 419 &PI::genMmaIntr<MMAOp::Xvf32gerpp, MMAHandlerOp::FirstArgIsResult>), 420 {{{"acc", asAddr}, {"a", asValue}, {"b", asValue}}}, 421 /*isElemental=*/true}, 422 {"__ppc_mma_xvf64ger", 423 static_cast<IntrinsicLibrary::SubroutineGenerator>( 424 &PI::genMmaIntr<MMAOp::Xvf64ger, MMAHandlerOp::SubToFunc>), 425 {{{"acc", asAddr}, {"a", asValue}, {"b", asValue}}}, 426 /*isElemental=*/true}, 427 {"__ppc_mma_xvf64gernn", 428 static_cast<IntrinsicLibrary::SubroutineGenerator>( 429 &PI::genMmaIntr<MMAOp::Xvf64gernn, MMAHandlerOp::FirstArgIsResult>), 430 {{{"acc", asAddr}, {"a", asValue}, {"b", asValue}}}, 431 /*isElemental=*/true}, 432 {"__ppc_mma_xvf64gernp", 433 static_cast<IntrinsicLibrary::SubroutineGenerator>( 434 &PI::genMmaIntr<MMAOp::Xvf64gernp, MMAHandlerOp::FirstArgIsResult>), 435 {{{"acc", asAddr}, {"a", asValue}, {"b", asValue}}}, 436 /*isElemental=*/true}, 437 {"__ppc_mma_xvf64gerpn", 438 static_cast<IntrinsicLibrary::SubroutineGenerator>( 439 &PI::genMmaIntr<MMAOp::Xvf64gerpn, MMAHandlerOp::FirstArgIsResult>), 440 {{{"acc", asAddr}, {"a", asValue}, {"b", asValue}}}, 441 /*isElemental=*/true}, 442 {"__ppc_mma_xvf64gerpp", 443 static_cast<IntrinsicLibrary::SubroutineGenerator>( 444 &PI::genMmaIntr<MMAOp::Xvf64gerpp, MMAHandlerOp::FirstArgIsResult>), 445 {{{"acc", asAddr}, {"a", asValue}, {"b", asValue}}}, 446 /*isElemental=*/true}, 447 {"__ppc_mma_xvi16ger2_", 448 static_cast<IntrinsicLibrary::SubroutineGenerator>( 449 &PI::genMmaIntr<MMAOp::Xvi16ger2, MMAHandlerOp::SubToFunc>), 450 {{{"acc", asAddr}, {"a", asValue}, {"b", asValue}}}, 451 /*isElemental=*/true}, 452 {"__ppc_mma_xvi16ger2pp", 453 static_cast<IntrinsicLibrary::SubroutineGenerator>( 454 &PI::genMmaIntr<MMAOp::Xvi16ger2pp, MMAHandlerOp::FirstArgIsResult>), 455 {{{"acc", asAddr}, {"a", asValue}, {"b", asValue}}}, 456 /*isElemental=*/true}, 457 {"__ppc_mma_xvi16ger2s", 458 static_cast<IntrinsicLibrary::SubroutineGenerator>( 459 &PI::genMmaIntr<MMAOp::Xvi16ger2s, MMAHandlerOp::SubToFunc>), 460 {{{"acc", asAddr}, {"a", asValue}, {"b", asValue}}}, 461 /*isElemental=*/true}, 462 {"__ppc_mma_xvi16ger2spp", 463 static_cast<IntrinsicLibrary::SubroutineGenerator>( 464 &PI::genMmaIntr<MMAOp::Xvi16ger2spp, MMAHandlerOp::FirstArgIsResult>), 465 {{{"acc", asAddr}, {"a", asValue}, {"b", asValue}}}, 466 /*isElemental=*/true}, 467 {"__ppc_mma_xvi4ger8_", 468 static_cast<IntrinsicLibrary::SubroutineGenerator>( 469 &PI::genMmaIntr<MMAOp::Xvi4ger8, MMAHandlerOp::SubToFunc>), 470 {{{"acc", asAddr}, {"a", asValue}, {"b", asValue}}}, 471 /*isElemental=*/true}, 472 {"__ppc_mma_xvi4ger8pp", 473 static_cast<IntrinsicLibrary::SubroutineGenerator>( 474 &PI::genMmaIntr<MMAOp::Xvi4ger8pp, MMAHandlerOp::FirstArgIsResult>), 475 {{{"acc", asAddr}, {"a", asValue}, {"b", asValue}}}, 476 /*isElemental=*/true}, 477 {"__ppc_mma_xvi8ger4_", 478 static_cast<IntrinsicLibrary::SubroutineGenerator>( 479 &PI::genMmaIntr<MMAOp::Xvi8ger4, MMAHandlerOp::SubToFunc>), 480 {{{"acc", asAddr}, {"a", asValue}, {"b", asValue}}}, 481 /*isElemental=*/true}, 482 {"__ppc_mma_xvi8ger4pp", 483 static_cast<IntrinsicLibrary::SubroutineGenerator>( 484 &PI::genMmaIntr<MMAOp::Xvi8ger4pp, MMAHandlerOp::FirstArgIsResult>), 485 {{{"acc", asAddr}, {"a", asValue}, {"b", asValue}}}, 486 /*isElemental=*/true}, 487 {"__ppc_mma_xvi8ger4spp", 488 static_cast<IntrinsicLibrary::SubroutineGenerator>( 489 &PI::genMmaIntr<MMAOp::Xvi8ger4spp, MMAHandlerOp::FirstArgIsResult>), 490 {{{"acc", asAddr}, {"a", asValue}, {"b", asValue}}}, 491 /*isElemental=*/true}, 492 {"__ppc_mma_xxmfacc", 493 static_cast<IntrinsicLibrary::SubroutineGenerator>( 494 &PI::genMmaIntr<MMAOp::Xxmfacc, MMAHandlerOp::FirstArgIsResult>), 495 {{{"acc", asAddr}}}, 496 /*isElemental=*/true}, 497 {"__ppc_mma_xxmtacc", 498 static_cast<IntrinsicLibrary::SubroutineGenerator>( 499 &PI::genMmaIntr<MMAOp::Xxmtacc, MMAHandlerOp::FirstArgIsResult>), 500 {{{"acc", asAddr}}}, 501 /*isElemental=*/true}, 502 {"__ppc_mma_xxsetaccz", 503 static_cast<IntrinsicLibrary::SubroutineGenerator>( 504 &PI::genMmaIntr<MMAOp::Xxsetaccz, MMAHandlerOp::SubToFunc>), 505 {{{"acc", asAddr}}}, 506 /*isElemental=*/true}, 507 {"__ppc_mtfsf", 508 static_cast<IntrinsicLibrary::SubroutineGenerator>(&PI::genMtfsf<false>), 509 {{{"mask", asValue}, {"r", asValue}}}, 510 /*isElemental=*/false}, 511 {"__ppc_mtfsfi", 512 static_cast<IntrinsicLibrary::SubroutineGenerator>(&PI::genMtfsf<true>), 513 {{{"bf", asValue}, {"i", asValue}}}, 514 /*isElemental=*/false}, 515 {"__ppc_vec_abs", 516 static_cast<IntrinsicLibrary::ExtendedGenerator>(&PI::genVecAbs), 517 {{{"arg1", asValue}}}, 518 /*isElemental=*/true}, 519 {"__ppc_vec_add", 520 static_cast<IntrinsicLibrary::ExtendedGenerator>( 521 &PI::genVecAddAndMulSubXor<VecOp::Add>), 522 {{{"arg1", asValue}, {"arg2", asValue}}}, 523 /*isElemental=*/true}, 524 {"__ppc_vec_and", 525 static_cast<IntrinsicLibrary::ExtendedGenerator>( 526 &PI::genVecAddAndMulSubXor<VecOp::And>), 527 {{{"arg1", asValue}, {"arg2", asValue}}}, 528 /*isElemental=*/true}, 529 {"__ppc_vec_any_ge", 530 static_cast<IntrinsicLibrary::ExtendedGenerator>( 531 &PI::genVecAnyCompare<VecOp::Anyge>), 532 {{{"arg1", asValue}, {"arg2", asValue}}}, 533 /*isElemental=*/true}, 534 {"__ppc_vec_cmpge", 535 static_cast<IntrinsicLibrary::ExtendedGenerator>( 536 &PI::genVecCmp<VecOp::Cmpge>), 537 {{{"arg1", asValue}, {"arg2", asValue}}}, 538 /*isElemental=*/true}, 539 {"__ppc_vec_cmpgt", 540 static_cast<IntrinsicLibrary::ExtendedGenerator>( 541 &PI::genVecCmp<VecOp::Cmpgt>), 542 {{{"arg1", asValue}, {"arg2", asValue}}}, 543 /*isElemental=*/true}, 544 {"__ppc_vec_cmple", 545 static_cast<IntrinsicLibrary::ExtendedGenerator>( 546 &PI::genVecCmp<VecOp::Cmple>), 547 {{{"arg1", asValue}, {"arg2", asValue}}}, 548 /*isElemental=*/true}, 549 {"__ppc_vec_cmplt", 550 static_cast<IntrinsicLibrary::ExtendedGenerator>( 551 &PI::genVecCmp<VecOp::Cmplt>), 552 {{{"arg1", asValue}, {"arg2", asValue}}}, 553 /*isElemental=*/true}, 554 {"__ppc_vec_convert", 555 static_cast<IntrinsicLibrary::ExtendedGenerator>( 556 &PI::genVecConvert<VecOp::Convert>), 557 {{{"v", asValue}, {"mold", asValue}}}, 558 /*isElemental=*/false}, 559 {"__ppc_vec_ctf", 560 static_cast<IntrinsicLibrary::ExtendedGenerator>( 561 &PI::genVecConvert<VecOp::Ctf>), 562 {{{"arg1", asValue}, {"arg2", asValue}}}, 563 /*isElemental=*/true}, 564 {"__ppc_vec_cvf", 565 static_cast<IntrinsicLibrary::ExtendedGenerator>( 566 &PI::genVecConvert<VecOp::Cvf>), 567 {{{"arg1", asValue}}}, 568 /*isElemental=*/true}, 569 {"__ppc_vec_extract", 570 static_cast<IntrinsicLibrary::ExtendedGenerator>(&PI::genVecExtract), 571 {{{"arg1", asValue}, {"arg2", asValue}}}, 572 /*isElemental=*/true}, 573 {"__ppc_vec_insert", 574 static_cast<IntrinsicLibrary::ExtendedGenerator>(&PI::genVecInsert), 575 {{{"arg1", asValue}, {"arg2", asValue}, {"arg3", asValue}}}, 576 /*isElemental=*/true}, 577 {"__ppc_vec_ld", 578 static_cast<IntrinsicLibrary::ExtendedGenerator>( 579 &PI::genVecLdCallGrp<VecOp::Ld>), 580 {{{"arg1", asValue}, {"arg2", asAddr}}}, 581 /*isElemental=*/false}, 582 {"__ppc_vec_lde", 583 static_cast<IntrinsicLibrary::ExtendedGenerator>( 584 &PI::genVecLdCallGrp<VecOp::Lde>), 585 {{{"arg1", asValue}, {"arg2", asAddr}}}, 586 /*isElemental=*/false}, 587 {"__ppc_vec_ldl", 588 static_cast<IntrinsicLibrary::ExtendedGenerator>( 589 &PI::genVecLdCallGrp<VecOp::Ldl>), 590 {{{"arg1", asValue}, {"arg2", asAddr}}}, 591 /*isElemental=*/false}, 592 {"__ppc_vec_lvsl", 593 static_cast<IntrinsicLibrary::ExtendedGenerator>( 594 &PI::genVecLvsGrp<VecOp::Lvsl>), 595 {{{"arg1", asValue}, {"arg2", asAddr}}}, 596 /*isElemental=*/false}, 597 {"__ppc_vec_lvsr", 598 static_cast<IntrinsicLibrary::ExtendedGenerator>( 599 &PI::genVecLvsGrp<VecOp::Lvsr>), 600 {{{"arg1", asValue}, {"arg2", asAddr}}}, 601 /*isElemental=*/false}, 602 {"__ppc_vec_lxv", 603 static_cast<IntrinsicLibrary::ExtendedGenerator>( 604 &PI::genVecLdNoCallGrp<VecOp::Lxv>), 605 {{{"arg1", asValue}, {"arg2", asAddr}}}, 606 /*isElemental=*/false}, 607 {"__ppc_vec_lxvp", 608 static_cast<IntrinsicLibrary::ExtendedGenerator>( 609 &PI::genVecLdCallGrp<VecOp::Lxvp>), 610 {{{"arg1", asValue}, {"arg2", asAddr}}}, 611 /*isElemental=*/false}, 612 {"__ppc_vec_mergeh", 613 static_cast<IntrinsicLibrary::ExtendedGenerator>( 614 &PI::genVecMerge<VecOp::Mergeh>), 615 {{{"arg1", asValue}, {"arg2", asValue}}}, 616 /*isElemental=*/true}, 617 {"__ppc_vec_mergel", 618 static_cast<IntrinsicLibrary::ExtendedGenerator>( 619 &PI::genVecMerge<VecOp::Mergel>), 620 {{{"arg1", asValue}, {"arg2", asValue}}}, 621 /*isElemental=*/true}, 622 {"__ppc_vec_msub", 623 static_cast<IntrinsicLibrary::ExtendedGenerator>( 624 &PI::genVecNmaddMsub<VecOp::Msub>), 625 {{{"arg1", asValue}, {"arg2", asValue}, {"arg3", asValue}}}, 626 /*isElemental=*/true}, 627 {"__ppc_vec_mul", 628 static_cast<IntrinsicLibrary::ExtendedGenerator>( 629 &PI::genVecAddAndMulSubXor<VecOp::Mul>), 630 {{{"arg1", asValue}, {"arg2", asValue}}}, 631 /*isElemental=*/true}, 632 {"__ppc_vec_nmadd", 633 static_cast<IntrinsicLibrary::ExtendedGenerator>( 634 &PI::genVecNmaddMsub<VecOp::Nmadd>), 635 {{{"arg1", asValue}, {"arg2", asValue}, {"arg3", asValue}}}, 636 /*isElemental=*/true}, 637 {"__ppc_vec_perm", 638 static_cast<IntrinsicLibrary::ExtendedGenerator>( 639 &PI::genVecPerm<VecOp::Perm>), 640 {{{"arg1", asValue}, {"arg2", asValue}, {"arg3", asValue}}}, 641 /*isElemental=*/true}, 642 {"__ppc_vec_permi", 643 static_cast<IntrinsicLibrary::ExtendedGenerator>( 644 &PI::genVecPerm<VecOp::Permi>), 645 {{{"arg1", asValue}, {"arg2", asValue}, {"arg3", asValue}}}, 646 /*isElemental=*/true}, 647 {"__ppc_vec_sel", 648 static_cast<IntrinsicLibrary::ExtendedGenerator>(&PI::genVecSel), 649 {{{"arg1", asValue}, {"arg2", asValue}, {"arg3", asValue}}}, 650 /*isElemental=*/true}, 651 {"__ppc_vec_sl", 652 static_cast<IntrinsicLibrary::ExtendedGenerator>( 653 &PI::genVecShift<VecOp::Sl>), 654 {{{"arg1", asValue}, {"arg2", asValue}}}, 655 /*isElemental=*/true}, 656 {"__ppc_vec_sld", 657 static_cast<IntrinsicLibrary::ExtendedGenerator>( 658 &PI::genVecShift<VecOp::Sld>), 659 {{{"arg1", asValue}, {"arg2", asValue}, {"arg3", asValue}}}, 660 /*isElemental=*/true}, 661 {"__ppc_vec_sldw", 662 static_cast<IntrinsicLibrary::ExtendedGenerator>( 663 &PI::genVecShift<VecOp::Sldw>), 664 {{{"arg1", asValue}, {"arg2", asValue}, {"arg3", asValue}}}, 665 /*isElemental=*/true}, 666 {"__ppc_vec_sll", 667 static_cast<IntrinsicLibrary::ExtendedGenerator>( 668 &PI::genVecShift<VecOp::Sll>), 669 {{{"arg1", asValue}, {"arg2", asValue}}}, 670 /*isElemental=*/true}, 671 {"__ppc_vec_slo", 672 static_cast<IntrinsicLibrary::ExtendedGenerator>( 673 &PI::genVecShift<VecOp::Slo>), 674 {{{"arg1", asValue}, {"arg2", asValue}}}, 675 /*isElemental=*/true}, 676 {"__ppc_vec_splat", 677 static_cast<IntrinsicLibrary::ExtendedGenerator>( 678 &PI::genVecSplat<VecOp::Splat>), 679 {{{"arg1", asValue}, {"arg2", asValue}}}, 680 /*isElemental=*/true}, 681 {"__ppc_vec_splat_s32_", 682 static_cast<IntrinsicLibrary::ExtendedGenerator>( 683 &PI::genVecSplat<VecOp::Splat_s32>), 684 {{{"arg1", asValue}}}, 685 /*isElemental=*/true}, 686 {"__ppc_vec_splats", 687 static_cast<IntrinsicLibrary::ExtendedGenerator>( 688 &PI::genVecSplat<VecOp::Splats>), 689 {{{"arg1", asValue}}}, 690 /*isElemental=*/true}, 691 {"__ppc_vec_sr", 692 static_cast<IntrinsicLibrary::ExtendedGenerator>( 693 &PI::genVecShift<VecOp::Sr>), 694 {{{"arg1", asValue}, {"arg2", asValue}}}, 695 /*isElemental=*/true}, 696 {"__ppc_vec_srl", 697 static_cast<IntrinsicLibrary::ExtendedGenerator>( 698 &PI::genVecShift<VecOp::Srl>), 699 {{{"arg1", asValue}, {"arg2", asValue}}}, 700 /*isElemental=*/true}, 701 {"__ppc_vec_sro", 702 static_cast<IntrinsicLibrary::ExtendedGenerator>( 703 &PI::genVecShift<VecOp::Sro>), 704 {{{"arg1", asValue}, {"arg2", asValue}}}, 705 /*isElemental=*/true}, 706 {"__ppc_vec_st", 707 static_cast<IntrinsicLibrary::SubroutineGenerator>( 708 &PI::genVecStore<VecOp::St>), 709 {{{"arg1", asValue}, {"arg2", asValue}, {"arg3", asAddr}}}, 710 /*isElemental=*/false}, 711 {"__ppc_vec_ste", 712 static_cast<IntrinsicLibrary::SubroutineGenerator>( 713 &PI::genVecStore<VecOp::Ste>), 714 {{{"arg1", asValue}, {"arg2", asValue}, {"arg3", asAddr}}}, 715 /*isElemental=*/false}, 716 {"__ppc_vec_stxv", 717 static_cast<IntrinsicLibrary::SubroutineGenerator>( 718 &PI::genVecXStore<VecOp::Stxv>), 719 {{{"arg1", asValue}, {"arg2", asValue}, {"arg3", asAddr}}}, 720 /*isElemental=*/false}, 721 {"__ppc_vec_stxvp", 722 static_cast<IntrinsicLibrary::SubroutineGenerator>( 723 &PI::genVecStore<VecOp::Stxvp>), 724 {{{"arg1", asValue}, {"arg2", asValue}, {"arg3", asAddr}}}, 725 /*isElemental=*/false}, 726 {"__ppc_vec_sub", 727 static_cast<IntrinsicLibrary::ExtendedGenerator>( 728 &PI::genVecAddAndMulSubXor<VecOp::Sub>), 729 {{{"arg1", asValue}, {"arg2", asValue}}}, 730 /*isElemental=*/true}, 731 {"__ppc_vec_xl", 732 static_cast<IntrinsicLibrary::ExtendedGenerator>(&PI::genVecXlGrp), 733 {{{"arg1", asValue}, {"arg2", asAddr}}}, 734 /*isElemental=*/false}, 735 {"__ppc_vec_xl_be", 736 static_cast<IntrinsicLibrary::ExtendedGenerator>( 737 &PI::genVecLdNoCallGrp<VecOp::Xlbe>), 738 {{{"arg1", asValue}, {"arg2", asAddr}}}, 739 /*isElemental=*/false}, 740 {"__ppc_vec_xld2_", 741 static_cast<IntrinsicLibrary::ExtendedGenerator>( 742 &PI::genVecLdCallGrp<VecOp::Xld2>), 743 {{{"arg1", asValue}, {"arg2", asAddr}}}, 744 /*isElemental=*/false}, 745 {"__ppc_vec_xlds", 746 static_cast<IntrinsicLibrary::ExtendedGenerator>(&PI::genVecXlds), 747 {{{"arg1", asValue}, {"arg2", asAddr}}}, 748 /*isElemental=*/false}, 749 {"__ppc_vec_xlw4_", 750 static_cast<IntrinsicLibrary::ExtendedGenerator>( 751 &PI::genVecLdCallGrp<VecOp::Xlw4>), 752 {{{"arg1", asValue}, {"arg2", asAddr}}}, 753 /*isElemental=*/false}, 754 {"__ppc_vec_xor", 755 static_cast<IntrinsicLibrary::ExtendedGenerator>( 756 &PI::genVecAddAndMulSubXor<VecOp::Xor>), 757 {{{"arg1", asValue}, {"arg2", asValue}}}, 758 /*isElemental=*/true}, 759 {"__ppc_vec_xst", 760 static_cast<IntrinsicLibrary::SubroutineGenerator>( 761 &PI::genVecXStore<VecOp::Xst>), 762 {{{"arg1", asValue}, {"arg2", asValue}, {"arg3", asAddr}}}, 763 /*isElemental=*/false}, 764 {"__ppc_vec_xst_be", 765 static_cast<IntrinsicLibrary::SubroutineGenerator>( 766 &PI::genVecXStore<VecOp::Xst_be>), 767 {{{"arg1", asValue}, {"arg2", asValue}, {"arg3", asAddr}}}, 768 /*isElemental=*/false}, 769 {"__ppc_vec_xstd2_", 770 static_cast<IntrinsicLibrary::SubroutineGenerator>( 771 &PI::genVecXStore<VecOp::Xstd2>), 772 {{{"arg1", asValue}, {"arg2", asValue}, {"arg3", asAddr}}}, 773 /*isElemental=*/false}, 774 {"__ppc_vec_xstw4_", 775 static_cast<IntrinsicLibrary::SubroutineGenerator>( 776 &PI::genVecXStore<VecOp::Xstw4>), 777 {{{"arg1", asValue}, {"arg2", asValue}, {"arg3", asAddr}}}, 778 /*isElemental=*/false}, 779 }; 780 781 static constexpr MathOperation ppcMathOperations[] = { 782 // fcfi is just another name for fcfid, there is no llvm.ppc.fcfi. 783 {"__ppc_fcfi", "llvm.ppc.fcfid", genFuncType<Ty::Real<8>, Ty::Real<8>>, 784 genLibCall}, 785 {"__ppc_fcfid", "llvm.ppc.fcfid", genFuncType<Ty::Real<8>, Ty::Real<8>>, 786 genLibCall}, 787 {"__ppc_fcfud", "llvm.ppc.fcfud", genFuncType<Ty::Real<8>, Ty::Real<8>>, 788 genLibCall}, 789 {"__ppc_fctid", "llvm.ppc.fctid", genFuncType<Ty::Real<8>, Ty::Real<8>>, 790 genLibCall}, 791 {"__ppc_fctidz", "llvm.ppc.fctidz", genFuncType<Ty::Real<8>, Ty::Real<8>>, 792 genLibCall}, 793 {"__ppc_fctiw", "llvm.ppc.fctiw", genFuncType<Ty::Real<8>, Ty::Real<8>>, 794 genLibCall}, 795 {"__ppc_fctiwz", "llvm.ppc.fctiwz", genFuncType<Ty::Real<8>, Ty::Real<8>>, 796 genLibCall}, 797 {"__ppc_fctudz", "llvm.ppc.fctudz", genFuncType<Ty::Real<8>, Ty::Real<8>>, 798 genLibCall}, 799 {"__ppc_fctuwz", "llvm.ppc.fctuwz", genFuncType<Ty::Real<8>, Ty::Real<8>>, 800 genLibCall}, 801 {"__ppc_fmadd", "llvm.fma.f32", 802 genFuncType<Ty::Real<4>, Ty::Real<4>, Ty::Real<4>, Ty::Real<4>>, 803 genMathOp<mlir::math::FmaOp>}, 804 {"__ppc_fmadd", "llvm.fma.f64", 805 genFuncType<Ty::Real<8>, Ty::Real<8>, Ty::Real<8>, Ty::Real<8>>, 806 genMathOp<mlir::math::FmaOp>}, 807 {"__ppc_fmsub", "llvm.ppc.fmsubs", 808 genFuncType<Ty::Real<4>, Ty::Real<4>, Ty::Real<4>, Ty::Real<4>>, 809 genLibCall}, 810 {"__ppc_fmsub", "llvm.ppc.fmsub", 811 genFuncType<Ty::Real<8>, Ty::Real<8>, Ty::Real<8>, Ty::Real<8>>, 812 genLibCall}, 813 {"__ppc_fnabs", "llvm.ppc.fnabss", genFuncType<Ty::Real<4>, Ty::Real<4>>, 814 genLibCall}, 815 {"__ppc_fnabs", "llvm.ppc.fnabs", genFuncType<Ty::Real<8>, Ty::Real<8>>, 816 genLibCall}, 817 {"__ppc_fnmadd", "llvm.ppc.fnmadds", 818 genFuncType<Ty::Real<4>, Ty::Real<4>, Ty::Real<4>, Ty::Real<4>>, 819 genLibCall}, 820 {"__ppc_fnmadd", "llvm.ppc.fnmadd", 821 genFuncType<Ty::Real<8>, Ty::Real<8>, Ty::Real<8>, Ty::Real<8>>, 822 genLibCall}, 823 {"__ppc_fnmsub", "llvm.ppc.fnmsub.f32", 824 genFuncType<Ty::Real<4>, Ty::Real<4>, Ty::Real<4>, Ty::Real<4>>, 825 genLibCall}, 826 {"__ppc_fnmsub", "llvm.ppc.fnmsub.f64", 827 genFuncType<Ty::Real<8>, Ty::Real<8>, Ty::Real<8>, Ty::Real<8>>, 828 genLibCall}, 829 {"__ppc_fre", "llvm.ppc.fre", genFuncType<Ty::Real<8>, Ty::Real<8>>, 830 genLibCall}, 831 {"__ppc_fres", "llvm.ppc.fres", genFuncType<Ty::Real<4>, Ty::Real<4>>, 832 genLibCall}, 833 {"__ppc_frsqrte", "llvm.ppc.frsqrte", genFuncType<Ty::Real<8>, Ty::Real<8>>, 834 genLibCall}, 835 {"__ppc_frsqrtes", "llvm.ppc.frsqrtes", 836 genFuncType<Ty::Real<4>, Ty::Real<4>>, genLibCall}, 837 {"__ppc_vec_cvbf16spn", "llvm.ppc.vsx.xvcvbf16spn", 838 genFuncType<Ty::UnsignedVector<1>, Ty::UnsignedVector<1>>, genLibCall}, 839 {"__ppc_vec_cvspbf16_", "llvm.ppc.vsx.xvcvspbf16", 840 genFuncType<Ty::UnsignedVector<1>, Ty::UnsignedVector<1>>, genLibCall}, 841 {"__ppc_vec_madd", "llvm.fma.v4f32", 842 genFuncType<Ty::RealVector<4>, Ty::RealVector<4>, Ty::RealVector<4>, 843 Ty::RealVector<4>>, 844 genLibCall}, 845 {"__ppc_vec_madd", "llvm.fma.v2f64", 846 genFuncType<Ty::RealVector<8>, Ty::RealVector<8>, Ty::RealVector<8>, 847 Ty::RealVector<8>>, 848 genLibCall}, 849 {"__ppc_vec_max", "llvm.ppc.altivec.vmaxsb", 850 genFuncType<Ty::IntegerVector<1>, Ty::IntegerVector<1>, 851 Ty::IntegerVector<1>>, 852 genLibCall}, 853 {"__ppc_vec_max", "llvm.ppc.altivec.vmaxsh", 854 genFuncType<Ty::IntegerVector<2>, Ty::IntegerVector<2>, 855 Ty::IntegerVector<2>>, 856 genLibCall}, 857 {"__ppc_vec_max", "llvm.ppc.altivec.vmaxsw", 858 genFuncType<Ty::IntegerVector<4>, Ty::IntegerVector<4>, 859 Ty::IntegerVector<4>>, 860 genLibCall}, 861 {"__ppc_vec_max", "llvm.ppc.altivec.vmaxsd", 862 genFuncType<Ty::IntegerVector<8>, Ty::IntegerVector<8>, 863 Ty::IntegerVector<8>>, 864 genLibCall}, 865 {"__ppc_vec_max", "llvm.ppc.altivec.vmaxub", 866 genFuncType<Ty::UnsignedVector<1>, Ty::UnsignedVector<1>, 867 Ty::UnsignedVector<1>>, 868 genLibCall}, 869 {"__ppc_vec_max", "llvm.ppc.altivec.vmaxuh", 870 genFuncType<Ty::UnsignedVector<2>, Ty::UnsignedVector<2>, 871 Ty::UnsignedVector<2>>, 872 genLibCall}, 873 {"__ppc_vec_max", "llvm.ppc.altivec.vmaxuw", 874 genFuncType<Ty::UnsignedVector<4>, Ty::UnsignedVector<4>, 875 Ty::UnsignedVector<4>>, 876 genLibCall}, 877 {"__ppc_vec_max", "llvm.ppc.altivec.vmaxud", 878 genFuncType<Ty::UnsignedVector<8>, Ty::UnsignedVector<8>, 879 Ty::UnsignedVector<8>>, 880 genLibCall}, 881 {"__ppc_vec_max", "llvm.ppc.vsx.xvmaxsp", 882 genFuncType<Ty::RealVector<4>, Ty::RealVector<4>, Ty::RealVector<4>>, 883 genLibCall}, 884 {"__ppc_vec_max", "llvm.ppc.vsx.xvmaxdp", 885 genFuncType<Ty::RealVector<8>, Ty::RealVector<8>, Ty::RealVector<8>>, 886 genLibCall}, 887 {"__ppc_vec_min", "llvm.ppc.altivec.vminsb", 888 genFuncType<Ty::IntegerVector<1>, Ty::IntegerVector<1>, 889 Ty::IntegerVector<1>>, 890 genLibCall}, 891 {"__ppc_vec_min", "llvm.ppc.altivec.vminsh", 892 genFuncType<Ty::IntegerVector<2>, Ty::IntegerVector<2>, 893 Ty::IntegerVector<2>>, 894 genLibCall}, 895 {"__ppc_vec_min", "llvm.ppc.altivec.vminsw", 896 genFuncType<Ty::IntegerVector<4>, Ty::IntegerVector<4>, 897 Ty::IntegerVector<4>>, 898 genLibCall}, 899 {"__ppc_vec_min", "llvm.ppc.altivec.vminsd", 900 genFuncType<Ty::IntegerVector<8>, Ty::IntegerVector<8>, 901 Ty::IntegerVector<8>>, 902 genLibCall}, 903 {"__ppc_vec_min", "llvm.ppc.altivec.vminub", 904 genFuncType<Ty::UnsignedVector<1>, Ty::UnsignedVector<1>, 905 Ty::UnsignedVector<1>>, 906 genLibCall}, 907 {"__ppc_vec_min", "llvm.ppc.altivec.vminuh", 908 genFuncType<Ty::UnsignedVector<2>, Ty::UnsignedVector<2>, 909 Ty::UnsignedVector<2>>, 910 genLibCall}, 911 {"__ppc_vec_min", "llvm.ppc.altivec.vminuw", 912 genFuncType<Ty::UnsignedVector<4>, Ty::UnsignedVector<4>, 913 Ty::UnsignedVector<4>>, 914 genLibCall}, 915 {"__ppc_vec_min", "llvm.ppc.altivec.vminud", 916 genFuncType<Ty::UnsignedVector<8>, Ty::UnsignedVector<8>, 917 Ty::UnsignedVector<8>>, 918 genLibCall}, 919 {"__ppc_vec_min", "llvm.ppc.vsx.xvminsp", 920 genFuncType<Ty::RealVector<4>, Ty::RealVector<4>, Ty::RealVector<4>>, 921 genLibCall}, 922 {"__ppc_vec_min", "llvm.ppc.vsx.xvmindp", 923 genFuncType<Ty::RealVector<8>, Ty::RealVector<8>, Ty::RealVector<8>>, 924 genLibCall}, 925 {"__ppc_vec_nmsub", "llvm.ppc.fnmsub.v4f32", 926 genFuncType<Ty::RealVector<4>, Ty::RealVector<4>, Ty::RealVector<4>, 927 Ty::RealVector<4>>, 928 genLibCall}, 929 {"__ppc_vec_nmsub", "llvm.ppc.fnmsub.v2f64", 930 genFuncType<Ty::RealVector<8>, Ty::RealVector<8>, Ty::RealVector<8>, 931 Ty::RealVector<8>>, 932 genLibCall}, 933 }; 934 935 const IntrinsicHandler *findPPCIntrinsicHandler(llvm::StringRef name) { 936 auto compare = [](const IntrinsicHandler &ppcHandler, llvm::StringRef name) { 937 return name.compare(ppcHandler.name) > 0; 938 }; 939 auto result = llvm::lower_bound(ppcHandlers, name, compare); 940 return result != std::end(ppcHandlers) && result->name == name ? result 941 : nullptr; 942 } 943 944 using RtMap = Fortran::common::StaticMultimapView<MathOperation>; 945 static constexpr RtMap ppcMathOps(ppcMathOperations); 946 static_assert(ppcMathOps.Verify() && "map must be sorted"); 947 948 std::pair<const MathOperation *, const MathOperation *> 949 checkPPCMathOperationsRange(llvm::StringRef name) { 950 return ppcMathOps.equal_range(name); 951 } 952 953 // Helper functions for vector element ordering. 954 bool PPCIntrinsicLibrary::isBEVecElemOrderOnLE() { 955 const auto triple{fir::getTargetTriple(builder.getModule())}; 956 return (triple.isLittleEndian() && 957 converter->getLoweringOptions().getNoPPCNativeVecElemOrder()); 958 } 959 bool PPCIntrinsicLibrary::isNativeVecElemOrderOnLE() { 960 const auto triple{fir::getTargetTriple(builder.getModule())}; 961 return (triple.isLittleEndian() && 962 !converter->getLoweringOptions().getNoPPCNativeVecElemOrder()); 963 } 964 bool PPCIntrinsicLibrary::changeVecElemOrder() { 965 const auto triple{fir::getTargetTriple(builder.getModule())}; 966 return (triple.isLittleEndian() != 967 converter->getLoweringOptions().getNoPPCNativeVecElemOrder()); 968 } 969 970 static mlir::FunctionType genMmaVpFuncType(mlir::MLIRContext *context, 971 int quadCnt, int pairCnt, int vecCnt, 972 int intCnt = 0, 973 int vecElemBitSize = 8, 974 int intBitSize = 32) { 975 // Constructs a function type with the following signature: 976 // Result type: __vector_pair 977 // Arguments: 978 // quadCnt: number of arguments that has __vector_quad type, followed by 979 // pairCnt: number of arguments that has __vector_pair type, followed by 980 // vecCnt: number of arguments that has vector(integer) type, followed by 981 // intCnt: number of arguments that has integer type 982 // vecElemBitSize: specifies the size of vector elements in bits 983 // intBitSize: specifies the size of integer arguments in bits 984 auto vType{mlir::VectorType::get( 985 128 / vecElemBitSize, mlir::IntegerType::get(context, vecElemBitSize))}; 986 auto vpType{fir::VectorType::get(256, mlir::IntegerType::get(context, 1))}; 987 auto vqType{fir::VectorType::get(512, mlir::IntegerType::get(context, 1))}; 988 auto iType{mlir::IntegerType::get(context, intBitSize)}; 989 llvm::SmallVector<mlir::Type> argTypes; 990 for (int i = 0; i < quadCnt; ++i) { 991 argTypes.push_back(vqType); 992 } 993 for (int i = 0; i < pairCnt; ++i) { 994 argTypes.push_back(vpType); 995 } 996 for (int i = 0; i < vecCnt; ++i) { 997 argTypes.push_back(vType); 998 } 999 for (int i = 0; i < intCnt; ++i) { 1000 argTypes.push_back(iType); 1001 } 1002 1003 return mlir::FunctionType::get(context, argTypes, {vpType}); 1004 } 1005 1006 static mlir::FunctionType genMmaVqFuncType(mlir::MLIRContext *context, 1007 int quadCnt, int pairCnt, int vecCnt, 1008 int intCnt = 0, 1009 int vecElemBitSize = 8, 1010 int intBitSize = 32) { 1011 // Constructs a function type with the following signature: 1012 // Result type: __vector_quad 1013 // Arguments: 1014 // quadCnt: number of arguments that has __vector_quad type, followed by 1015 // pairCnt: number of arguments that has __vector_pair type, followed by 1016 // vecCnt: number of arguments that has vector(integer) type, followed by 1017 // intCnt: number of arguments that has integer type 1018 // vecElemBitSize: specifies the size of vector elements in bits 1019 // intBitSize: specifies the size of integer arguments in bits 1020 auto vType{mlir::VectorType::get( 1021 128 / vecElemBitSize, mlir::IntegerType::get(context, vecElemBitSize))}; 1022 auto vpType{fir::VectorType::get(256, mlir::IntegerType::get(context, 1))}; 1023 auto vqType{fir::VectorType::get(512, mlir::IntegerType::get(context, 1))}; 1024 auto iType{mlir::IntegerType::get(context, intBitSize)}; 1025 llvm::SmallVector<mlir::Type> argTypes; 1026 for (int i = 0; i < quadCnt; ++i) { 1027 argTypes.push_back(vqType); 1028 } 1029 for (int i = 0; i < pairCnt; ++i) { 1030 argTypes.push_back(vpType); 1031 } 1032 for (int i = 0; i < vecCnt; ++i) { 1033 argTypes.push_back(vType); 1034 } 1035 for (int i = 0; i < intCnt; ++i) { 1036 argTypes.push_back(iType); 1037 } 1038 1039 return mlir::FunctionType::get(context, argTypes, {vqType}); 1040 } 1041 1042 mlir::FunctionType genMmaDisassembleFuncType(mlir::MLIRContext *context, 1043 MMAOp mmaOp) { 1044 auto vType{mlir::VectorType::get(16, mlir::IntegerType::get(context, 8))}; 1045 llvm::SmallVector<mlir::Type> members; 1046 1047 if (mmaOp == MMAOp::DisassembleAcc) { 1048 auto vqType{fir::VectorType::get(512, mlir::IntegerType::get(context, 1))}; 1049 members.push_back(vType); 1050 members.push_back(vType); 1051 members.push_back(vType); 1052 members.push_back(vType); 1053 auto resType{mlir::LLVM::LLVMStructType::getLiteral(context, members)}; 1054 return mlir::FunctionType::get(context, {vqType}, {resType}); 1055 } else if (mmaOp == MMAOp::DisassemblePair) { 1056 auto vpType{fir::VectorType::get(256, mlir::IntegerType::get(context, 1))}; 1057 members.push_back(vType); 1058 members.push_back(vType); 1059 auto resType{mlir::LLVM::LLVMStructType::getLiteral(context, members)}; 1060 return mlir::FunctionType::get(context, {vpType}, {resType}); 1061 } else { 1062 llvm_unreachable( 1063 "Unsupported intrinsic code for function signature generator"); 1064 } 1065 } 1066 1067 //===----------------------------------------------------------------------===// 1068 // PowerPC specific intrinsic handlers. 1069 //===----------------------------------------------------------------------===// 1070 1071 // MTFSF, MTFSFI 1072 template <bool isImm> 1073 void PPCIntrinsicLibrary::genMtfsf(llvm::ArrayRef<fir::ExtendedValue> args) { 1074 assert(args.size() == 2); 1075 llvm::SmallVector<mlir::Value> scalarArgs; 1076 for (const fir::ExtendedValue &arg : args) 1077 if (arg.getUnboxed()) 1078 scalarArgs.emplace_back(fir::getBase(arg)); 1079 else 1080 mlir::emitError(loc, "nonscalar intrinsic argument"); 1081 1082 mlir::FunctionType libFuncType; 1083 mlir::func::FuncOp funcOp; 1084 if (isImm) { 1085 libFuncType = genFuncType<Ty::Void, Ty::Integer<4>, Ty::Integer<4>>( 1086 builder.getContext(), builder); 1087 funcOp = builder.createFunction(loc, "llvm.ppc.mtfsfi", libFuncType); 1088 } else { 1089 libFuncType = genFuncType<Ty::Void, Ty::Integer<4>, Ty::Real<8>>( 1090 builder.getContext(), builder); 1091 funcOp = builder.createFunction(loc, "llvm.ppc.mtfsf", libFuncType); 1092 } 1093 builder.create<fir::CallOp>(loc, funcOp, scalarArgs); 1094 } 1095 1096 // VEC_ABS 1097 fir::ExtendedValue 1098 PPCIntrinsicLibrary::genVecAbs(mlir::Type resultType, 1099 llvm::ArrayRef<fir::ExtendedValue> args) { 1100 assert(args.size() == 1); 1101 auto context{builder.getContext()}; 1102 auto argBases{getBasesForArgs(args)}; 1103 auto vTypeInfo{getVecTypeFromFir(argBases[0])}; 1104 1105 mlir::func::FuncOp funcOp{nullptr}; 1106 mlir::FunctionType ftype; 1107 llvm::StringRef fname{}; 1108 if (vTypeInfo.isFloat()) { 1109 if (vTypeInfo.isFloat32()) { 1110 fname = "llvm.fabs.v4f32"; 1111 ftype = 1112 genFuncType<Ty::RealVector<4>, Ty::RealVector<4>>(context, builder); 1113 } else if (vTypeInfo.isFloat64()) { 1114 fname = "llvm.fabs.v2f64"; 1115 ftype = 1116 genFuncType<Ty::RealVector<8>, Ty::RealVector<8>>(context, builder); 1117 } 1118 1119 funcOp = builder.createFunction(loc, fname, ftype); 1120 auto callOp{builder.create<fir::CallOp>(loc, funcOp, argBases[0])}; 1121 return callOp.getResult(0); 1122 } else if (auto eleTy = mlir::dyn_cast<mlir::IntegerType>(vTypeInfo.eleTy)) { 1123 // vec_abs(arg1) = max(0 - arg1, arg1) 1124 1125 auto newVecTy{mlir::VectorType::get(vTypeInfo.len, eleTy)}; 1126 auto varg1{builder.createConvert(loc, newVecTy, argBases[0])}; 1127 // construct vector(0,..) 1128 auto zeroVal{builder.createIntegerConstant(loc, eleTy, 0)}; 1129 auto vZero{ 1130 builder.create<mlir::vector::BroadcastOp>(loc, newVecTy, zeroVal)}; 1131 auto zeroSubVarg1{builder.create<mlir::arith::SubIOp>(loc, vZero, varg1)}; 1132 1133 mlir::func::FuncOp funcOp{nullptr}; 1134 switch (eleTy.getWidth()) { 1135 case 8: 1136 fname = "llvm.ppc.altivec.vmaxsb"; 1137 ftype = genFuncType<Ty::IntegerVector<1>, Ty::IntegerVector<1>, 1138 Ty::IntegerVector<1>>(context, builder); 1139 break; 1140 case 16: 1141 fname = "llvm.ppc.altivec.vmaxsh"; 1142 ftype = genFuncType<Ty::IntegerVector<2>, Ty::IntegerVector<2>, 1143 Ty::IntegerVector<2>>(context, builder); 1144 break; 1145 case 32: 1146 fname = "llvm.ppc.altivec.vmaxsw"; 1147 ftype = genFuncType<Ty::IntegerVector<4>, Ty::IntegerVector<4>, 1148 Ty::IntegerVector<4>>(context, builder); 1149 break; 1150 case 64: 1151 fname = "llvm.ppc.altivec.vmaxsd"; 1152 ftype = genFuncType<Ty::IntegerVector<8>, Ty::IntegerVector<8>, 1153 Ty::IntegerVector<8>>(context, builder); 1154 break; 1155 default: 1156 llvm_unreachable("invalid integer size"); 1157 } 1158 funcOp = builder.createFunction(loc, fname, ftype); 1159 1160 mlir::Value args[] = {zeroSubVarg1, varg1}; 1161 auto callOp{builder.create<fir::CallOp>(loc, funcOp, args)}; 1162 return builder.createConvert(loc, argBases[0].getType(), 1163 callOp.getResult(0)); 1164 } 1165 1166 llvm_unreachable("unknown vector type"); 1167 } 1168 1169 // VEC_ADD, VEC_AND, VEC_SUB, VEC_MUL, VEC_XOR 1170 template <VecOp vop> 1171 fir::ExtendedValue PPCIntrinsicLibrary::genVecAddAndMulSubXor( 1172 mlir::Type resultType, llvm::ArrayRef<fir::ExtendedValue> args) { 1173 assert(args.size() == 2); 1174 auto argBases{getBasesForArgs(args)}; 1175 auto argsTy{getTypesForArgs(argBases)}; 1176 assert(mlir::isa<fir::VectorType>(argsTy[0]) && 1177 mlir::isa<fir::VectorType>(argsTy[1])); 1178 1179 auto vecTyInfo{getVecTypeFromFir(argBases[0])}; 1180 1181 const auto isInteger{mlir::isa<mlir::IntegerType>(vecTyInfo.eleTy)}; 1182 const auto isFloat{mlir::isa<mlir::FloatType>(vecTyInfo.eleTy)}; 1183 assert((isInteger || isFloat) && "unknown vector type"); 1184 1185 auto vargs{convertVecArgs(builder, loc, vecTyInfo, argBases)}; 1186 1187 mlir::Value r{nullptr}; 1188 switch (vop) { 1189 case VecOp::Add: 1190 if (isInteger) 1191 r = builder.create<mlir::arith::AddIOp>(loc, vargs[0], vargs[1]); 1192 else if (isFloat) 1193 r = builder.create<mlir::arith::AddFOp>(loc, vargs[0], vargs[1]); 1194 break; 1195 case VecOp::Mul: 1196 if (isInteger) 1197 r = builder.create<mlir::arith::MulIOp>(loc, vargs[0], vargs[1]); 1198 else if (isFloat) 1199 r = builder.create<mlir::arith::MulFOp>(loc, vargs[0], vargs[1]); 1200 break; 1201 case VecOp::Sub: 1202 if (isInteger) 1203 r = builder.create<mlir::arith::SubIOp>(loc, vargs[0], vargs[1]); 1204 else if (isFloat) 1205 r = builder.create<mlir::arith::SubFOp>(loc, vargs[0], vargs[1]); 1206 break; 1207 case VecOp::And: 1208 case VecOp::Xor: { 1209 mlir::Value arg1{nullptr}; 1210 mlir::Value arg2{nullptr}; 1211 if (isInteger) { 1212 arg1 = vargs[0]; 1213 arg2 = vargs[1]; 1214 } else if (isFloat) { 1215 // bitcast the arguments to integer 1216 auto wd{mlir::dyn_cast<mlir::FloatType>(vecTyInfo.eleTy).getWidth()}; 1217 auto ftype{builder.getIntegerType(wd)}; 1218 auto bcVecTy{mlir::VectorType::get(vecTyInfo.len, ftype)}; 1219 arg1 = builder.create<mlir::vector::BitCastOp>(loc, bcVecTy, vargs[0]); 1220 arg2 = builder.create<mlir::vector::BitCastOp>(loc, bcVecTy, vargs[1]); 1221 } 1222 if (vop == VecOp::And) 1223 r = builder.create<mlir::arith::AndIOp>(loc, arg1, arg2); 1224 else if (vop == VecOp::Xor) 1225 r = builder.create<mlir::arith::XOrIOp>(loc, arg1, arg2); 1226 1227 if (isFloat) 1228 r = builder.create<mlir::vector::BitCastOp>(loc, vargs[0].getType(), r); 1229 1230 break; 1231 } 1232 } 1233 1234 return builder.createConvert(loc, argsTy[0], r); 1235 } 1236 1237 // VEC_ANY_GE 1238 template <VecOp vop> 1239 fir::ExtendedValue 1240 PPCIntrinsicLibrary::genVecAnyCompare(mlir::Type resultType, 1241 llvm::ArrayRef<fir::ExtendedValue> args) { 1242 assert(args.size() == 2); 1243 assert(vop == VecOp::Anyge && "unknown vector compare operation"); 1244 auto argBases{getBasesForArgs(args)}; 1245 VecTypeInfo vTypeInfo{getVecTypeFromFir(argBases[0])}; 1246 [[maybe_unused]] const auto isSupportedTy{ 1247 mlir::isa<mlir::Float32Type, mlir::Float64Type, mlir::IntegerType>( 1248 vTypeInfo.eleTy)}; 1249 assert(isSupportedTy && "unsupported vector type"); 1250 1251 // Constants for mapping CR6 bits to predicate result 1252 enum { CR6_EQ_REV = 1, CR6_LT_REV = 3 }; 1253 1254 auto context{builder.getContext()}; 1255 1256 static std::map<std::pair<ParamTypeId, unsigned>, 1257 std::pair<llvm::StringRef, mlir::FunctionType>> 1258 uiBuiltin{ 1259 {std::make_pair(ParamTypeId::IntegerVector, 8), 1260 std::make_pair( 1261 "llvm.ppc.altivec.vcmpgtsb.p", 1262 genFuncType<Ty::Integer<4>, Ty::Integer<4>, Ty::IntegerVector<1>, 1263 Ty::IntegerVector<1>>(context, builder))}, 1264 {std::make_pair(ParamTypeId::IntegerVector, 16), 1265 std::make_pair( 1266 "llvm.ppc.altivec.vcmpgtsh.p", 1267 genFuncType<Ty::Integer<4>, Ty::Integer<4>, Ty::IntegerVector<2>, 1268 Ty::IntegerVector<2>>(context, builder))}, 1269 {std::make_pair(ParamTypeId::IntegerVector, 32), 1270 std::make_pair( 1271 "llvm.ppc.altivec.vcmpgtsw.p", 1272 genFuncType<Ty::Integer<4>, Ty::Integer<4>, Ty::IntegerVector<4>, 1273 Ty::IntegerVector<4>>(context, builder))}, 1274 {std::make_pair(ParamTypeId::IntegerVector, 64), 1275 std::make_pair( 1276 "llvm.ppc.altivec.vcmpgtsd.p", 1277 genFuncType<Ty::Integer<4>, Ty::Integer<4>, Ty::IntegerVector<8>, 1278 Ty::IntegerVector<8>>(context, builder))}, 1279 {std::make_pair(ParamTypeId::UnsignedVector, 8), 1280 std::make_pair( 1281 "llvm.ppc.altivec.vcmpgtub.p", 1282 genFuncType<Ty::Integer<4>, Ty::Integer<4>, 1283 Ty::UnsignedVector<1>, Ty::UnsignedVector<1>>( 1284 context, builder))}, 1285 {std::make_pair(ParamTypeId::UnsignedVector, 16), 1286 std::make_pair( 1287 "llvm.ppc.altivec.vcmpgtuh.p", 1288 genFuncType<Ty::Integer<4>, Ty::Integer<4>, 1289 Ty::UnsignedVector<2>, Ty::UnsignedVector<2>>( 1290 context, builder))}, 1291 {std::make_pair(ParamTypeId::UnsignedVector, 32), 1292 std::make_pair( 1293 "llvm.ppc.altivec.vcmpgtuw.p", 1294 genFuncType<Ty::Integer<4>, Ty::Integer<4>, 1295 Ty::UnsignedVector<4>, Ty::UnsignedVector<4>>( 1296 context, builder))}, 1297 {std::make_pair(ParamTypeId::UnsignedVector, 64), 1298 std::make_pair( 1299 "llvm.ppc.altivec.vcmpgtud.p", 1300 genFuncType<Ty::Integer<4>, Ty::Integer<4>, 1301 Ty::UnsignedVector<8>, Ty::UnsignedVector<8>>( 1302 context, builder))}, 1303 }; 1304 1305 mlir::FunctionType ftype{nullptr}; 1306 llvm::StringRef fname; 1307 const auto i32Ty{mlir::IntegerType::get(context, 32)}; 1308 llvm::SmallVector<mlir::Value> cmpArgs; 1309 mlir::Value op{nullptr}; 1310 const auto width{vTypeInfo.eleTy.getIntOrFloatBitWidth()}; 1311 1312 if (auto elementTy = mlir::dyn_cast<mlir::IntegerType>(vTypeInfo.eleTy)) { 1313 std::pair<llvm::StringRef, mlir::FunctionType> bi; 1314 bi = (elementTy.isUnsignedInteger()) 1315 ? uiBuiltin[std::pair(ParamTypeId::UnsignedVector, width)] 1316 : uiBuiltin[std::pair(ParamTypeId::IntegerVector, width)]; 1317 1318 fname = std::get<0>(bi); 1319 ftype = std::get<1>(bi); 1320 1321 op = builder.createIntegerConstant(loc, i32Ty, CR6_LT_REV); 1322 cmpArgs.emplace_back(op); 1323 // reverse the argument order 1324 cmpArgs.emplace_back(argBases[1]); 1325 cmpArgs.emplace_back(argBases[0]); 1326 } else if (vTypeInfo.isFloat()) { 1327 if (vTypeInfo.isFloat32()) { 1328 fname = "llvm.ppc.vsx.xvcmpgesp.p"; 1329 ftype = genFuncType<Ty::Integer<4>, Ty::Integer<4>, Ty::RealVector<4>, 1330 Ty::RealVector<4>>(context, builder); 1331 } else { 1332 fname = "llvm.ppc.vsx.xvcmpgedp.p"; 1333 ftype = genFuncType<Ty::Integer<4>, Ty::Integer<4>, Ty::RealVector<8>, 1334 Ty::RealVector<8>>(context, builder); 1335 } 1336 op = builder.createIntegerConstant(loc, i32Ty, CR6_EQ_REV); 1337 cmpArgs.emplace_back(op); 1338 cmpArgs.emplace_back(argBases[0]); 1339 cmpArgs.emplace_back(argBases[1]); 1340 } 1341 assert((!fname.empty() && ftype) && "invalid type"); 1342 1343 mlir::func::FuncOp funcOp{builder.createFunction(loc, fname, ftype)}; 1344 auto callOp{builder.create<fir::CallOp>(loc, funcOp, cmpArgs)}; 1345 return callOp.getResult(0); 1346 } 1347 1348 static std::pair<llvm::StringRef, mlir::FunctionType> 1349 getVecCmpFuncTypeAndName(VecTypeInfo &vTypeInfo, VecOp vop, 1350 fir::FirOpBuilder &builder) { 1351 auto context{builder.getContext()}; 1352 static std::map<std::pair<ParamTypeId, unsigned>, 1353 std::pair<llvm::StringRef, mlir::FunctionType>> 1354 iuBuiltinName{ 1355 {std::make_pair(ParamTypeId::IntegerVector, 8), 1356 std::make_pair( 1357 "llvm.ppc.altivec.vcmpgtsb", 1358 genFuncType<Ty::UnsignedVector<1>, Ty::IntegerVector<1>, 1359 Ty::IntegerVector<1>>(context, builder))}, 1360 {std::make_pair(ParamTypeId::IntegerVector, 16), 1361 std::make_pair( 1362 "llvm.ppc.altivec.vcmpgtsh", 1363 genFuncType<Ty::UnsignedVector<2>, Ty::IntegerVector<2>, 1364 Ty::IntegerVector<2>>(context, builder))}, 1365 {std::make_pair(ParamTypeId::IntegerVector, 32), 1366 std::make_pair( 1367 "llvm.ppc.altivec.vcmpgtsw", 1368 genFuncType<Ty::UnsignedVector<4>, Ty::IntegerVector<4>, 1369 Ty::IntegerVector<4>>(context, builder))}, 1370 {std::make_pair(ParamTypeId::IntegerVector, 64), 1371 std::make_pair( 1372 "llvm.ppc.altivec.vcmpgtsd", 1373 genFuncType<Ty::UnsignedVector<8>, Ty::IntegerVector<8>, 1374 Ty::IntegerVector<8>>(context, builder))}, 1375 {std::make_pair(ParamTypeId::UnsignedVector, 8), 1376 std::make_pair( 1377 "llvm.ppc.altivec.vcmpgtub", 1378 genFuncType<Ty::UnsignedVector<1>, Ty::UnsignedVector<1>, 1379 Ty::UnsignedVector<1>>(context, builder))}, 1380 {std::make_pair(ParamTypeId::UnsignedVector, 16), 1381 std::make_pair( 1382 "llvm.ppc.altivec.vcmpgtuh", 1383 genFuncType<Ty::UnsignedVector<2>, Ty::UnsignedVector<2>, 1384 Ty::UnsignedVector<2>>(context, builder))}, 1385 {std::make_pair(ParamTypeId::UnsignedVector, 32), 1386 std::make_pair( 1387 "llvm.ppc.altivec.vcmpgtuw", 1388 genFuncType<Ty::UnsignedVector<4>, Ty::UnsignedVector<4>, 1389 Ty::UnsignedVector<4>>(context, builder))}, 1390 {std::make_pair(ParamTypeId::UnsignedVector, 64), 1391 std::make_pair( 1392 "llvm.ppc.altivec.vcmpgtud", 1393 genFuncType<Ty::UnsignedVector<8>, Ty::UnsignedVector<8>, 1394 Ty::UnsignedVector<8>>(context, builder))}}; 1395 1396 // VSX only defines GE and GT builtins. Cmple and Cmplt use GE and GT with 1397 // arguments revsered. 1398 enum class Cmp { gtOrLt, geOrLe }; 1399 static std::map<std::pair<Cmp, int>, 1400 std::pair<llvm::StringRef, mlir::FunctionType>> 1401 rGBI{{std::make_pair(Cmp::geOrLe, 32), 1402 std::make_pair("llvm.ppc.vsx.xvcmpgesp", 1403 genFuncType<Ty::UnsignedVector<4>, Ty::RealVector<4>, 1404 Ty::RealVector<4>>(context, builder))}, 1405 {std::make_pair(Cmp::geOrLe, 64), 1406 std::make_pair("llvm.ppc.vsx.xvcmpgedp", 1407 genFuncType<Ty::UnsignedVector<8>, Ty::RealVector<8>, 1408 Ty::RealVector<8>>(context, builder))}, 1409 {std::make_pair(Cmp::gtOrLt, 32), 1410 std::make_pair("llvm.ppc.vsx.xvcmpgtsp", 1411 genFuncType<Ty::UnsignedVector<4>, Ty::RealVector<4>, 1412 Ty::RealVector<4>>(context, builder))}, 1413 {std::make_pair(Cmp::gtOrLt, 64), 1414 std::make_pair("llvm.ppc.vsx.xvcmpgtdp", 1415 genFuncType<Ty::UnsignedVector<8>, Ty::RealVector<8>, 1416 Ty::RealVector<8>>(context, builder))}}; 1417 1418 const auto width{vTypeInfo.eleTy.getIntOrFloatBitWidth()}; 1419 std::pair<llvm::StringRef, mlir::FunctionType> specFunc; 1420 if (auto elementTy = mlir::dyn_cast<mlir::IntegerType>(vTypeInfo.eleTy)) 1421 specFunc = 1422 (elementTy.isUnsignedInteger()) 1423 ? iuBuiltinName[std::make_pair(ParamTypeId::UnsignedVector, width)] 1424 : iuBuiltinName[std::make_pair(ParamTypeId::IntegerVector, width)]; 1425 else if (vTypeInfo.isFloat()) 1426 specFunc = (vop == VecOp::Cmpge || vop == VecOp::Cmple) 1427 ? rGBI[std::make_pair(Cmp::geOrLe, width)] 1428 : rGBI[std::make_pair(Cmp::gtOrLt, width)]; 1429 1430 assert(!std::get<0>(specFunc).empty() && "unknown builtin name"); 1431 assert(std::get<1>(specFunc) && "unknown function type"); 1432 return specFunc; 1433 } 1434 1435 // VEC_CMPGE, VEC_CMPGT, VEC_CMPLE, VEC_CMPLT 1436 template <VecOp vop> 1437 fir::ExtendedValue 1438 PPCIntrinsicLibrary::genVecCmp(mlir::Type resultType, 1439 llvm::ArrayRef<fir::ExtendedValue> args) { 1440 assert(args.size() == 2); 1441 auto context{builder.getContext()}; 1442 auto argBases{getBasesForArgs(args)}; 1443 VecTypeInfo vecTyInfo{getVecTypeFromFir(argBases[0])}; 1444 auto varg{convertVecArgs(builder, loc, vecTyInfo, argBases)}; 1445 1446 std::pair<llvm::StringRef, mlir::FunctionType> funcTyNam{ 1447 getVecCmpFuncTypeAndName(vecTyInfo, vop, builder)}; 1448 1449 mlir::func::FuncOp funcOp = builder.createFunction( 1450 loc, std::get<0>(funcTyNam), std::get<1>(funcTyNam)); 1451 1452 mlir::Value res{nullptr}; 1453 1454 if (auto eTy = mlir::dyn_cast<mlir::IntegerType>(vecTyInfo.eleTy)) { 1455 constexpr int firstArg{0}; 1456 constexpr int secondArg{1}; 1457 std::map<VecOp, std::array<int, 2>> argOrder{ 1458 {VecOp::Cmpge, {secondArg, firstArg}}, 1459 {VecOp::Cmple, {firstArg, secondArg}}, 1460 {VecOp::Cmpgt, {firstArg, secondArg}}, 1461 {VecOp::Cmplt, {secondArg, firstArg}}}; 1462 1463 // Construct the function return type, unsigned vector, for conversion. 1464 auto itype = mlir::IntegerType::get(context, eTy.getWidth(), 1465 mlir::IntegerType::Unsigned); 1466 auto returnType = fir::VectorType::get(vecTyInfo.len, itype); 1467 1468 switch (vop) { 1469 case VecOp::Cmpgt: 1470 case VecOp::Cmplt: { 1471 // arg1 > arg2 --> vcmpgt(arg1, arg2) 1472 // arg1 < arg2 --> vcmpgt(arg2, arg1) 1473 mlir::Value vargs[]{argBases[argOrder[vop][0]], 1474 argBases[argOrder[vop][1]]}; 1475 auto callOp{builder.create<fir::CallOp>(loc, funcOp, vargs)}; 1476 res = callOp.getResult(0); 1477 break; 1478 } 1479 case VecOp::Cmpge: 1480 case VecOp::Cmple: { 1481 // arg1 >= arg2 --> vcmpge(arg2, arg1) xor vector(-1) 1482 // arg1 <= arg2 --> vcmpge(arg1, arg2) xor vector(-1) 1483 mlir::Value vargs[]{argBases[argOrder[vop][0]], 1484 argBases[argOrder[vop][1]]}; 1485 1486 // Construct a constant vector(-1) 1487 auto negOneVal{builder.createIntegerConstant( 1488 loc, getConvertedElementType(context, eTy), -1)}; 1489 auto vNegOne{builder.create<mlir::vector::BroadcastOp>( 1490 loc, vecTyInfo.toMlirVectorType(context), negOneVal)}; 1491 1492 auto callOp{builder.create<fir::CallOp>(loc, funcOp, vargs)}; 1493 mlir::Value callRes{callOp.getResult(0)}; 1494 auto vargs2{ 1495 convertVecArgs(builder, loc, vecTyInfo, mlir::ValueRange{callRes})}; 1496 auto xorRes{builder.create<mlir::arith::XOrIOp>(loc, vargs2[0], vNegOne)}; 1497 1498 res = builder.createConvert(loc, returnType, xorRes); 1499 break; 1500 } 1501 default: 1502 llvm_unreachable("Invalid vector operation for generator"); 1503 } 1504 } else if (vecTyInfo.isFloat()) { 1505 mlir::Value vargs[2]; 1506 switch (vop) { 1507 case VecOp::Cmpge: 1508 case VecOp::Cmpgt: 1509 vargs[0] = argBases[0]; 1510 vargs[1] = argBases[1]; 1511 break; 1512 case VecOp::Cmple: 1513 case VecOp::Cmplt: 1514 // Swap the arguments as xvcmpg[et] is used 1515 vargs[0] = argBases[1]; 1516 vargs[1] = argBases[0]; 1517 break; 1518 default: 1519 llvm_unreachable("Invalid vector operation for generator"); 1520 } 1521 auto callOp{builder.create<fir::CallOp>(loc, funcOp, vargs)}; 1522 res = callOp.getResult(0); 1523 } else 1524 llvm_unreachable("invalid vector type"); 1525 1526 return res; 1527 } 1528 1529 static inline mlir::Value swapVectorWordPairs(fir::FirOpBuilder &builder, 1530 mlir::Location loc, 1531 mlir::Value arg) { 1532 auto ty = arg.getType(); 1533 auto context{builder.getContext()}; 1534 auto vtype{mlir::VectorType::get(16, mlir::IntegerType::get(context, 8))}; 1535 1536 if (ty != vtype) 1537 arg = builder.create<mlir::LLVM::BitcastOp>(loc, vtype, arg).getResult(); 1538 1539 llvm::SmallVector<int64_t, 16> mask{4, 5, 6, 7, 0, 1, 2, 3, 1540 12, 13, 14, 15, 8, 9, 10, 11}; 1541 arg = builder.create<mlir::vector::ShuffleOp>(loc, arg, arg, mask); 1542 if (ty != vtype) 1543 arg = builder.create<mlir::LLVM::BitcastOp>(loc, ty, arg); 1544 return arg; 1545 } 1546 1547 // VEC_CONVERT, VEC_CTF, VEC_CVF 1548 template <VecOp vop> 1549 fir::ExtendedValue 1550 PPCIntrinsicLibrary::genVecConvert(mlir::Type resultType, 1551 llvm::ArrayRef<fir::ExtendedValue> args) { 1552 auto context{builder.getContext()}; 1553 auto argBases{getBasesForArgs(args)}; 1554 auto vecTyInfo{getVecTypeFromFir(argBases[0])}; 1555 auto mlirTy{vecTyInfo.toMlirVectorType(context)}; 1556 auto vArg1{builder.createConvert(loc, mlirTy, argBases[0])}; 1557 const auto i32Ty{mlir::IntegerType::get(context, 32)}; 1558 1559 switch (vop) { 1560 case VecOp::Ctf: { 1561 assert(args.size() == 2); 1562 auto convArg{builder.createConvert(loc, i32Ty, argBases[1])}; 1563 auto eTy{mlir::dyn_cast<mlir::IntegerType>(vecTyInfo.eleTy)}; 1564 assert(eTy && "Unsupported vector type"); 1565 const auto isUnsigned{eTy.isUnsignedInteger()}; 1566 const auto width{eTy.getWidth()}; 1567 1568 if (width == 32) { 1569 auto ftype{(isUnsigned) 1570 ? genFuncType<Ty::RealVector<4>, Ty::UnsignedVector<4>, 1571 Ty::Integer<4>>(context, builder) 1572 : genFuncType<Ty::RealVector<4>, Ty::IntegerVector<4>, 1573 Ty::Integer<4>>(context, builder)}; 1574 const llvm::StringRef fname{(isUnsigned) ? "llvm.ppc.altivec.vcfux" 1575 : "llvm.ppc.altivec.vcfsx"}; 1576 auto funcOp{builder.createFunction(loc, fname, ftype)}; 1577 mlir::Value newArgs[] = {argBases[0], convArg}; 1578 auto callOp{builder.create<fir::CallOp>(loc, funcOp, newArgs)}; 1579 1580 return callOp.getResult(0); 1581 } else if (width == 64) { 1582 auto fTy{mlir::Float64Type::get(context)}; 1583 auto ty{mlir::VectorType::get(2, fTy)}; 1584 1585 // vec_vtf(arg1, arg2) = fmul(1.0 / (1 << arg2), llvm.sitofp(arg1)) 1586 auto convOp{(isUnsigned) 1587 ? builder.create<mlir::LLVM::UIToFPOp>(loc, ty, vArg1) 1588 : builder.create<mlir::LLVM::SIToFPOp>(loc, ty, vArg1)}; 1589 1590 // construct vector<1./(1<<arg1), 1.0/(1<<arg1)> 1591 auto constInt{mlir::dyn_cast_or_null<mlir::IntegerAttr>( 1592 mlir::dyn_cast<mlir::arith::ConstantOp>(argBases[1].getDefiningOp()) 1593 .getValue())}; 1594 assert(constInt && "expected integer constant argument"); 1595 double f{1.0 / (1 << constInt.getInt())}; 1596 llvm::SmallVector<double> vals{f, f}; 1597 auto constOp{builder.create<mlir::arith::ConstantOp>( 1598 loc, ty, builder.getF64VectorAttr(vals))}; 1599 1600 auto mulOp{builder.create<mlir::LLVM::FMulOp>( 1601 loc, ty, convOp->getResult(0), constOp)}; 1602 1603 return builder.createConvert(loc, fir::VectorType::get(2, fTy), mulOp); 1604 } 1605 llvm_unreachable("invalid element integer kind"); 1606 } 1607 case VecOp::Convert: { 1608 assert(args.size() == 2); 1609 // resultType has mold type (if scalar) or element type (if array) 1610 auto resTyInfo{getVecTypeFromFirType(resultType)}; 1611 auto moldTy{resTyInfo.toMlirVectorType(context)}; 1612 auto firTy{resTyInfo.toFirVectorType()}; 1613 1614 // vec_convert(v, mold) = bitcast v to "type of mold" 1615 auto conv{builder.create<mlir::LLVM::BitcastOp>(loc, moldTy, vArg1)}; 1616 1617 return builder.createConvert(loc, firTy, conv); 1618 } 1619 case VecOp::Cvf: { 1620 assert(args.size() == 1); 1621 1622 mlir::Value newArgs[]{vArg1}; 1623 if (vecTyInfo.isFloat32()) { 1624 if (changeVecElemOrder()) 1625 newArgs[0] = swapVectorWordPairs(builder, loc, newArgs[0]); 1626 1627 const llvm::StringRef fname{"llvm.ppc.vsx.xvcvspdp"}; 1628 auto ftype{ 1629 genFuncType<Ty::RealVector<8>, Ty::RealVector<4>>(context, builder)}; 1630 auto funcOp{builder.createFunction(loc, fname, ftype)}; 1631 auto callOp{builder.create<fir::CallOp>(loc, funcOp, newArgs)}; 1632 1633 return callOp.getResult(0); 1634 } else if (vecTyInfo.isFloat64()) { 1635 const llvm::StringRef fname{"llvm.ppc.vsx.xvcvdpsp"}; 1636 auto ftype{ 1637 genFuncType<Ty::RealVector<4>, Ty::RealVector<8>>(context, builder)}; 1638 auto funcOp{builder.createFunction(loc, fname, ftype)}; 1639 newArgs[0] = 1640 builder.create<fir::CallOp>(loc, funcOp, newArgs).getResult(0); 1641 auto fvf32Ty{newArgs[0].getType()}; 1642 auto f32type{mlir::Float32Type::get(context)}; 1643 auto mvf32Ty{mlir::VectorType::get(4, f32type)}; 1644 newArgs[0] = builder.createConvert(loc, mvf32Ty, newArgs[0]); 1645 1646 if (changeVecElemOrder()) 1647 newArgs[0] = swapVectorWordPairs(builder, loc, newArgs[0]); 1648 1649 return builder.createConvert(loc, fvf32Ty, newArgs[0]); 1650 } 1651 llvm_unreachable("invalid element integer kind"); 1652 } 1653 default: 1654 llvm_unreachable("Invalid vector operation for generator"); 1655 } 1656 } 1657 1658 static mlir::Value convertVectorElementOrder(fir::FirOpBuilder &builder, 1659 mlir::Location loc, 1660 VecTypeInfo vecInfo, 1661 mlir::Value idx) { 1662 mlir::Value numSub1{ 1663 builder.createIntegerConstant(loc, idx.getType(), vecInfo.len - 1)}; 1664 return builder.create<mlir::LLVM::SubOp>(loc, idx.getType(), numSub1, idx); 1665 } 1666 1667 // VEC_EXTRACT 1668 fir::ExtendedValue 1669 PPCIntrinsicLibrary::genVecExtract(mlir::Type resultType, 1670 llvm::ArrayRef<fir::ExtendedValue> args) { 1671 assert(args.size() == 2); 1672 auto argBases{getBasesForArgs(args)}; 1673 auto argTypes{getTypesForArgs(argBases)}; 1674 auto vecTyInfo{getVecTypeFromFir(argBases[0])}; 1675 1676 auto mlirTy{vecTyInfo.toMlirVectorType(builder.getContext())}; 1677 auto varg0{builder.createConvert(loc, mlirTy, argBases[0])}; 1678 1679 // arg2 modulo the number of elements in arg1 to determine the element 1680 // position 1681 auto numEle{builder.createIntegerConstant(loc, argTypes[1], vecTyInfo.len)}; 1682 mlir::Value uremOp{ 1683 builder.create<mlir::LLVM::URemOp>(loc, argBases[1], numEle)}; 1684 1685 if (!isNativeVecElemOrderOnLE()) 1686 uremOp = convertVectorElementOrder(builder, loc, vecTyInfo, uremOp); 1687 1688 return builder.create<mlir::vector::ExtractElementOp>(loc, varg0, uremOp); 1689 } 1690 1691 // VEC_INSERT 1692 fir::ExtendedValue 1693 PPCIntrinsicLibrary::genVecInsert(mlir::Type resultType, 1694 llvm::ArrayRef<fir::ExtendedValue> args) { 1695 assert(args.size() == 3); 1696 auto argBases{getBasesForArgs(args)}; 1697 auto argTypes{getTypesForArgs(argBases)}; 1698 auto vecTyInfo{getVecTypeFromFir(argBases[1])}; 1699 auto mlirTy{vecTyInfo.toMlirVectorType(builder.getContext())}; 1700 auto varg1{builder.createConvert(loc, mlirTy, argBases[1])}; 1701 1702 auto numEle{builder.createIntegerConstant(loc, argTypes[2], vecTyInfo.len)}; 1703 mlir::Value uremOp{ 1704 builder.create<mlir::LLVM::URemOp>(loc, argBases[2], numEle)}; 1705 1706 if (!isNativeVecElemOrderOnLE()) 1707 uremOp = convertVectorElementOrder(builder, loc, vecTyInfo, uremOp); 1708 1709 auto res{builder.create<mlir::vector::InsertElementOp>(loc, argBases[0], 1710 varg1, uremOp)}; 1711 return builder.create<fir::ConvertOp>(loc, vecTyInfo.toFirVectorType(), res); 1712 } 1713 1714 // VEC_MERGEH, VEC_MERGEL 1715 template <VecOp vop> 1716 fir::ExtendedValue 1717 PPCIntrinsicLibrary::genVecMerge(mlir::Type resultType, 1718 llvm::ArrayRef<fir::ExtendedValue> args) { 1719 assert(args.size() == 2); 1720 auto argBases{getBasesForArgs(args)}; 1721 auto vecTyInfo{getVecTypeFromFir(argBases[0])}; 1722 llvm::SmallVector<int64_t, 16> mMask; // native vector element order mask 1723 llvm::SmallVector<int64_t, 16> rMask; // non-native vector element order mask 1724 1725 switch (vop) { 1726 case VecOp::Mergeh: { 1727 switch (vecTyInfo.len) { 1728 case 2: { 1729 enum { V1 = 0, V2 = 2 }; 1730 mMask = {V1 + 0, V2 + 0}; 1731 rMask = {V2 + 1, V1 + 1}; 1732 break; 1733 } 1734 case 4: { 1735 enum { V1 = 0, V2 = 4 }; 1736 mMask = {V1 + 0, V2 + 0, V1 + 1, V2 + 1}; 1737 rMask = {V2 + 2, V1 + 2, V2 + 3, V1 + 3}; 1738 break; 1739 } 1740 case 8: { 1741 enum { V1 = 0, V2 = 8 }; 1742 mMask = {V1 + 0, V2 + 0, V1 + 1, V2 + 1, V1 + 2, V2 + 2, V1 + 3, V2 + 3}; 1743 rMask = {V2 + 4, V1 + 4, V2 + 5, V1 + 5, V2 + 6, V1 + 6, V2 + 7, V1 + 7}; 1744 break; 1745 } 1746 case 16: 1747 mMask = {0x00, 0x10, 0x01, 0x11, 0x02, 0x12, 0x03, 0x13, 1748 0x04, 0x14, 0x05, 0x15, 0x06, 0x16, 0x07, 0x17}; 1749 rMask = {0x18, 0x08, 0x19, 0x09, 0x1A, 0x0A, 0x1B, 0x0B, 1750 0x1C, 0x0C, 0x1D, 0x0D, 0x1E, 0x0E, 0x1F, 0x0F}; 1751 break; 1752 default: 1753 llvm_unreachable("unexpected vector length"); 1754 } 1755 break; 1756 } 1757 case VecOp::Mergel: { 1758 switch (vecTyInfo.len) { 1759 case 2: { 1760 enum { V1 = 0, V2 = 2 }; 1761 mMask = {V1 + 1, V2 + 1}; 1762 rMask = {V2 + 0, V1 + 0}; 1763 break; 1764 } 1765 case 4: { 1766 enum { V1 = 0, V2 = 4 }; 1767 mMask = {V1 + 2, V2 + 2, V1 + 3, V2 + 3}; 1768 rMask = {V2 + 0, V1 + 0, V2 + 1, V1 + 1}; 1769 break; 1770 } 1771 case 8: { 1772 enum { V1 = 0, V2 = 8 }; 1773 mMask = {V1 + 4, V2 + 4, V1 + 5, V2 + 5, V1 + 6, V2 + 6, V1 + 7, V2 + 7}; 1774 rMask = {V2 + 0, V1 + 0, V2 + 1, V1 + 1, V2 + 2, V1 + 2, V2 + 3, V1 + 3}; 1775 break; 1776 } 1777 case 16: 1778 mMask = {0x08, 0x18, 0x09, 0x19, 0x0A, 0x1A, 0x0B, 0x1B, 1779 0x0C, 0x1C, 0x0D, 0x1D, 0x0E, 0x1E, 0x0F, 0x1F}; 1780 rMask = {0x10, 0x00, 0x11, 0x01, 0x12, 0x02, 0x13, 0x03, 1781 0x14, 0x04, 0x15, 0x05, 0x16, 0x06, 0x17, 0x07}; 1782 break; 1783 default: 1784 llvm_unreachable("unexpected vector length"); 1785 } 1786 break; 1787 } 1788 default: 1789 llvm_unreachable("invalid vector operation for generator"); 1790 } 1791 1792 auto vargs{convertVecArgs(builder, loc, vecTyInfo, argBases)}; 1793 1794 llvm::SmallVector<int64_t, 16> &mergeMask = 1795 (isBEVecElemOrderOnLE()) ? rMask : mMask; 1796 1797 auto callOp{builder.create<mlir::vector::ShuffleOp>(loc, vargs[0], vargs[1], 1798 mergeMask)}; 1799 return builder.createConvert(loc, resultType, callOp); 1800 } 1801 1802 static mlir::Value addOffsetToAddress(fir::FirOpBuilder &builder, 1803 mlir::Location loc, mlir::Value baseAddr, 1804 mlir::Value offset) { 1805 auto typeExtent{fir::SequenceType::getUnknownExtent()}; 1806 // Construct an !fir.ref<!ref.array<?xi8>> type 1807 auto arrRefTy{builder.getRefType(fir::SequenceType::get( 1808 {typeExtent}, mlir::IntegerType::get(builder.getContext(), 8)))}; 1809 // Convert arg to !fir.ref<!ref.array<?xi8>> 1810 auto resAddr{builder.create<fir::ConvertOp>(loc, arrRefTy, baseAddr)}; 1811 1812 return builder.create<fir::CoordinateOp>(loc, arrRefTy, resAddr, offset); 1813 } 1814 1815 static mlir::Value reverseVectorElements(fir::FirOpBuilder &builder, 1816 mlir::Location loc, mlir::Value v, 1817 int64_t len) { 1818 assert(mlir::isa<mlir::VectorType>(v.getType())); 1819 assert(len > 0); 1820 llvm::SmallVector<int64_t, 16> mask; 1821 for (int64_t i = 0; i < len; ++i) { 1822 mask.push_back(len - 1 - i); 1823 } 1824 auto undefVec{builder.create<fir::UndefOp>(loc, v.getType())}; 1825 return builder.create<mlir::vector::ShuffleOp>(loc, v, undefVec, mask); 1826 } 1827 1828 static mlir::NamedAttribute getAlignmentAttr(fir::FirOpBuilder &builder, 1829 const int val) { 1830 auto i64ty{mlir::IntegerType::get(builder.getContext(), 64)}; 1831 auto alignAttr{mlir::IntegerAttr::get(i64ty, val)}; 1832 return builder.getNamedAttr("alignment", alignAttr); 1833 } 1834 1835 fir::ExtendedValue 1836 PPCIntrinsicLibrary::genVecXlGrp(mlir::Type resultType, 1837 llvm::ArrayRef<fir::ExtendedValue> args) { 1838 VecTypeInfo vecTyInfo{getVecTypeFromFirType(resultType)}; 1839 switch (vecTyInfo.eleTy.getIntOrFloatBitWidth()) { 1840 case 8: 1841 // vec_xlb1 1842 return genVecLdNoCallGrp<VecOp::Xl>(resultType, args); 1843 case 16: 1844 // vec_xlh8 1845 return genVecLdNoCallGrp<VecOp::Xl>(resultType, args); 1846 case 32: 1847 // vec_xlw4 1848 return genVecLdCallGrp<VecOp::Xlw4>(resultType, args); 1849 case 64: 1850 // vec_xld2 1851 return genVecLdCallGrp<VecOp::Xld2>(resultType, args); 1852 default: 1853 llvm_unreachable("invalid kind"); 1854 } 1855 llvm_unreachable("invalid vector operation for generator"); 1856 } 1857 1858 template <VecOp vop> 1859 fir::ExtendedValue PPCIntrinsicLibrary::genVecLdNoCallGrp( 1860 mlir::Type resultType, llvm::ArrayRef<fir::ExtendedValue> args) { 1861 assert(args.size() == 2); 1862 auto arg0{getBase(args[0])}; 1863 auto arg1{getBase(args[1])}; 1864 1865 auto vecTyInfo{getVecTypeFromFirType(resultType)}; 1866 auto mlirTy{vecTyInfo.toMlirVectorType(builder.getContext())}; 1867 auto firTy{vecTyInfo.toFirVectorType()}; 1868 1869 // Add the %val of arg0 to %addr of arg1 1870 auto addr{addOffsetToAddress(builder, loc, arg1, arg0)}; 1871 1872 const auto triple{fir::getTargetTriple(builder.getModule())}; 1873 // Need to get align 1. 1874 auto result{builder.create<fir::LoadOp>(loc, mlirTy, addr, 1875 getAlignmentAttr(builder, 1))}; 1876 if ((vop == VecOp::Xl && isBEVecElemOrderOnLE()) || 1877 (vop == VecOp::Xlbe && triple.isLittleEndian())) 1878 return builder.createConvert( 1879 loc, firTy, reverseVectorElements(builder, loc, result, vecTyInfo.len)); 1880 1881 return builder.createConvert(loc, firTy, result); 1882 } 1883 1884 // VEC_LD, VEC_LDE, VEC_LDL, VEC_LXVP, VEC_XLD2, VEC_XLW4 1885 template <VecOp vop> 1886 fir::ExtendedValue 1887 PPCIntrinsicLibrary::genVecLdCallGrp(mlir::Type resultType, 1888 llvm::ArrayRef<fir::ExtendedValue> args) { 1889 assert(args.size() == 2); 1890 auto context{builder.getContext()}; 1891 auto arg0{getBase(args[0])}; 1892 auto arg1{getBase(args[1])}; 1893 1894 // Prepare the return type in FIR. 1895 auto vecResTyInfo{getVecTypeFromFirType(resultType)}; 1896 auto mlirTy{vecResTyInfo.toMlirVectorType(context)}; 1897 auto firTy{vecResTyInfo.toFirVectorType()}; 1898 1899 // llvm.ppc.altivec.lvx* returns <4xi32> 1900 // Others, like "llvm.ppc.altivec.lvebx" too if arg2 is not of Integer type 1901 const auto i32Ty{mlir::IntegerType::get(builder.getContext(), 32)}; 1902 const auto mVecI32Ty{mlir::VectorType::get(4, i32Ty)}; 1903 1904 // For vec_ld, need to convert arg0 from i64 to i32 1905 if (vop == VecOp::Ld && arg0.getType().getIntOrFloatBitWidth() == 64) 1906 arg0 = builder.createConvert(loc, i32Ty, arg0); 1907 1908 // Add the %val of arg0 to %addr of arg1 1909 auto addr{addOffsetToAddress(builder, loc, arg1, arg0)}; 1910 llvm::SmallVector<mlir::Value, 4> parsedArgs{addr}; 1911 1912 mlir::Type intrinResTy{nullptr}; 1913 llvm::StringRef fname{}; 1914 switch (vop) { 1915 case VecOp::Ld: 1916 fname = "llvm.ppc.altivec.lvx"; 1917 intrinResTy = mVecI32Ty; 1918 break; 1919 case VecOp::Lde: 1920 switch (vecResTyInfo.eleTy.getIntOrFloatBitWidth()) { 1921 case 8: 1922 fname = "llvm.ppc.altivec.lvebx"; 1923 intrinResTy = mlirTy; 1924 break; 1925 case 16: 1926 fname = "llvm.ppc.altivec.lvehx"; 1927 intrinResTy = mlirTy; 1928 break; 1929 case 32: 1930 fname = "llvm.ppc.altivec.lvewx"; 1931 if (mlir::isa<mlir::IntegerType>(vecResTyInfo.eleTy)) 1932 intrinResTy = mlirTy; 1933 else 1934 intrinResTy = mVecI32Ty; 1935 break; 1936 default: 1937 llvm_unreachable("invalid vector for vec_lde"); 1938 } 1939 break; 1940 case VecOp::Ldl: 1941 fname = "llvm.ppc.altivec.lvxl"; 1942 intrinResTy = mVecI32Ty; 1943 break; 1944 case VecOp::Lxvp: 1945 fname = "llvm.ppc.vsx.lxvp"; 1946 intrinResTy = fir::VectorType::get(256, mlir::IntegerType::get(context, 1)); 1947 break; 1948 case VecOp::Xld2: { 1949 fname = isBEVecElemOrderOnLE() ? "llvm.ppc.vsx.lxvd2x.be" 1950 : "llvm.ppc.vsx.lxvd2x"; 1951 // llvm.ppc.altivec.lxvd2x* returns <2 x double> 1952 intrinResTy = mlir::VectorType::get(2, mlir::Float64Type::get(context)); 1953 } break; 1954 case VecOp::Xlw4: 1955 fname = isBEVecElemOrderOnLE() ? "llvm.ppc.vsx.lxvw4x.be" 1956 : "llvm.ppc.vsx.lxvw4x"; 1957 // llvm.ppc.altivec.lxvw4x* returns <4xi32> 1958 intrinResTy = mVecI32Ty; 1959 break; 1960 default: 1961 llvm_unreachable("invalid vector operation for generator"); 1962 } 1963 1964 auto funcType{ 1965 mlir::FunctionType::get(context, {addr.getType()}, {intrinResTy})}; 1966 auto funcOp{builder.createFunction(loc, fname, funcType)}; 1967 auto result{ 1968 builder.create<fir::CallOp>(loc, funcOp, parsedArgs).getResult(0)}; 1969 1970 if (vop == VecOp::Lxvp) 1971 return result; 1972 1973 if (intrinResTy != mlirTy) 1974 result = builder.create<mlir::vector::BitCastOp>(loc, mlirTy, result); 1975 1976 if (vop != VecOp::Xld2 && vop != VecOp::Xlw4 && isBEVecElemOrderOnLE()) 1977 return builder.createConvert( 1978 loc, firTy, 1979 reverseVectorElements(builder, loc, result, vecResTyInfo.len)); 1980 1981 return builder.createConvert(loc, firTy, result); 1982 } 1983 1984 // VEC_LVSL, VEC_LVSR 1985 template <VecOp vop> 1986 fir::ExtendedValue 1987 PPCIntrinsicLibrary::genVecLvsGrp(mlir::Type resultType, 1988 llvm::ArrayRef<fir::ExtendedValue> args) { 1989 assert(args.size() == 2); 1990 auto context{builder.getContext()}; 1991 auto arg0{getBase(args[0])}; 1992 auto arg1{getBase(args[1])}; 1993 1994 auto vecTyInfo{getVecTypeFromFirType(resultType)}; 1995 auto mlirTy{vecTyInfo.toMlirVectorType(context)}; 1996 auto firTy{vecTyInfo.toFirVectorType()}; 1997 1998 // Convert arg0 to i64 type if needed 1999 auto i64ty{mlir::IntegerType::get(context, 64)}; 2000 if (arg0.getType() != i64ty) 2001 arg0 = builder.create<fir::ConvertOp>(loc, i64ty, arg0); 2002 2003 // offset is modulo 16, so shift left 56 bits and then right 56 bits to clear 2004 // upper 56 bit while preserving sign 2005 auto shiftVal{builder.createIntegerConstant(loc, i64ty, 56)}; 2006 auto offset{builder.create<mlir::arith::ShLIOp>(loc, arg0, shiftVal)}; 2007 auto offset2{builder.create<mlir::arith::ShRSIOp>(loc, offset, shiftVal)}; 2008 2009 // Add the offsetArg to %addr of arg1 2010 auto addr{addOffsetToAddress(builder, loc, arg1, offset2)}; 2011 llvm::SmallVector<mlir::Value, 4> parsedArgs{addr}; 2012 2013 llvm::StringRef fname{}; 2014 switch (vop) { 2015 case VecOp::Lvsl: 2016 fname = "llvm.ppc.altivec.lvsl"; 2017 break; 2018 case VecOp::Lvsr: 2019 fname = "llvm.ppc.altivec.lvsr"; 2020 break; 2021 default: 2022 llvm_unreachable("invalid vector operation for generator"); 2023 } 2024 auto funcType{mlir::FunctionType::get(context, {addr.getType()}, {mlirTy})}; 2025 auto funcOp{builder.createFunction(loc, fname, funcType)}; 2026 auto result{ 2027 builder.create<fir::CallOp>(loc, funcOp, parsedArgs).getResult(0)}; 2028 2029 if (isNativeVecElemOrderOnLE()) 2030 return builder.createConvert( 2031 loc, firTy, reverseVectorElements(builder, loc, result, vecTyInfo.len)); 2032 2033 return builder.createConvert(loc, firTy, result); 2034 } 2035 2036 // VEC_NMADD, VEC_MSUB 2037 template <VecOp vop> 2038 fir::ExtendedValue 2039 PPCIntrinsicLibrary::genVecNmaddMsub(mlir::Type resultType, 2040 llvm::ArrayRef<fir::ExtendedValue> args) { 2041 assert(args.size() == 3); 2042 auto context{builder.getContext()}; 2043 auto argBases{getBasesForArgs(args)}; 2044 auto vTypeInfo{getVecTypeFromFir(argBases[0])}; 2045 auto newArgs{convertVecArgs(builder, loc, vTypeInfo, argBases)}; 2046 const auto width{vTypeInfo.eleTy.getIntOrFloatBitWidth()}; 2047 2048 static std::map<int, std::pair<llvm::StringRef, mlir::FunctionType>> fmaMap{ 2049 {32, 2050 std::make_pair( 2051 "llvm.fma.v4f32", 2052 genFuncType<Ty::RealVector<4>, Ty::RealVector<4>, Ty::RealVector<4>>( 2053 context, builder))}, 2054 {64, 2055 std::make_pair( 2056 "llvm.fma.v2f64", 2057 genFuncType<Ty::RealVector<8>, Ty::RealVector<8>, Ty::RealVector<8>>( 2058 context, builder))}}; 2059 2060 auto funcOp{builder.createFunction(loc, std::get<0>(fmaMap[width]), 2061 std::get<1>(fmaMap[width]))}; 2062 if (vop == VecOp::Nmadd) { 2063 // vec_nmadd(arg1, arg2, arg3) = -fma(arg1, arg2, arg3) 2064 auto callOp{builder.create<fir::CallOp>(loc, funcOp, newArgs)}; 2065 2066 // We need to convert fir.vector to MLIR vector to use fneg and then back 2067 // to fir.vector to store. 2068 auto vCall{builder.createConvert(loc, vTypeInfo.toMlirVectorType(context), 2069 callOp.getResult(0))}; 2070 auto neg{builder.create<mlir::arith::NegFOp>(loc, vCall)}; 2071 return builder.createConvert(loc, vTypeInfo.toFirVectorType(), neg); 2072 } else if (vop == VecOp::Msub) { 2073 // vec_msub(arg1, arg2, arg3) = fma(arg1, arg2, -arg3) 2074 newArgs[2] = builder.create<mlir::arith::NegFOp>(loc, newArgs[2]); 2075 2076 auto callOp{builder.create<fir::CallOp>(loc, funcOp, newArgs)}; 2077 return callOp.getResult(0); 2078 } 2079 llvm_unreachable("Invalid vector operation for generator"); 2080 } 2081 2082 // VEC_PERM, VEC_PERMI 2083 template <VecOp vop> 2084 fir::ExtendedValue 2085 PPCIntrinsicLibrary::genVecPerm(mlir::Type resultType, 2086 llvm::ArrayRef<fir::ExtendedValue> args) { 2087 assert(args.size() == 3); 2088 auto context{builder.getContext()}; 2089 auto argBases{getBasesForArgs(args)}; 2090 auto argTypes{getTypesForArgs(argBases)}; 2091 auto vecTyInfo{getVecTypeFromFir(argBases[0])}; 2092 auto mlirTy{vecTyInfo.toMlirVectorType(context)}; 2093 2094 auto vi32Ty{mlir::VectorType::get(4, mlir::IntegerType::get(context, 32))}; 2095 auto vf64Ty{mlir::VectorType::get(2, mlir::Float64Type::get(context))}; 2096 2097 auto mArg0{builder.createConvert(loc, mlirTy, argBases[0])}; 2098 auto mArg1{builder.createConvert(loc, mlirTy, argBases[1])}; 2099 2100 switch (vop) { 2101 case VecOp::Perm: { 2102 VecTypeInfo maskVecTyInfo{getVecTypeFromFir(argBases[2])}; 2103 auto mlirMaskTy{maskVecTyInfo.toMlirVectorType(context)}; 2104 auto mMask{builder.createConvert(loc, mlirMaskTy, argBases[2])}; 2105 2106 if (mlirTy != vi32Ty) { 2107 mArg0 = 2108 builder.create<mlir::LLVM::BitcastOp>(loc, vi32Ty, mArg0).getResult(); 2109 mArg1 = 2110 builder.create<mlir::LLVM::BitcastOp>(loc, vi32Ty, mArg1).getResult(); 2111 } 2112 2113 auto funcOp{builder.createFunction( 2114 loc, "llvm.ppc.altivec.vperm", 2115 genFuncType<Ty::IntegerVector<4>, Ty::IntegerVector<4>, 2116 Ty::IntegerVector<4>, Ty::IntegerVector<1>>(context, 2117 builder))}; 2118 2119 llvm::SmallVector<mlir::Value> newArgs; 2120 if (isNativeVecElemOrderOnLE()) { 2121 auto i8Ty{mlir::IntegerType::get(context, 8)}; 2122 auto v8Ty{mlir::VectorType::get(16, i8Ty)}; 2123 auto negOne{builder.createMinusOneInteger(loc, i8Ty)}; 2124 auto vNegOne{ 2125 builder.create<mlir::vector::BroadcastOp>(loc, v8Ty, negOne)}; 2126 2127 mMask = builder.create<mlir::arith::XOrIOp>(loc, mMask, vNegOne); 2128 newArgs = {mArg1, mArg0, mMask}; 2129 } else { 2130 newArgs = {mArg0, mArg1, mMask}; 2131 } 2132 2133 auto res{builder.create<fir::CallOp>(loc, funcOp, newArgs).getResult(0)}; 2134 2135 if (res.getType() != argTypes[0]) { 2136 // fir.call llvm.ppc.altivec.vperm returns !fir.vector<i4:32> 2137 // convert the result back to the original type 2138 res = builder.createConvert(loc, vi32Ty, res); 2139 if (mlirTy != vi32Ty) 2140 res = 2141 builder.create<mlir::LLVM::BitcastOp>(loc, mlirTy, res).getResult(); 2142 } 2143 return builder.createConvert(loc, resultType, res); 2144 } 2145 case VecOp::Permi: { 2146 // arg3 is a constant 2147 auto constIntOp{mlir::dyn_cast_or_null<mlir::IntegerAttr>( 2148 mlir::dyn_cast<mlir::arith::ConstantOp>(argBases[2].getDefiningOp()) 2149 .getValue())}; 2150 assert(constIntOp && "expected integer constant argument"); 2151 auto constInt{constIntOp.getInt()}; 2152 // arg1, arg2, and result type share same VecTypeInfo 2153 if (vecTyInfo.isFloat()) { 2154 mArg0 = 2155 builder.create<mlir::LLVM::BitcastOp>(loc, vf64Ty, mArg0).getResult(); 2156 mArg1 = 2157 builder.create<mlir::LLVM::BitcastOp>(loc, vf64Ty, mArg1).getResult(); 2158 } 2159 2160 llvm::SmallVector<int64_t, 2> nMask; // native vector element order mask 2161 llvm::SmallVector<int64_t, 2> rMask; // non-native vector element order mask 2162 enum { V1 = 0, V2 = 2 }; 2163 switch (constInt) { 2164 case 0: 2165 nMask = {V1 + 0, V2 + 0}; 2166 rMask = {V2 + 1, V1 + 1}; 2167 break; 2168 case 1: 2169 nMask = {V1 + 0, V2 + 1}; 2170 rMask = {V2 + 0, V1 + 1}; 2171 break; 2172 case 2: 2173 nMask = {V1 + 1, V2 + 0}; 2174 rMask = {V2 + 1, V1 + 0}; 2175 break; 2176 case 3: 2177 nMask = {V1 + 1, V2 + 1}; 2178 rMask = {V2 + 0, V1 + 0}; 2179 break; 2180 default: 2181 llvm_unreachable("unexpected arg3 value for vec_permi"); 2182 } 2183 2184 llvm::SmallVector<int64_t, 2> mask = 2185 (isBEVecElemOrderOnLE()) ? rMask : nMask; 2186 auto res{builder.create<mlir::vector::ShuffleOp>(loc, mArg0, mArg1, mask)}; 2187 if (res.getType() != mlirTy) { 2188 auto cast{builder.create<mlir::LLVM::BitcastOp>(loc, mlirTy, res)}; 2189 return builder.createConvert(loc, resultType, cast); 2190 } 2191 return builder.createConvert(loc, resultType, res); 2192 } 2193 default: 2194 llvm_unreachable("invalid vector operation for generator"); 2195 } 2196 } 2197 2198 // VEC_SEL 2199 fir::ExtendedValue 2200 PPCIntrinsicLibrary::genVecSel(mlir::Type resultType, 2201 llvm::ArrayRef<fir::ExtendedValue> args) { 2202 assert(args.size() == 3); 2203 auto argBases{getBasesForArgs(args)}; 2204 llvm::SmallVector<VecTypeInfo, 4> vecTyInfos; 2205 for (size_t i = 0; i < argBases.size(); i++) { 2206 vecTyInfos.push_back(getVecTypeFromFir(argBases[i])); 2207 } 2208 auto vargs{convertVecArgs(builder, loc, vecTyInfos, argBases)}; 2209 2210 auto i8Ty{mlir::IntegerType::get(builder.getContext(), 8)}; 2211 auto negOne{builder.createMinusOneInteger(loc, i8Ty)}; 2212 2213 // construct a constant <16 x i8> vector with value -1 for bitcast 2214 auto bcVecTy{mlir::VectorType::get(16, i8Ty)}; 2215 auto vNegOne{builder.create<mlir::vector::BroadcastOp>(loc, bcVecTy, negOne)}; 2216 2217 // bitcast arguments to bcVecTy 2218 auto arg1{builder.create<mlir::vector::BitCastOp>(loc, bcVecTy, vargs[0])}; 2219 auto arg2{builder.create<mlir::vector::BitCastOp>(loc, bcVecTy, vargs[1])}; 2220 auto arg3{builder.create<mlir::vector::BitCastOp>(loc, bcVecTy, vargs[2])}; 2221 2222 // vec_sel(arg1, arg2, arg3) = 2223 // (arg2 and arg3) or (arg1 and (arg3 xor vector(-1,...))) 2224 auto comp{builder.create<mlir::arith::XOrIOp>(loc, arg3, vNegOne)}; 2225 auto a1AndComp{builder.create<mlir::arith::AndIOp>(loc, arg1, comp)}; 2226 auto a1OrA2{builder.create<mlir::arith::AndIOp>(loc, arg2, arg3)}; 2227 auto res{builder.create<mlir::arith::OrIOp>(loc, a1AndComp, a1OrA2)}; 2228 2229 auto bcRes{ 2230 builder.create<mlir::vector::BitCastOp>(loc, vargs[0].getType(), res)}; 2231 2232 return builder.createConvert(loc, vecTyInfos[0].toFirVectorType(), bcRes); 2233 } 2234 2235 // VEC_SL, VEC_SLD, VEC_SLDW, VEC_SLL, VEC_SLO, VEC_SR, VEC_SRL, VEC_SRO 2236 template <VecOp vop> 2237 fir::ExtendedValue 2238 PPCIntrinsicLibrary::genVecShift(mlir::Type resultType, 2239 llvm::ArrayRef<fir::ExtendedValue> args) { 2240 auto context{builder.getContext()}; 2241 auto argBases{getBasesForArgs(args)}; 2242 auto argTypes{getTypesForArgs(argBases)}; 2243 2244 llvm::SmallVector<VecTypeInfo, 2> vecTyInfoArgs; 2245 vecTyInfoArgs.push_back(getVecTypeFromFir(argBases[0])); 2246 vecTyInfoArgs.push_back(getVecTypeFromFir(argBases[1])); 2247 2248 // Convert the first two arguments to MLIR vectors 2249 llvm::SmallVector<mlir::Type, 2> mlirTyArgs; 2250 mlirTyArgs.push_back(vecTyInfoArgs[0].toMlirVectorType(context)); 2251 mlirTyArgs.push_back(vecTyInfoArgs[1].toMlirVectorType(context)); 2252 2253 llvm::SmallVector<mlir::Value, 2> mlirVecArgs; 2254 mlirVecArgs.push_back(builder.createConvert(loc, mlirTyArgs[0], argBases[0])); 2255 mlirVecArgs.push_back(builder.createConvert(loc, mlirTyArgs[1], argBases[1])); 2256 2257 mlir::Value shftRes{nullptr}; 2258 2259 if (vop == VecOp::Sl || vop == VecOp::Sr) { 2260 assert(args.size() == 2); 2261 // Construct the mask 2262 auto width{ 2263 mlir::dyn_cast<mlir::IntegerType>(vecTyInfoArgs[1].eleTy).getWidth()}; 2264 auto vecVal{builder.createIntegerConstant( 2265 loc, getConvertedElementType(context, vecTyInfoArgs[0].eleTy), width)}; 2266 auto mask{ 2267 builder.create<mlir::vector::BroadcastOp>(loc, mlirTyArgs[1], vecVal)}; 2268 auto shft{builder.create<mlir::arith::RemUIOp>(loc, mlirVecArgs[1], mask)}; 2269 2270 mlir::Value res{nullptr}; 2271 if (vop == VecOp::Sr) 2272 res = builder.create<mlir::arith::ShRUIOp>(loc, mlirVecArgs[0], shft); 2273 else if (vop == VecOp::Sl) 2274 res = builder.create<mlir::arith::ShLIOp>(loc, mlirVecArgs[0], shft); 2275 2276 shftRes = builder.createConvert(loc, argTypes[0], res); 2277 } else if (vop == VecOp::Sll || vop == VecOp::Slo || vop == VecOp::Srl || 2278 vop == VecOp::Sro) { 2279 assert(args.size() == 2); 2280 2281 // Bitcast to vector<4xi32> 2282 auto bcVecTy{mlir::VectorType::get(4, builder.getIntegerType(32))}; 2283 if (mlirTyArgs[0] != bcVecTy) 2284 mlirVecArgs[0] = 2285 builder.create<mlir::vector::BitCastOp>(loc, bcVecTy, mlirVecArgs[0]); 2286 if (mlirTyArgs[1] != bcVecTy) 2287 mlirVecArgs[1] = 2288 builder.create<mlir::vector::BitCastOp>(loc, bcVecTy, mlirVecArgs[1]); 2289 2290 llvm::StringRef funcName; 2291 switch (vop) { 2292 case VecOp::Srl: 2293 funcName = "llvm.ppc.altivec.vsr"; 2294 break; 2295 case VecOp::Sro: 2296 funcName = "llvm.ppc.altivec.vsro"; 2297 break; 2298 case VecOp::Sll: 2299 funcName = "llvm.ppc.altivec.vsl"; 2300 break; 2301 case VecOp::Slo: 2302 funcName = "llvm.ppc.altivec.vslo"; 2303 break; 2304 default: 2305 llvm_unreachable("unknown vector shift operation"); 2306 } 2307 auto funcTy{genFuncType<Ty::IntegerVector<4>, Ty::IntegerVector<4>, 2308 Ty::IntegerVector<4>>(context, builder)}; 2309 mlir::func::FuncOp funcOp{builder.createFunction(loc, funcName, funcTy)}; 2310 auto callOp{builder.create<fir::CallOp>(loc, funcOp, mlirVecArgs)}; 2311 2312 // If the result vector type is different from the original type, need 2313 // to convert to mlir vector, bitcast and then convert back to fir vector. 2314 if (callOp.getResult(0).getType() != argTypes[0]) { 2315 auto res = builder.createConvert(loc, bcVecTy, callOp.getResult(0)); 2316 res = builder.create<mlir::vector::BitCastOp>(loc, mlirTyArgs[0], res); 2317 shftRes = builder.createConvert(loc, argTypes[0], res); 2318 } else { 2319 shftRes = callOp.getResult(0); 2320 } 2321 } else if (vop == VecOp::Sld || vop == VecOp::Sldw) { 2322 assert(args.size() == 3); 2323 auto constIntOp = mlir::dyn_cast_or_null<mlir::IntegerAttr>( 2324 mlir::dyn_cast<mlir::arith::ConstantOp>(argBases[2].getDefiningOp()) 2325 .getValue()); 2326 assert(constIntOp && "expected integer constant argument"); 2327 2328 // Bitcast to vector<16xi8> 2329 auto vi8Ty{mlir::VectorType::get(16, builder.getIntegerType(8))}; 2330 if (mlirTyArgs[0] != vi8Ty) { 2331 mlirVecArgs[0] = 2332 builder.create<mlir::LLVM::BitcastOp>(loc, vi8Ty, mlirVecArgs[0]) 2333 .getResult(); 2334 mlirVecArgs[1] = 2335 builder.create<mlir::LLVM::BitcastOp>(loc, vi8Ty, mlirVecArgs[1]) 2336 .getResult(); 2337 } 2338 2339 // Construct the mask for shuffling 2340 auto shiftVal{constIntOp.getInt()}; 2341 if (vop == VecOp::Sldw) 2342 shiftVal = shiftVal << 2; 2343 shiftVal &= 0xF; 2344 llvm::SmallVector<int64_t, 16> mask; 2345 // Shuffle with mask based on the endianness 2346 const auto triple{fir::getTargetTriple(builder.getModule())}; 2347 if (triple.isLittleEndian()) { 2348 for (int i = 16; i < 32; ++i) 2349 mask.push_back(i - shiftVal); 2350 shftRes = builder.create<mlir::vector::ShuffleOp>(loc, mlirVecArgs[1], 2351 mlirVecArgs[0], mask); 2352 } else { 2353 for (int i = 0; i < 16; ++i) 2354 mask.push_back(i + shiftVal); 2355 shftRes = builder.create<mlir::vector::ShuffleOp>(loc, mlirVecArgs[0], 2356 mlirVecArgs[1], mask); 2357 } 2358 2359 // Bitcast to the original type 2360 if (shftRes.getType() != mlirTyArgs[0]) 2361 shftRes = 2362 builder.create<mlir::LLVM::BitcastOp>(loc, mlirTyArgs[0], shftRes); 2363 2364 return builder.createConvert(loc, resultType, shftRes); 2365 } else 2366 llvm_unreachable("Invalid vector operation for generator"); 2367 2368 return shftRes; 2369 } 2370 2371 // VEC_SPLAT, VEC_SPLATS, VEC_SPLAT_S32 2372 template <VecOp vop> 2373 fir::ExtendedValue 2374 PPCIntrinsicLibrary::genVecSplat(mlir::Type resultType, 2375 llvm::ArrayRef<fir::ExtendedValue> args) { 2376 auto context{builder.getContext()}; 2377 auto argBases{getBasesForArgs(args)}; 2378 2379 mlir::vector::SplatOp splatOp{nullptr}; 2380 mlir::Type retTy{nullptr}; 2381 switch (vop) { 2382 case VecOp::Splat: { 2383 assert(args.size() == 2); 2384 auto vecTyInfo{getVecTypeFromFir(argBases[0])}; 2385 2386 auto extractOp{genVecExtract(resultType, args)}; 2387 splatOp = builder.create<mlir::vector::SplatOp>( 2388 loc, *(extractOp.getUnboxed()), vecTyInfo.toMlirVectorType(context)); 2389 retTy = vecTyInfo.toFirVectorType(); 2390 break; 2391 } 2392 case VecOp::Splats: { 2393 assert(args.size() == 1); 2394 auto vecTyInfo{getVecTypeFromEle(argBases[0])}; 2395 2396 splatOp = builder.create<mlir::vector::SplatOp>( 2397 loc, argBases[0], vecTyInfo.toMlirVectorType(context)); 2398 retTy = vecTyInfo.toFirVectorType(); 2399 break; 2400 } 2401 case VecOp::Splat_s32: { 2402 assert(args.size() == 1); 2403 auto eleTy{builder.getIntegerType(32)}; 2404 auto intOp{builder.createConvert(loc, eleTy, argBases[0])}; 2405 2406 // the intrinsic always returns vector(integer(4)) 2407 splatOp = builder.create<mlir::vector::SplatOp>( 2408 loc, intOp, mlir::VectorType::get(4, eleTy)); 2409 retTy = fir::VectorType::get(4, eleTy); 2410 break; 2411 } 2412 default: 2413 llvm_unreachable("invalid vector operation for generator"); 2414 } 2415 return builder.createConvert(loc, retTy, splatOp); 2416 } 2417 2418 fir::ExtendedValue 2419 PPCIntrinsicLibrary::genVecXlds(mlir::Type resultType, 2420 llvm::ArrayRef<fir::ExtendedValue> args) { 2421 assert(args.size() == 2); 2422 auto arg0{getBase(args[0])}; 2423 auto arg1{getBase(args[1])}; 2424 2425 // Prepare the return type in FIR. 2426 auto vecTyInfo{getVecTypeFromFirType(resultType)}; 2427 auto mlirTy{vecTyInfo.toMlirVectorType(builder.getContext())}; 2428 auto firTy{vecTyInfo.toFirVectorType()}; 2429 2430 // Add the %val of arg0 to %addr of arg1 2431 auto addr{addOffsetToAddress(builder, loc, arg1, arg0)}; 2432 2433 auto i64Ty{mlir::IntegerType::get(builder.getContext(), 64)}; 2434 auto i64VecTy{mlir::VectorType::get(2, i64Ty)}; 2435 auto i64RefTy{builder.getRefType(i64Ty)}; 2436 auto addrConv{builder.create<fir::ConvertOp>(loc, i64RefTy, addr)}; 2437 2438 auto addrVal{builder.create<fir::LoadOp>(loc, addrConv)}; 2439 auto splatRes{builder.create<mlir::vector::SplatOp>(loc, addrVal, i64VecTy)}; 2440 2441 mlir::Value result{nullptr}; 2442 if (mlirTy != splatRes.getType()) { 2443 result = builder.create<mlir::vector::BitCastOp>(loc, mlirTy, splatRes); 2444 } else 2445 result = splatRes; 2446 2447 return builder.createConvert(loc, firTy, result); 2448 } 2449 2450 const char *getMmaIrIntrName(MMAOp mmaOp) { 2451 switch (mmaOp) { 2452 case MMAOp::AssembleAcc: 2453 return "llvm.ppc.mma.assemble.acc"; 2454 case MMAOp::AssemblePair: 2455 return "llvm.ppc.vsx.assemble.pair"; 2456 case MMAOp::DisassembleAcc: 2457 return "llvm.ppc.mma.disassemble.acc"; 2458 case MMAOp::DisassemblePair: 2459 return "llvm.ppc.vsx.disassemble.pair"; 2460 case MMAOp::Xxmfacc: 2461 return "llvm.ppc.mma.xxmfacc"; 2462 case MMAOp::Xxmtacc: 2463 return "llvm.ppc.mma.xxmtacc"; 2464 case MMAOp::Xxsetaccz: 2465 return "llvm.ppc.mma.xxsetaccz"; 2466 case MMAOp::Pmxvbf16ger2: 2467 return "llvm.ppc.mma.pmxvbf16ger2"; 2468 case MMAOp::Pmxvbf16ger2nn: 2469 return "llvm.ppc.mma.pmxvbf16ger2nn"; 2470 case MMAOp::Pmxvbf16ger2np: 2471 return "llvm.ppc.mma.pmxvbf16ger2np"; 2472 case MMAOp::Pmxvbf16ger2pn: 2473 return "llvm.ppc.mma.pmxvbf16ger2pn"; 2474 case MMAOp::Pmxvbf16ger2pp: 2475 return "llvm.ppc.mma.pmxvbf16ger2pp"; 2476 case MMAOp::Pmxvf16ger2: 2477 return "llvm.ppc.mma.pmxvf16ger2"; 2478 case MMAOp::Pmxvf16ger2nn: 2479 return "llvm.ppc.mma.pmxvf16ger2nn"; 2480 case MMAOp::Pmxvf16ger2np: 2481 return "llvm.ppc.mma.pmxvf16ger2np"; 2482 case MMAOp::Pmxvf16ger2pn: 2483 return "llvm.ppc.mma.pmxvf16ger2pn"; 2484 case MMAOp::Pmxvf16ger2pp: 2485 return "llvm.ppc.mma.pmxvf16ger2pp"; 2486 case MMAOp::Pmxvf32ger: 2487 return "llvm.ppc.mma.pmxvf32ger"; 2488 case MMAOp::Pmxvf32gernn: 2489 return "llvm.ppc.mma.pmxvf32gernn"; 2490 case MMAOp::Pmxvf32gernp: 2491 return "llvm.ppc.mma.pmxvf32gernp"; 2492 case MMAOp::Pmxvf32gerpn: 2493 return "llvm.ppc.mma.pmxvf32gerpn"; 2494 case MMAOp::Pmxvf32gerpp: 2495 return "llvm.ppc.mma.pmxvf32gerpp"; 2496 case MMAOp::Pmxvf64ger: 2497 return "llvm.ppc.mma.pmxvf64ger"; 2498 case MMAOp::Pmxvf64gernn: 2499 return "llvm.ppc.mma.pmxvf64gernn"; 2500 case MMAOp::Pmxvf64gernp: 2501 return "llvm.ppc.mma.pmxvf64gernp"; 2502 case MMAOp::Pmxvf64gerpn: 2503 return "llvm.ppc.mma.pmxvf64gerpn"; 2504 case MMAOp::Pmxvf64gerpp: 2505 return "llvm.ppc.mma.pmxvf64gerpp"; 2506 case MMAOp::Pmxvi16ger2: 2507 return "llvm.ppc.mma.pmxvi16ger2"; 2508 case MMAOp::Pmxvi16ger2pp: 2509 return "llvm.ppc.mma.pmxvi16ger2pp"; 2510 case MMAOp::Pmxvi16ger2s: 2511 return "llvm.ppc.mma.pmxvi16ger2s"; 2512 case MMAOp::Pmxvi16ger2spp: 2513 return "llvm.ppc.mma.pmxvi16ger2spp"; 2514 case MMAOp::Pmxvi4ger8: 2515 return "llvm.ppc.mma.pmxvi4ger8"; 2516 case MMAOp::Pmxvi4ger8pp: 2517 return "llvm.ppc.mma.pmxvi4ger8pp"; 2518 case MMAOp::Pmxvi8ger4: 2519 return "llvm.ppc.mma.pmxvi8ger4"; 2520 case MMAOp::Pmxvi8ger4pp: 2521 return "llvm.ppc.mma.pmxvi8ger4pp"; 2522 case MMAOp::Pmxvi8ger4spp: 2523 return "llvm.ppc.mma.pmxvi8ger4spp"; 2524 case MMAOp::Xvbf16ger2: 2525 return "llvm.ppc.mma.xvbf16ger2"; 2526 case MMAOp::Xvbf16ger2nn: 2527 return "llvm.ppc.mma.xvbf16ger2nn"; 2528 case MMAOp::Xvbf16ger2np: 2529 return "llvm.ppc.mma.xvbf16ger2np"; 2530 case MMAOp::Xvbf16ger2pn: 2531 return "llvm.ppc.mma.xvbf16ger2pn"; 2532 case MMAOp::Xvbf16ger2pp: 2533 return "llvm.ppc.mma.xvbf16ger2pp"; 2534 case MMAOp::Xvf16ger2: 2535 return "llvm.ppc.mma.xvf16ger2"; 2536 case MMAOp::Xvf16ger2nn: 2537 return "llvm.ppc.mma.xvf16ger2nn"; 2538 case MMAOp::Xvf16ger2np: 2539 return "llvm.ppc.mma.xvf16ger2np"; 2540 case MMAOp::Xvf16ger2pn: 2541 return "llvm.ppc.mma.xvf16ger2pn"; 2542 case MMAOp::Xvf16ger2pp: 2543 return "llvm.ppc.mma.xvf16ger2pp"; 2544 case MMAOp::Xvf32ger: 2545 return "llvm.ppc.mma.xvf32ger"; 2546 case MMAOp::Xvf32gernn: 2547 return "llvm.ppc.mma.xvf32gernn"; 2548 case MMAOp::Xvf32gernp: 2549 return "llvm.ppc.mma.xvf32gernp"; 2550 case MMAOp::Xvf32gerpn: 2551 return "llvm.ppc.mma.xvf32gerpn"; 2552 case MMAOp::Xvf32gerpp: 2553 return "llvm.ppc.mma.xvf32gerpp"; 2554 case MMAOp::Xvf64ger: 2555 return "llvm.ppc.mma.xvf64ger"; 2556 case MMAOp::Xvf64gernn: 2557 return "llvm.ppc.mma.xvf64gernn"; 2558 case MMAOp::Xvf64gernp: 2559 return "llvm.ppc.mma.xvf64gernp"; 2560 case MMAOp::Xvf64gerpn: 2561 return "llvm.ppc.mma.xvf64gerpn"; 2562 case MMAOp::Xvf64gerpp: 2563 return "llvm.ppc.mma.xvf64gerpp"; 2564 case MMAOp::Xvi16ger2: 2565 return "llvm.ppc.mma.xvi16ger2"; 2566 case MMAOp::Xvi16ger2pp: 2567 return "llvm.ppc.mma.xvi16ger2pp"; 2568 case MMAOp::Xvi16ger2s: 2569 return "llvm.ppc.mma.xvi16ger2s"; 2570 case MMAOp::Xvi16ger2spp: 2571 return "llvm.ppc.mma.xvi16ger2spp"; 2572 case MMAOp::Xvi4ger8: 2573 return "llvm.ppc.mma.xvi4ger8"; 2574 case MMAOp::Xvi4ger8pp: 2575 return "llvm.ppc.mma.xvi4ger8pp"; 2576 case MMAOp::Xvi8ger4: 2577 return "llvm.ppc.mma.xvi8ger4"; 2578 case MMAOp::Xvi8ger4pp: 2579 return "llvm.ppc.mma.xvi8ger4pp"; 2580 case MMAOp::Xvi8ger4spp: 2581 return "llvm.ppc.mma.xvi8ger4spp"; 2582 } 2583 llvm_unreachable("getMmaIrIntrName"); 2584 } 2585 2586 mlir::FunctionType getMmaIrFuncType(mlir::MLIRContext *context, MMAOp mmaOp) { 2587 switch (mmaOp) { 2588 case MMAOp::AssembleAcc: 2589 return genMmaVqFuncType(context, /*Quad*/ 0, /*Pair*/ 0, /*Vector*/ 4); 2590 case MMAOp::AssemblePair: 2591 return genMmaVpFuncType(context, /*Quad*/ 0, /*Pair*/ 0, /*Vector*/ 2); 2592 case MMAOp::DisassembleAcc: 2593 return genMmaDisassembleFuncType(context, mmaOp); 2594 case MMAOp::DisassemblePair: 2595 return genMmaDisassembleFuncType(context, mmaOp); 2596 case MMAOp::Xxmfacc: 2597 return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 0, /*Vector*/ 0); 2598 case MMAOp::Xxmtacc: 2599 return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 0, /*Vector*/ 0); 2600 case MMAOp::Xxsetaccz: 2601 return genMmaVqFuncType(context, /*Quad*/ 0, /*Pair*/ 0, /*Vector*/ 0); 2602 case MMAOp::Pmxvbf16ger2: 2603 return genMmaVqFuncType(context, /*Quad*/ 0, /*Pair*/ 0, /*Vector*/ 2, 2604 /*Integer*/ 3); 2605 case MMAOp::Pmxvbf16ger2nn: 2606 return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 0, /*Vector*/ 2, 2607 /*Integer*/ 3); 2608 case MMAOp::Pmxvbf16ger2np: 2609 return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 0, /*Vector*/ 2, 2610 /*Integer*/ 3); 2611 case MMAOp::Pmxvbf16ger2pn: 2612 return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 0, /*Vector*/ 2, 2613 /*Integer*/ 3); 2614 case MMAOp::Pmxvbf16ger2pp: 2615 return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 0, /*Vector*/ 2, 2616 /*Integer*/ 3); 2617 case MMAOp::Pmxvf16ger2: 2618 return genMmaVqFuncType(context, /*Quad*/ 0, /*Pair*/ 0, /*Vector*/ 2, 2619 /*Integer*/ 3); 2620 case MMAOp::Pmxvf16ger2nn: 2621 return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 0, /*Vector*/ 2, 2622 /*Integer*/ 3); 2623 case MMAOp::Pmxvf16ger2np: 2624 return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 0, /*Vector*/ 2, 2625 /*Integer*/ 3); 2626 case MMAOp::Pmxvf16ger2pn: 2627 return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 0, /*Vector*/ 2, 2628 /*Integer*/ 3); 2629 case MMAOp::Pmxvf16ger2pp: 2630 return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 0, /*Vector*/ 2, 2631 /*Integer*/ 3); 2632 case MMAOp::Pmxvf32ger: 2633 return genMmaVqFuncType(context, /*Quad*/ 0, /*Pair*/ 0, /*Vector*/ 2, 2634 /*Integer*/ 2); 2635 case MMAOp::Pmxvf32gernn: 2636 return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 0, /*Vector*/ 2, 2637 /*Integer*/ 2); 2638 case MMAOp::Pmxvf32gernp: 2639 return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 0, /*Vector*/ 2, 2640 /*Integer*/ 2); 2641 case MMAOp::Pmxvf32gerpn: 2642 return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 0, /*Vector*/ 2, 2643 /*Integer*/ 2); 2644 case MMAOp::Pmxvf32gerpp: 2645 return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 0, /*Vector*/ 2, 2646 /*Integer*/ 2); 2647 case MMAOp::Pmxvf64ger: 2648 return genMmaVqFuncType(context, /*Quad*/ 0, /*Pair*/ 1, /*Vector*/ 1, 2649 /*Integer*/ 2); 2650 case MMAOp::Pmxvf64gernn: 2651 return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 1, /*Vector*/ 1, 2652 /*Integer*/ 2); 2653 case MMAOp::Pmxvf64gernp: 2654 return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 1, /*Vector*/ 1, 2655 /*Integer*/ 2); 2656 case MMAOp::Pmxvf64gerpn: 2657 return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 1, /*Vector*/ 1, 2658 /*Integer*/ 2); 2659 case MMAOp::Pmxvf64gerpp: 2660 return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 1, /*Vector*/ 1, 2661 /*Integer*/ 2); 2662 case MMAOp::Pmxvi16ger2: 2663 return genMmaVqFuncType(context, /*Quad*/ 0, /*Pair*/ 0, /*Vector*/ 2, 2664 /*Integer*/ 3); 2665 case MMAOp::Pmxvi16ger2pp: 2666 return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 0, /*Vector*/ 2, 2667 /*Integer*/ 3); 2668 case MMAOp::Pmxvi16ger2s: 2669 return genMmaVqFuncType(context, /*Quad*/ 0, /*Pair*/ 0, /*Vector*/ 2, 2670 /*Integer*/ 3); 2671 case MMAOp::Pmxvi16ger2spp: 2672 return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 0, /*Vector*/ 2, 2673 /*Integer*/ 3); 2674 case MMAOp::Pmxvi4ger8: 2675 return genMmaVqFuncType(context, /*Quad*/ 0, /*Pair*/ 0, /*Vector*/ 2, 2676 /*Integer*/ 3); 2677 case MMAOp::Pmxvi4ger8pp: 2678 return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 0, /*Vector*/ 2, 2679 /*Integer*/ 3); 2680 case MMAOp::Pmxvi8ger4: 2681 return genMmaVqFuncType(context, /*Quad*/ 0, /*Pair*/ 0, /*Vector*/ 2, 2682 /*Integer*/ 3); 2683 case MMAOp::Pmxvi8ger4pp: 2684 return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 0, /*Vector*/ 2, 2685 /*Integer*/ 3); 2686 case MMAOp::Pmxvi8ger4spp: 2687 return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 0, /*Vector*/ 2, 2688 /*Integer*/ 3); 2689 case MMAOp::Xvbf16ger2: 2690 return genMmaVqFuncType(context, /*Quad*/ 0, /*Pair*/ 0, /*Vector*/ 2); 2691 case MMAOp::Xvbf16ger2nn: 2692 return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 0, /*Vector*/ 2); 2693 case MMAOp::Xvbf16ger2np: 2694 return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 0, /*Vector*/ 2); 2695 case MMAOp::Xvbf16ger2pn: 2696 return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 0, /*Vector*/ 2); 2697 case MMAOp::Xvbf16ger2pp: 2698 return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 0, /*Vector*/ 2); 2699 case MMAOp::Xvf16ger2: 2700 return genMmaVqFuncType(context, /*Quad*/ 0, /*Pair*/ 0, /*Vector*/ 2); 2701 case MMAOp::Xvf16ger2nn: 2702 return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 0, /*Vector*/ 2); 2703 case MMAOp::Xvf16ger2np: 2704 return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 0, /*Vector*/ 2); 2705 case MMAOp::Xvf16ger2pn: 2706 return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 0, /*Vector*/ 2); 2707 case MMAOp::Xvf16ger2pp: 2708 return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 0, /*Vector*/ 2); 2709 case MMAOp::Xvf32ger: 2710 return genMmaVqFuncType(context, /*Quad*/ 0, /*Pair*/ 0, /*Vector*/ 2); 2711 case MMAOp::Xvf32gernn: 2712 return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 0, /*Vector*/ 2); 2713 case MMAOp::Xvf32gernp: 2714 return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 0, /*Vector*/ 2); 2715 case MMAOp::Xvf32gerpn: 2716 return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 0, /*Vector*/ 2); 2717 case MMAOp::Xvf32gerpp: 2718 return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 0, /*Vector*/ 2); 2719 case MMAOp::Xvf64ger: 2720 return genMmaVqFuncType(context, /*Quad*/ 0, /*Pair*/ 1, /*Vector*/ 1); 2721 case MMAOp::Xvf64gernn: 2722 return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 1, /*Vector*/ 1); 2723 case MMAOp::Xvf64gernp: 2724 return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 1, /*Vector*/ 1); 2725 case MMAOp::Xvf64gerpn: 2726 return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 1, /*Vector*/ 1); 2727 case MMAOp::Xvf64gerpp: 2728 return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 1, /*Vector*/ 1); 2729 case MMAOp::Xvi16ger2: 2730 return genMmaVqFuncType(context, /*Quad*/ 0, /*Pair*/ 0, /*Vector*/ 2); 2731 case MMAOp::Xvi16ger2pp: 2732 return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 0, /*Vector*/ 2); 2733 case MMAOp::Xvi16ger2s: 2734 return genMmaVqFuncType(context, /*Quad*/ 0, /*Pair*/ 0, /*Vector*/ 2); 2735 case MMAOp::Xvi16ger2spp: 2736 return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 0, /*Vector*/ 2); 2737 case MMAOp::Xvi4ger8: 2738 return genMmaVqFuncType(context, /*Quad*/ 0, /*Pair*/ 0, /*Vector*/ 2); 2739 case MMAOp::Xvi4ger8pp: 2740 return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 0, /*Vector*/ 2); 2741 case MMAOp::Xvi8ger4: 2742 return genMmaVqFuncType(context, /*Quad*/ 0, /*Pair*/ 0, /*Vector*/ 2); 2743 case MMAOp::Xvi8ger4pp: 2744 return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 0, /*Vector*/ 2); 2745 case MMAOp::Xvi8ger4spp: 2746 return genMmaVqFuncType(context, /*Quad*/ 1, /*Pair*/ 0, /*Vector*/ 2); 2747 } 2748 llvm_unreachable("getMmaIrFuncType"); 2749 } 2750 2751 template <MMAOp IntrId, MMAHandlerOp HandlerOp> 2752 void PPCIntrinsicLibrary::genMmaIntr(llvm::ArrayRef<fir::ExtendedValue> args) { 2753 auto context{builder.getContext()}; 2754 mlir::FunctionType intrFuncType{getMmaIrFuncType(context, IntrId)}; 2755 mlir::func::FuncOp funcOp{ 2756 builder.createFunction(loc, getMmaIrIntrName(IntrId), intrFuncType)}; 2757 llvm::SmallVector<mlir::Value> intrArgs; 2758 2759 // Depending on SubToFunc, change the subroutine call to a function call. 2760 // First argument represents the result. Rest of the arguments 2761 // are shifted one position to form the actual argument list. 2762 size_t argStart{0}; 2763 size_t argStep{1}; 2764 size_t e{args.size()}; 2765 if (HandlerOp == MMAHandlerOp::SubToFunc) { 2766 // The first argument becomes function result. Start from the second 2767 // argument. 2768 argStart = 1; 2769 } else if (HandlerOp == MMAHandlerOp::SubToFuncReverseArgOnLE) { 2770 // Reverse argument order on little-endian target only. 2771 // The reversal does not depend on the setting of non-native-order option. 2772 const auto triple{fir::getTargetTriple(builder.getModule())}; 2773 if (triple.isLittleEndian()) { 2774 // Load the arguments in reverse order. 2775 argStart = args.size() - 1; 2776 // The first argument becomes function result. Stop at the second 2777 // argument. 2778 e = 0; 2779 argStep = -1; 2780 } else { 2781 // Load the arguments in natural order. 2782 // The first argument becomes function result. Start from the second 2783 // argument. 2784 argStart = 1; 2785 } 2786 } 2787 2788 for (size_t i = argStart, j = 0; i != e; i += argStep, ++j) { 2789 auto v{fir::getBase(args[i])}; 2790 if (i == 0 && HandlerOp == MMAHandlerOp::FirstArgIsResult) { 2791 // First argument is passed in as an address. We need to load 2792 // the content to match the LLVM interface. 2793 v = builder.create<fir::LoadOp>(loc, v); 2794 } 2795 auto vType{v.getType()}; 2796 mlir::Type targetType{intrFuncType.getInput(j)}; 2797 if (vType != targetType) { 2798 if (mlir::isa<mlir::VectorType>(targetType)) { 2799 // Perform vector type conversion for arguments passed by value. 2800 auto eleTy{mlir::dyn_cast<fir::VectorType>(vType).getElementType()}; 2801 auto len{mlir::dyn_cast<fir::VectorType>(vType).getLen()}; 2802 mlir::VectorType mlirType = mlir::VectorType::get(len, eleTy); 2803 auto v0{builder.createConvert(loc, mlirType, v)}; 2804 auto v1{builder.create<mlir::vector::BitCastOp>(loc, targetType, v0)}; 2805 intrArgs.push_back(v1); 2806 } else if (mlir::isa<mlir::IntegerType>(targetType) && 2807 mlir::isa<mlir::IntegerType>(vType)) { 2808 auto v0{builder.createConvert(loc, targetType, v)}; 2809 intrArgs.push_back(v0); 2810 } else { 2811 llvm::errs() << "\nUnexpected type conversion requested: " 2812 << " from " << vType << " to " << targetType << "\n"; 2813 llvm_unreachable("Unsupported type conversion for argument to PowerPC " 2814 "MMA intrinsic"); 2815 } 2816 } else { 2817 intrArgs.push_back(v); 2818 } 2819 } 2820 auto callSt{builder.create<fir::CallOp>(loc, funcOp, intrArgs)}; 2821 if (HandlerOp == MMAHandlerOp::SubToFunc || 2822 HandlerOp == MMAHandlerOp::SubToFuncReverseArgOnLE || 2823 HandlerOp == MMAHandlerOp::FirstArgIsResult) { 2824 // Convert pointer type if needed. 2825 mlir::Value callResult{callSt.getResult(0)}; 2826 mlir::Value destPtr{fir::getBase(args[0])}; 2827 mlir::Type callResultPtrType{builder.getRefType(callResult.getType())}; 2828 if (destPtr.getType() != callResultPtrType) { 2829 destPtr = builder.create<fir::ConvertOp>(loc, callResultPtrType, destPtr); 2830 } 2831 // Copy the result. 2832 builder.create<fir::StoreOp>(loc, callResult, destPtr); 2833 } 2834 } 2835 2836 // VEC_ST, VEC_STE 2837 template <VecOp vop> 2838 void PPCIntrinsicLibrary::genVecStore(llvm::ArrayRef<fir::ExtendedValue> args) { 2839 assert(args.size() == 3); 2840 2841 auto context{builder.getContext()}; 2842 auto argBases{getBasesForArgs(args)}; 2843 auto arg1TyInfo{getVecTypeFromFir(argBases[0])}; 2844 2845 auto addr{addOffsetToAddress(builder, loc, argBases[2], argBases[1])}; 2846 2847 llvm::StringRef fname{}; 2848 mlir::VectorType stTy{nullptr}; 2849 auto i32ty{mlir::IntegerType::get(context, 32)}; 2850 switch (vop) { 2851 case VecOp::St: 2852 stTy = mlir::VectorType::get(4, i32ty); 2853 fname = "llvm.ppc.altivec.stvx"; 2854 break; 2855 case VecOp::Ste: { 2856 const auto width{arg1TyInfo.eleTy.getIntOrFloatBitWidth()}; 2857 const auto len{arg1TyInfo.len}; 2858 2859 if (arg1TyInfo.isFloat32()) { 2860 stTy = mlir::VectorType::get(len, i32ty); 2861 fname = "llvm.ppc.altivec.stvewx"; 2862 } else if (mlir::isa<mlir::IntegerType>(arg1TyInfo.eleTy)) { 2863 stTy = mlir::VectorType::get(len, mlir::IntegerType::get(context, width)); 2864 2865 switch (width) { 2866 case 8: 2867 fname = "llvm.ppc.altivec.stvebx"; 2868 break; 2869 case 16: 2870 fname = "llvm.ppc.altivec.stvehx"; 2871 break; 2872 case 32: 2873 fname = "llvm.ppc.altivec.stvewx"; 2874 break; 2875 default: 2876 assert(false && "invalid element size"); 2877 } 2878 } else 2879 assert(false && "unknown type"); 2880 break; 2881 } 2882 case VecOp::Stxvp: 2883 // __vector_pair type 2884 stTy = mlir::VectorType::get(256, mlir::IntegerType::get(context, 1)); 2885 fname = "llvm.ppc.vsx.stxvp"; 2886 break; 2887 default: 2888 llvm_unreachable("invalid vector operation for generator"); 2889 } 2890 2891 auto funcType{ 2892 mlir::FunctionType::get(context, {stTy, addr.getType()}, std::nullopt)}; 2893 mlir::func::FuncOp funcOp = builder.createFunction(loc, fname, funcType); 2894 2895 llvm::SmallVector<mlir::Value, 4> biArgs; 2896 2897 if (vop == VecOp::Stxvp) { 2898 biArgs.push_back(argBases[0]); 2899 biArgs.push_back(addr); 2900 builder.create<fir::CallOp>(loc, funcOp, biArgs); 2901 return; 2902 } 2903 2904 auto vecTyInfo{getVecTypeFromFirType(argBases[0].getType())}; 2905 auto cnv{builder.createConvert(loc, vecTyInfo.toMlirVectorType(context), 2906 argBases[0])}; 2907 2908 mlir::Value newArg1{nullptr}; 2909 if (stTy != arg1TyInfo.toMlirVectorType(context)) 2910 newArg1 = builder.create<mlir::vector::BitCastOp>(loc, stTy, cnv); 2911 else 2912 newArg1 = cnv; 2913 2914 if (isBEVecElemOrderOnLE()) 2915 newArg1 = builder.createConvert( 2916 loc, stTy, reverseVectorElements(builder, loc, newArg1, 4)); 2917 2918 biArgs.push_back(newArg1); 2919 biArgs.push_back(addr); 2920 2921 builder.create<fir::CallOp>(loc, funcOp, biArgs); 2922 } 2923 2924 // VEC_XST, VEC_XST_BE, VEC_STXV, VEC_XSTD2, VEC_XSTW4 2925 template <VecOp vop> 2926 void PPCIntrinsicLibrary::genVecXStore( 2927 llvm::ArrayRef<fir::ExtendedValue> args) { 2928 assert(args.size() == 3); 2929 auto context{builder.getContext()}; 2930 auto argBases{getBasesForArgs(args)}; 2931 VecTypeInfo arg1TyInfo{getVecTypeFromFir(argBases[0])}; 2932 2933 auto addr{addOffsetToAddress(builder, loc, argBases[2], argBases[1])}; 2934 2935 mlir::Value trg{nullptr}; 2936 mlir::Value src{nullptr}; 2937 2938 switch (vop) { 2939 case VecOp::Xst: 2940 case VecOp::Xst_be: { 2941 src = argBases[0]; 2942 trg = builder.createConvert(loc, builder.getRefType(argBases[0].getType()), 2943 addr); 2944 2945 if (vop == VecOp::Xst_be || isBEVecElemOrderOnLE()) { 2946 auto cnv{builder.createConvert(loc, arg1TyInfo.toMlirVectorType(context), 2947 argBases[0])}; 2948 auto shf{reverseVectorElements(builder, loc, cnv, arg1TyInfo.len)}; 2949 2950 src = builder.createConvert(loc, arg1TyInfo.toFirVectorType(), shf); 2951 } 2952 break; 2953 } 2954 case VecOp::Xstd2: 2955 case VecOp::Xstw4: { 2956 // an 16-byte vector arg1 is treated as two 8-byte elements or 2957 // four 4-byte elements 2958 mlir::IntegerType elemTy; 2959 uint64_t numElem = (vop == VecOp::Xstd2) ? 2 : 4; 2960 elemTy = builder.getIntegerType(128 / numElem); 2961 2962 mlir::VectorType mlirVecTy{mlir::VectorType::get(numElem, elemTy)}; 2963 fir::VectorType firVecTy{fir::VectorType::get(numElem, elemTy)}; 2964 2965 auto cnv{builder.createConvert(loc, arg1TyInfo.toMlirVectorType(context), 2966 argBases[0])}; 2967 2968 mlir::Type srcTy{nullptr}; 2969 if (numElem != arg1TyInfo.len) { 2970 cnv = builder.create<mlir::vector::BitCastOp>(loc, mlirVecTy, cnv); 2971 srcTy = firVecTy; 2972 } else { 2973 srcTy = arg1TyInfo.toFirVectorType(); 2974 } 2975 2976 trg = builder.createConvert(loc, builder.getRefType(srcTy), addr); 2977 2978 if (isBEVecElemOrderOnLE()) { 2979 cnv = reverseVectorElements(builder, loc, cnv, numElem); 2980 } 2981 2982 src = builder.createConvert(loc, srcTy, cnv); 2983 break; 2984 } 2985 case VecOp::Stxv: 2986 src = argBases[0]; 2987 trg = builder.createConvert(loc, builder.getRefType(argBases[0].getType()), 2988 addr); 2989 break; 2990 default: 2991 assert(false && "Invalid vector operation for generator"); 2992 } 2993 builder.create<fir::StoreOp>(loc, mlir::TypeRange{}, 2994 mlir::ValueRange{src, trg}, 2995 getAlignmentAttr(builder, 1)); 2996 } 2997 2998 } // namespace fir 2999