//===- NVPTXIntrinsics.td - PTX Intrinsics Instructions -------*- tblgen -*-==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

// Floating-point immediate leaves. Each predicate converts the node's APFloat
// to the host type and compares it to the constant with C++ `==`, so the
// zero leaves accept -0.0 as well as +0.0.
def immFloat0 : PatLeaf<(fpimm), [{
  return N->getValueAPF().convertToFloat() == 0.0f;
}]>;

def immFloat1 : PatLeaf<(fpimm), [{
  return N->getValueAPF().convertToFloat() == 1.0f;
}]>;

def immDouble0 : PatLeaf<(fpimm), [{
  return N->getValueAPF().convertToDouble() == 0.0;
}]>;

def immDouble1 : PatLeaf<(fpimm), [{
  return N->getValueAPF().convertToDouble() == 1.0;
}]>;

// Predicate code fragments that check the address space of a memory node via
// ChkMemSDNodeAddressSpace.
def AS_match {
  code generic = [{
   return ChkMemSDNodeAddressSpace(N, llvm::ADDRESS_SPACE_GENERIC);
  }];
  code shared = [{
   return ChkMemSDNodeAddressSpace(N, llvm::ADDRESS_SPACE_SHARED);
  }];
  code global = [{
   return ChkMemSDNodeAddressSpace(N, llvm::ADDRESS_SPACE_GLOBAL);
  }];
}

// A node that will be replaced with the current PTX version.
class PTX {
  SDNodeXForm PTXVerXform = SDNodeXForm<imm, [{
    return getI32Imm(Subtarget->getPTXVersion(), SDLoc(N));
  }]>;
  // (i32 0) will be XForm'ed to the currently used PTX version.
  dag version = (PTXVerXform (i32 0));
}
def ptx : PTX;

// Generates a list of n sequentially numbered register names.
// E.g. RegSeq<3,"r">.ret -> ["r0", "r1", "r2"]
// n = 0 yields an empty list.
class RegSeq<int n, string prefix> {
  list<string> ret = !foreach(i, !range(n), prefix # i);
}

// Values to iterate over for a "thread mask is an immediate" flag:
// both 0 and 1 when sync is set, only 0 otherwise.
class THREADMASK_INFO<bit sync> {
  list<bit> ret = !if(sync, [0, 1], [0]);
}

//-----------------------------------
// Synchronization and shuffle functions
//-----------------------------------
// Every record defined inside this scope gets isConvergent = true. The scope
// is closed further down in the file ("} // isConvergent = true").
let isConvergent = true in {
def INT_BARRIER0 : NVPTXInst<(outs), (ins),
                  "bar.sync \t0;",
      [(int_nvvm_barrier0)]>;
def INT_BARRIERN : NVPTXInst<(outs), (ins Int32Regs:$src1),
                  "bar.sync \t$src1;",
      [(int_nvvm_barrier_n i32:$src1)]>;
def INT_BARRIER : NVPTXInst<(outs), (ins Int32Regs:$src1, Int32Regs:$src2),
                  "bar.sync \t$src1, $src2;",
      [(int_nvvm_barrier i32:$src1, i32:$src2)]>;

// The reducing barriers take an i32 operand; the emitted sequence first turns
// it into a predicate ($pred != 0) in a locally declared %p1.
def INT_BARRIER0_POPC : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$pred),
  !strconcat("{{ \n\t",
             ".reg .pred \t%p1; \n\t",
             "setp.ne.u32 \t%p1, $pred, 0; \n\t",
             "bar.red.popc.u32 \t$dst, 0, %p1; \n\t",
             "}}"),
      [(set i32:$dst, (int_nvvm_barrier0_popc i32:$pred))]>;
// For and/or the predicate result %p2 is converted back to 0/1 with selp.
def INT_BARRIER0_AND : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$pred),
  !strconcat("{{ \n\t",
             ".reg .pred \t%p1; \n\t",
             ".reg .pred \t%p2; \n\t",
             "setp.ne.u32 \t%p1, $pred, 0; \n\t",
             "bar.red.and.pred \t%p2, 0, %p1; \n\t",
             "selp.u32 \t$dst, 1, 0, %p2; \n\t",
             "}}"),
      [(set i32:$dst, (int_nvvm_barrier0_and i32:$pred))]>;
def INT_BARRIER0_OR : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$pred),
  !strconcat("{{ \n\t",
             ".reg .pred \t%p1; \n\t",
             ".reg .pred \t%p2; \n\t",
             "setp.ne.u32 \t%p1, $pred, 0; \n\t",
             "bar.red.or.pred \t%p2, 0, %p1; \n\t",
             "selp.u32 \t$dst, 1, 0, %p2; \n\t",
             "}}"),
      [(set i32:$dst, (int_nvvm_barrier0_or i32:$pred))]>;

def INT_BAR_SYNC : NVPTXInst<(outs), (ins i32imm:$i), "bar.sync \t$i;",
                             [(int_nvvm_bar_sync imm:$i)]>;

// In the defs below, an _I suffix takes the operand as an immediate and an
// _R suffix takes it in a register.
def INT_BAR_WARP_SYNC_I : NVPTXInst<(outs), (ins i32imm:$i), "bar.warp.sync \t$i;",
                             [(int_nvvm_bar_warp_sync imm:$i)]>,
        Requires<[hasPTX<60>, hasSM<30>]>;
def INT_BAR_WARP_SYNC_R : NVPTXInst<(outs), (ins Int32Regs:$i), "bar.warp.sync \t$i;",
                             [(int_nvvm_bar_warp_sync i32:$i)]>,
        Requires<[hasPTX<60>, hasSM<30>]>;

def INT_BARRIER_SYNC_I : NVPTXInst<(outs), (ins i32imm:$i), "barrier.sync \t$i;",
                                   [(int_nvvm_barrier_sync imm:$i)]>,
        Requires<[hasPTX<60>, hasSM<30>]>;
def INT_BARRIER_SYNC_R : NVPTXInst<(outs), (ins Int32Regs:$i), "barrier.sync \t$i;",
                                   [(int_nvvm_barrier_sync i32:$i)]>,
        Requires<[hasPTX<60>, hasSM<30>]>;

// barrier.sync with an explicit count. The two-letter suffix gives the kind
// of $id and $cnt respectively (R = register, I = immediate).
def INT_BARRIER_SYNC_CNT_RR : NVPTXInst<(outs), (ins Int32Regs:$id, Int32Regs:$cnt),
                 "barrier.sync \t$id, $cnt;",
                 [(int_nvvm_barrier_sync_cnt i32:$id, i32:$cnt)]>,
        Requires<[hasPTX<60>, hasSM<30>]>;
def INT_BARRIER_SYNC_CNT_RI : NVPTXInst<(outs), (ins Int32Regs:$id, i32imm:$cnt),
                 "barrier.sync \t$id, $cnt;",
                 [(int_nvvm_barrier_sync_cnt i32:$id, imm:$cnt)]>,
        Requires<[hasPTX<60>, hasSM<30>]>;
def INT_BARRIER_SYNC_CNT_IR : NVPTXInst<(outs), (ins i32imm:$id, Int32Regs:$cnt),
                 "barrier.sync \t$id, $cnt;",
                 [(int_nvvm_barrier_sync_cnt imm:$id, i32:$cnt)]>,
        Requires<[hasPTX<60>, hasSM<30>]>;
def INT_BARRIER_SYNC_CNT_II : NVPTXInst<(outs), (ins i32imm:$id, i32imm:$cnt),
                 "barrier.sync \t$id, $cnt;",
                 [(int_nvvm_barrier_sync_cnt imm:$id, imm:$cnt)]>,
        Requires<[hasPTX<60>, hasSM<30>]>;

// Operand-less "barrier.cluster.<variant>;" instruction selected from Intr.
// Preds defaults to [hasPTX<78>, hasSM<90>] and can be overridden per def.
class INT_BARRIER_CLUSTER<string variant, Intrinsic Intr,
                          list<Predicate> Preds = [hasPTX<78>, hasSM<90>]>:
        NVPTXInst<(outs), (ins), "barrier.cluster."# variant #";", [(Intr)]>,
        Requires<Preds>;

def barrier_cluster_arrive:
        INT_BARRIER_CLUSTER<"arrive", int_nvvm_barrier_cluster_arrive>;
def barrier_cluster_arrive_relaxed:
        INT_BARRIER_CLUSTER<"arrive.relaxed",
        int_nvvm_barrier_cluster_arrive_relaxed, [hasPTX<80>, hasSM<90>]>;
def barrier_cluster_wait:
        INT_BARRIER_CLUSTER<"wait", int_nvvm_barrier_cluster_wait>;
// 'aligned' versions of the cluster barrier intrinsics
def barrier_cluster_arrive_aligned:
        INT_BARRIER_CLUSTER<"arrive.aligned", int_nvvm_barrier_cluster_arrive_aligned>;
def barrier_cluster_arrive_relaxed_aligned:
        INT_BARRIER_CLUSTER<"arrive.relaxed.aligned",
        int_nvvm_barrier_cluster_arrive_relaxed_aligned, [hasPTX<80>, hasSM<90>]>;
def barrier_cluster_wait_aligned:
        INT_BARRIER_CLUSTER<"wait.aligned", int_nvvm_barrier_cluster_wait_aligned>;

// One shfl / shfl.sync instruction variant.
//   sync           - emit "shfl.sync" and add a leading $threadmask operand.
//   mode           - shuffle mode, used in both the intrinsic name and the asm.
//   reg            - "i32" or "f32"; selects the register class of $src/$dst.
//   return_pred    - also define an Int1Regs $pred output ("$dst|$pred").
//   offset_imm, mask_imm, threadmask_imm
//                  - take the corresponding operand as i32imm instead of
//                    Int32Regs.
// The operand lists, asm string and pattern are all derived from these flags;
// the placeholder values passed to NVPTXInst are overridden below.
class SHFL_INSTR<bit sync, string mode, string reg, bit return_pred,
                 bit offset_imm, bit mask_imm, bit threadmask_imm>
      : NVPTXInst<(outs), (ins), "?", []> {
  NVPTXRegClass rc = !cond(
    !eq(reg, "i32"): Int32Regs,
    !eq(reg, "f32"): Float32Regs);
  // Name of the intrinsic record to match, e.g. int_nvvm_shfl_sync_bfly_i32p.
  string IntrName = "int_nvvm_shfl_"
                    # !if(sync, "sync_", "")
                    # mode
                    # "_" # reg
                    # !if(return_pred, "p", "");
  Intrinsic Intr = !cast<Intrinsic>(IntrName);
  let InOperandList = !con(
    !if(sync,
        !dag(ins, !if(threadmask_imm, [i32imm], [Int32Regs]), ["threadmask"]),
        (ins)),
    (ins rc:$src),
    !dag(ins, !if(offset_imm, [i32imm], [Int32Regs]), ["offset"]),
    !dag(ins, !if(mask_imm, [i32imm], [Int32Regs]), ["mask"])
    );
  let OutOperandList = !if(return_pred, (outs rc:$dst, Int1Regs:$pred), (outs rc:$dst));
  let AsmString = "shfl."
     # !if(sync, "sync.", "")
     # mode # ".b32\t"
     # "$dst"
     # !if(return_pred, "|$pred", "") # ", "
     # "$src, $offset, $mask"
     # !if(sync, ", $threadmask", "")
     # ";"
     ;
  // Build (set <outs...>, (Intr <ins...>)) from the operand lists: the
  // outs/ins operators are replaced by set/Intr and every i32imm operand
  // becomes an imm leaf.
  let Pattern = [!con(
      !foreach(tmp, OutOperandList,
             !subst(outs, set,
             !subst(i32imm, imm, tmp))),
      (set !foreach(tmp, InOperandList,
             !subst(ins, Intr,
             !subst(i32imm, imm, tmp))))
  )];
}

// Instantiate every combination of the SHFL_INSTR flags. The threadmask
// immediate/register choice is only iterated for the sync variants.
foreach sync = [false, true] in {
  foreach mode = ["up", "down", "bfly", "idx"] in {
    foreach regclass = ["i32", "f32"] in {
      foreach return_pred = [false, true] in {
        foreach offset_imm = [false, true] in {
          foreach mask_imm = [false, true] in {
            foreach threadmask_imm = THREADMASK_INFO<sync>.ret in {
              def : SHFL_INSTR<sync, mode, regclass, return_pred,
                               offset_imm, mask_imm, threadmask_imm>,
                    Requires<!if(sync, [hasSM<30>, hasPTX<60>], [hasSM<30>, hasSHFL])>;
            }
          }
        }
      }
    }
  }
}

// vote.{all,any,uni,ballot}
multiclass VOTE<NVPTXRegClass regclass, string mode, Intrinsic IntOp> {
  def : NVPTXInst<(outs regclass:$dest), (ins Int1Regs:$pred),
              "vote." # mode # " \t$dest, $pred;",
              [(set regclass:$dest, (IntOp i1:$pred))]>,
        Requires<[hasPTX<60>, hasSM<30>]>;
}

defm VOTE_ALL : VOTE<Int1Regs, "all.pred", int_nvvm_vote_all>;
defm VOTE_ANY : VOTE<Int1Regs, "any.pred", int_nvvm_vote_any>;
defm VOTE_UNI : VOTE<Int1Regs, "uni.pred", int_nvvm_vote_uni>;
defm VOTE_BALLOT : VOTE<Int32Regs, "ballot.b32", int_nvvm_vote_ballot>;

// vote.sync.{all,any,uni,ballot}
// Suffix i: $mask is an immediate; suffix r: $mask is a register.
multiclass VOTE_SYNC<NVPTXRegClass regclass, string mode, Intrinsic IntOp> {
  def i : NVPTXInst<(outs regclass:$dest), (ins i32imm:$mask, Int1Regs:$pred),
              "vote.sync." # mode # " \t$dest, $pred, $mask;",
              [(set regclass:$dest, (IntOp imm:$mask, i1:$pred))]>,
          Requires<[hasPTX<60>, hasSM<30>]>;
  def r : NVPTXInst<(outs regclass:$dest), (ins Int32Regs:$mask, Int1Regs:$pred),
              "vote.sync." # mode #" \t$dest, $pred, $mask;",
              [(set regclass:$dest, (IntOp i32:$mask, i1:$pred))]>,
          Requires<[hasPTX<60>, hasSM<30>]>;
}

defm VOTE_SYNC_ALL : VOTE_SYNC<Int1Regs, "all.pred", int_nvvm_vote_all_sync>;
defm VOTE_SYNC_ANY : VOTE_SYNC<Int1Regs, "any.pred", int_nvvm_vote_any_sync>;
defm VOTE_SYNC_UNI : VOTE_SYNC<Int1Regs, "uni.pred", int_nvvm_vote_uni_sync>;
defm VOTE_SYNC_BALLOT : VOTE_SYNC<Int32Regs, "ballot.b32", int_nvvm_vote_ballot_sync>;

// elect.sync
def INT_ELECT_SYNC_I : NVPTXInst<(outs Int32Regs:$dest, Int1Regs:$pred), (ins i32imm:$mask),
            "elect.sync \t$dest|$pred, $mask;",
            [(set i32:$dest, i1:$pred, (int_nvvm_elect_sync imm:$mask))]>,
            Requires<[hasPTX<80>, hasSM<90>]>;
def INT_ELECT_SYNC_R : NVPTXInst<(outs Int32Regs:$dest, Int1Regs:$pred), (ins Int32Regs:$mask),
            "elect.sync \t$dest|$pred, $mask;",
            [(set i32:$dest, i1:$pred, (int_nvvm_elect_sync i32:$mask))]>,
            Requires<[hasPTX<80>, hasSM<90>]>;

// match.any.sync.{b32,b64}
// Def suffix is <value kind><mask kind>, with i = immediate and r = register:
//   ii: imm value, imm mask      ir: imm value, reg mask
//   ri: reg value, imm mask      rr: reg value, reg mask
multiclass MATCH_ANY_SYNC<NVPTXRegClass regclass, string ptxtype, Intrinsic IntOp,
                          Operand ImmOp> {
  def ii : NVPTXInst<(outs Int32Regs:$dest), (ins i32imm:$mask, ImmOp:$value),
              "match.any.sync." # ptxtype # " \t$dest, $value, $mask;",
              [(set i32:$dest, (IntOp imm:$mask, imm:$value))]>,
           Requires<[hasPTX<60>, hasSM<70>]>;
  def ir : NVPTXInst<(outs Int32Regs:$dest), (ins Int32Regs:$mask, ImmOp:$value),
              "match.any.sync." # ptxtype # " \t$dest, $value, $mask;",
              [(set i32:$dest, (IntOp i32:$mask, imm:$value))]>,
           Requires<[hasPTX<60>, hasSM<70>]>;
  def ri : NVPTXInst<(outs Int32Regs:$dest), (ins i32imm:$mask, regclass:$value),
              "match.any.sync." # ptxtype # " \t$dest, $value, $mask;",
              [(set i32:$dest, (IntOp imm:$mask, regclass:$value))]>,
           Requires<[hasPTX<60>, hasSM<70>]>;
  def rr : NVPTXInst<(outs Int32Regs:$dest), (ins Int32Regs:$mask, regclass:$value),
              "match.any.sync." # ptxtype # " \t$dest, $value, $mask;",
              [(set i32:$dest, (IntOp i32:$mask, regclass:$value))]>,
           Requires<[hasPTX<60>, hasSM<70>]>;
}

// activemask.b32
def ACTIVEMASK : NVPTXInst<(outs Int32Regs:$dest), (ins),
                    "activemask.b32 \t$dest;",
                    [(set i32:$dest, (int_nvvm_activemask))]>,
                 Requires<[hasPTX<62>, hasSM<30>]>;

defm MATCH_ANY_SYNC_32 : MATCH_ANY_SYNC<Int32Regs, "b32", int_nvvm_match_any_sync_i32,
                                        i32imm>;
defm MATCH_ANY_SYNC_64 : MATCH_ANY_SYNC<Int64Regs, "b64", int_nvvm_match_any_sync_i64,
                                        i64imm>;

// match.all.sync.{b32,b64} with an additional predicate result ($dest|$pred).
// Def suffixes follow the same <value kind><mask kind> scheme as
// MATCH_ANY_SYNC.
multiclass MATCH_ALLP_SYNC<NVPTXRegClass regclass, string ptxtype, Intrinsic IntOp,
                          Operand ImmOp> {
  def ii : NVPTXInst<(outs Int32Regs:$dest, Int1Regs:$pred),
                     (ins i32imm:$mask, ImmOp:$value),
              "match.all.sync." # ptxtype # " \t$dest|$pred, $value, $mask;",
              [(set i32:$dest, i1:$pred, (IntOp imm:$mask, imm:$value))]>,
           Requires<[hasPTX<60>, hasSM<70>]>;
  def ir : NVPTXInst<(outs Int32Regs:$dest, Int1Regs:$pred),
                     (ins Int32Regs:$mask, ImmOp:$value),
              "match.all.sync." # ptxtype # " \t$dest|$pred, $value, $mask;",
              [(set i32:$dest, i1:$pred, (IntOp i32:$mask, imm:$value))]>,
           Requires<[hasPTX<60>, hasSM<70>]>;
  def ri : NVPTXInst<(outs Int32Regs:$dest, Int1Regs:$pred),
                     (ins i32imm:$mask, regclass:$value),
              "match.all.sync." # ptxtype # " \t$dest|$pred, $value, $mask;",
              [(set i32:$dest, i1:$pred, (IntOp imm:$mask, regclass:$value))]>,
           Requires<[hasPTX<60>, hasSM<70>]>;
  def rr : NVPTXInst<(outs Int32Regs:$dest, Int1Regs:$pred),
                     (ins Int32Regs:$mask, regclass:$value),
              "match.all.sync." # ptxtype # " \t$dest|$pred, $value, $mask;",
              [(set i32:$dest, i1:$pred, (IntOp i32:$mask, regclass:$value))]>,
           Requires<[hasPTX<60>, hasSM<70>]>;
}
defm MATCH_ALLP_SYNC_32 : MATCH_ALLP_SYNC<Int32Regs, "b32", int_nvvm_match_all_sync_i32p,
                                         i32imm>;
defm MATCH_ALLP_SYNC_64 : MATCH_ALLP_SYNC<Int64Regs, "b64", int_nvvm_match_all_sync_i64p,
                                         i64imm>;

// redux.sync.<BinOp>.<PTXType>
// Both pattern operands are typed as i32, in line with the other i32-typed
// operands in this section.
multiclass REDUX_SYNC<string BinOp, string PTXType, Intrinsic Intrin> {
  def : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$src, Int32Regs:$mask),
          "redux.sync." # BinOp # "." # PTXType # " $dst, $src, $mask;",
          [(set i32:$dst, (Intrin i32:$src, i32:$mask))]>,
        Requires<[hasPTX<70>, hasSM<80>]>;
}

defm REDUX_SYNC_UMIN : REDUX_SYNC<"min", "u32", int_nvvm_redux_sync_umin>;
defm REDUX_SYNC_UMAX : REDUX_SYNC<"max", "u32", int_nvvm_redux_sync_umax>;
defm REDUX_SYNC_ADD : REDUX_SYNC<"add", "s32", int_nvvm_redux_sync_add>;
defm REDUX_SYNC_MIN : REDUX_SYNC<"min", "s32", int_nvvm_redux_sync_min>;
defm REDUX_SYNC_MAX : REDUX_SYNC<"max", "s32", int_nvvm_redux_sync_max>;
defm REDUX_SYNC_AND : REDUX_SYNC<"and", "b32", int_nvvm_redux_sync_and>;
defm REDUX_SYNC_XOR : REDUX_SYNC<"xor", "b32", int_nvvm_redux_sync_xor>;
defm REDUX_SYNC_OR : REDUX_SYNC<"or", "b32", int_nvvm_redux_sync_or>;

} // isConvergent = true

//-----------------------------------
// Explicit Memory Fence Functions
//-----------------------------------
// Operand-less fence instruction: prints StrOp verbatim and is selected from
// the intrinsic IntOP.
class MEMBAR<string StrOp, Intrinsic IntOP> :
              NVPTXInst<(outs), (ins),
            StrOp, [(IntOP)]>;

def INT_MEMBAR_CTA : MEMBAR<"membar.cta;", int_nvvm_membar_cta>;
def INT_MEMBAR_GL  : MEMBAR<"membar.gl;",  int_nvvm_membar_gl>;
def INT_MEMBAR_SYS : MEMBAR<"membar.sys;", int_nvvm_membar_sys>;

def INT_FENCE_SC_CLUSTER:
       MEMBAR<"fence.sc.cluster;", int_nvvm_fence_sc_cluster>,
       Requires<[hasPTX<78>, hasSM<90>]>;

// Proxy fence (uni-directional)
// fence.proxy.tensormap.release variants

351class FENCE_PROXY_TENSORMAP_GENERIC_RELEASE<string Scope, Intrinsic Intr> : 352 NVPTXInst<(outs), (ins), 353 "fence.proxy.tensormap::generic.release." # Scope # ";", [(Intr)]>, 354 Requires<[hasPTX<83>, hasSM<90>]>; 355 356def INT_FENCE_PROXY_TENSORMAP_GENERIC_RELEASE_CTA: 357 FENCE_PROXY_TENSORMAP_GENERIC_RELEASE<"cta", 358 int_nvvm_fence_proxy_tensormap_generic_release_cta>; 359def INT_FENCE_PROXY_TENSORMAP_GENERIC_RELEASE_CLUSTER: 360 FENCE_PROXY_TENSORMAP_GENERIC_RELEASE<"cluster", 361 int_nvvm_fence_proxy_tensormap_generic_release_cluster>; 362def INT_FENCE_PROXY_TENSORMAP_GENERIC_RELEASE_GPU: 363 FENCE_PROXY_TENSORMAP_GENERIC_RELEASE<"gpu", 364 int_nvvm_fence_proxy_tensormap_generic_release_gpu>; 365def INT_FENCE_PROXY_TENSORMAP_GENERIC_RELEASE_SYS: 366 FENCE_PROXY_TENSORMAP_GENERIC_RELEASE<"sys", 367 int_nvvm_fence_proxy_tensormap_generic_release_sys>; 368 369// fence.proxy.tensormap.acquire variants 370 371class FENCE_PROXY_TENSORMAP_GENERIC_ACQUIRE<string Scope, Intrinsic Intr> : 372 NVPTXInst<(outs), (ins Int64Regs:$addr), 373 "fence.proxy.tensormap::generic.acquire." 
# Scope # " [$addr], 128;", 374 [(Intr i64:$addr, (i32 128))]>, 375 Requires<[hasPTX<83>, hasSM<90>]>; 376 377def INT_FENCE_PROXY_TENSORMAP_GENERIC_ACQUIRE_CTA : 378 FENCE_PROXY_TENSORMAP_GENERIC_ACQUIRE<"cta", 379 int_nvvm_fence_proxy_tensormap_generic_acquire_cta>; 380def INT_FENCE_PROXY_TENSORMAP_GENERIC_ACQUIRE_CLUSTER : 381 FENCE_PROXY_TENSORMAP_GENERIC_ACQUIRE<"cluster", 382 int_nvvm_fence_proxy_tensormap_generic_acquire_cluster>; 383def INT_FENCE_PROXY_TENSORMAP_GENERIC_ACQUIRE_GPU : 384 FENCE_PROXY_TENSORMAP_GENERIC_ACQUIRE<"gpu", 385 int_nvvm_fence_proxy_tensormap_generic_acquire_gpu>; 386def INT_FENCE_PROXY_TENSORMAP_GENERIC_ACQUIRE_SYS : 387 FENCE_PROXY_TENSORMAP_GENERIC_ACQUIRE<"sys", 388 int_nvvm_fence_proxy_tensormap_generic_acquire_sys>; 389 390//----------------------------------- 391// Async Copy Functions 392//----------------------------------- 393 394multiclass CP_ASYNC_MBARRIER_ARRIVE<string NoInc, string AddrSpace, Intrinsic Intrin> { 395 def _32 : NVPTXInst<(outs), (ins Int32Regs:$addr), 396 !strconcat("cp.async.mbarrier.arrive", NoInc, AddrSpace, ".b64 [$addr];"), 397 [(Intrin i32:$addr)]>, 398 Requires<[hasPTX<70>, hasSM<80>]>; 399 def _64 : NVPTXInst<(outs), (ins Int64Regs:$addr), 400 !strconcat("cp.async.mbarrier.arrive", NoInc, AddrSpace, ".b64 [$addr];"), 401 [(Intrin i64:$addr)]>, 402 Requires<[hasPTX<70>, hasSM<80>]>; 403} 404 405defm CP_ASYNC_MBARRIER_ARRIVE : 406 CP_ASYNC_MBARRIER_ARRIVE<"", "", int_nvvm_cp_async_mbarrier_arrive>; 407defm CP_ASYNC_MBARRIER_ARRIVE_SHARED : 408 CP_ASYNC_MBARRIER_ARRIVE<"", ".shared", int_nvvm_cp_async_mbarrier_arrive_shared>; 409defm CP_ASYNC_MBARRIER_ARRIVE_NOINC : 410 CP_ASYNC_MBARRIER_ARRIVE<".noinc", "", int_nvvm_cp_async_mbarrier_arrive_noinc>; 411defm CP_ASYNC_MBARRIER_ARRIVE_NOINC_SHARED : 412 CP_ASYNC_MBARRIER_ARRIVE<".noinc", ".shared", int_nvvm_cp_async_mbarrier_arrive_noinc_shared>; 413 414multiclass CP_ASYNC_SHARED_GLOBAL_I<string cc, string cpsize, Intrinsic Intrin, Intrinsic IntrinS> { 
415 def _32 : NVPTXInst<(outs), (ins Int32Regs:$dst, Int32Regs:$src), 416 !strconcat("cp.async.", cc, ".shared.global [$dst], [$src], ", cpsize, ";"), 417 [(Intrin i32:$dst, i32:$src)]>, 418 Requires<[hasPTX<70>, hasSM<80>]>; 419 def _64 : NVPTXInst<(outs), (ins Int64Regs:$dst, Int64Regs:$src), 420 !strconcat("cp.async.", cc, ".shared.global [$dst], [$src], ", cpsize, ";"), 421 [(Intrin i64:$dst, i64:$src)]>, 422 Requires<[hasPTX<70>, hasSM<80>]>; 423 // Variant with src_size parameter 424 def _32s : NVPTXInst<(outs), (ins Int32Regs:$dst, Int32Regs:$src, Int32Regs:$src_size), 425 !strconcat("cp.async.", cc, ".shared.global [$dst], [$src], ", cpsize, ", $src_size;"), 426 [(IntrinS i32:$dst, i32:$src, i32:$src_size)]>, 427 Requires<[hasPTX<70>, hasSM<80>]>; 428 def _32si: NVPTXInst<(outs), (ins Int32Regs:$dst, Int32Regs:$src, i32imm:$src_size), 429 !strconcat("cp.async.", cc, ".shared.global [$dst], [$src], ", cpsize, ", $src_size;"), 430 [(IntrinS i32:$dst, i32:$src, imm:$src_size)]>, 431 Requires<[hasPTX<70>, hasSM<80>]>; 432 def _64s : NVPTXInst<(outs), (ins Int64Regs:$dst, Int64Regs:$src, Int32Regs:$src_size), 433 !strconcat("cp.async.", cc, ".shared.global [$dst], [$src], ", cpsize, ", $src_size;"), 434 [(IntrinS i64:$dst, i64:$src, i32:$src_size)]>, 435 Requires<[hasPTX<70>, hasSM<80>]>; 436 def _64si: NVPTXInst<(outs), (ins Int64Regs:$dst, Int64Regs:$src, i32imm:$src_size), 437 !strconcat("cp.async.", cc, ".shared.global [$dst], [$src], ", cpsize, ", $src_size;"), 438 [(IntrinS i64:$dst, i64:$src, imm:$src_size)]>, 439 Requires<[hasPTX<70>, hasSM<80>]>; 440} 441 442defm CP_ASYNC_CA_SHARED_GLOBAL_4 : 443 CP_ASYNC_SHARED_GLOBAL_I<"ca", "4", int_nvvm_cp_async_ca_shared_global_4, 444 int_nvvm_cp_async_ca_shared_global_4_s>; 445 446defm CP_ASYNC_CA_SHARED_GLOBAL_8 : 447 CP_ASYNC_SHARED_GLOBAL_I<"ca", "8", int_nvvm_cp_async_ca_shared_global_8, 448 int_nvvm_cp_async_ca_shared_global_8_s>; 449 450defm CP_ASYNC_CA_SHARED_GLOBAL_16 : 451 CP_ASYNC_SHARED_GLOBAL_I<"ca", 
"16", int_nvvm_cp_async_ca_shared_global_16, 452 int_nvvm_cp_async_ca_shared_global_16_s>; 453 454defm CP_ASYNC_CG_SHARED_GLOBAL_16 : 455 CP_ASYNC_SHARED_GLOBAL_I<"cg", "16", int_nvvm_cp_async_cg_shared_global_16, 456 int_nvvm_cp_async_cg_shared_global_16_s>; 457 458def CP_ASYNC_COMMIT_GROUP : 459 NVPTXInst<(outs), (ins), "cp.async.commit_group;", [(int_nvvm_cp_async_commit_group)]>, 460 Requires<[hasPTX<70>, hasSM<80>]>; 461 462def CP_ASYNC_WAIT_GROUP : 463 NVPTXInst<(outs), (ins i32imm:$n), "cp.async.wait_group $n;", 464 [(int_nvvm_cp_async_wait_group timm:$n)]>, 465 Requires<[hasPTX<70>, hasSM<80>]>; 466 467def CP_ASYNC_WAIT_ALL : 468 NVPTXInst<(outs), (ins), "cp.async.wait_all;", 469 [(int_nvvm_cp_async_wait_all)]>, 470 Requires<[hasPTX<70>, hasSM<80>]>; 471 472// cp.async.bulk variants of the commit/wait group 473def CP_ASYNC_BULK_COMMIT_GROUP : 474 NVPTXInst<(outs), (ins), "cp.async.bulk.commit_group;", 475 [(int_nvvm_cp_async_bulk_commit_group)]>, 476 Requires<[hasPTX<80>, hasSM<90>]>; 477 478def CP_ASYNC_BULK_WAIT_GROUP : 479 NVPTXInst<(outs), (ins i32imm:$n), "cp.async.bulk.wait_group $n;", 480 [(int_nvvm_cp_async_bulk_wait_group timm:$n)]>, 481 Requires<[hasPTX<80>, hasSM<90>]>; 482 483def CP_ASYNC_BULK_WAIT_GROUP_READ : 484 NVPTXInst<(outs), (ins i32imm:$n), "cp.async.bulk.wait_group.read $n;", 485 [(int_nvvm_cp_async_bulk_wait_group_read timm:$n)]>, 486 Requires<[hasPTX<80>, hasSM<90>]>; 487 488//------------------------------ 489// TMA Async Bulk Copy Functions 490//------------------------------ 491 492class CpAsyncBulkStr<bit mc, bit ch> { 493 // Shared to Global memory 494 string S2G = "cp.async.bulk.global.shared::cta.bulk_group" 495 # !if(ch, ".L2::cache_hint", ""); 496 497 // Global to Shared cluster memory 498 string G2S = "cp.async.bulk.shared::cluster.global.mbarrier::complete_tx::bytes" 499 # !if(mc, ".multicast::cluster", "") 500 # !if(ch, ".L2::cache_hint", ""); 501 502 // Shared CTA to Cluster memory 503 string C2C = 
"cp.async.bulk.shared::cluster.shared::cta.mbarrier::complete_tx::bytes"; 504} 505 506multiclass CP_ASYNC_BULK_S2G<NVPTXRegClass rc> { 507 def NAME: NVPTXInst<(outs), 508 (ins Int64Regs:$dst, rc:$src, Int32Regs:$size), 509 !strconcat(CpAsyncBulkStr<0, 0>.S2G, " [$dst], [$src], $size;"), []>, 510 Requires<[hasPTX<80>, hasSM<90>]>; 511 def NAME # _CH: NVPTXInst<(outs), 512 (ins Int64Regs:$dst, rc:$src, Int32Regs:$size, Int64Regs:$ch), 513 !strconcat(CpAsyncBulkStr<0, 1>.S2G, " [$dst], [$src], $size, $ch;"), []>, 514 Requires<[hasPTX<80>, hasSM<90>]>; 515} 516defm CP_ASYNC_BULK_S2G : CP_ASYNC_BULK_S2G<Int64Regs>; 517defm CP_ASYNC_BULK_S2G_SHARED32 : CP_ASYNC_BULK_S2G<Int32Regs>; 518 519multiclass CP_ASYNC_BULK_G2S<NVPTXRegClass rc> { 520 def NAME: NVPTXInst<(outs), 521 (ins rc:$dst, rc:$mbar, Int64Regs:$src, Int32Regs:$size), 522 !strconcat(CpAsyncBulkStr<0, 0>.G2S, " [$dst], [$src], $size, [$mbar];"), []>, 523 Requires<[hasPTX<80>, hasSM<90>]>; 524 def NAME # _MC: NVPTXInst<(outs), 525 (ins rc:$dst, rc:$mbar, Int64Regs:$src, Int32Regs:$size, Int16Regs:$mc), 526 !strconcat(CpAsyncBulkStr<1, 0>.G2S, " [$dst], [$src], $size, [$mbar], $mc;"), []>, 527 Requires<[hasPTX<80>, hasSM<90>]>; 528 def NAME # _CH: NVPTXInst<(outs), 529 (ins rc:$dst, rc:$mbar, Int64Regs:$src, Int32Regs:$size, Int64Regs:$ch), 530 !strconcat(CpAsyncBulkStr<0, 1>.G2S, " [$dst], [$src], $size, [$mbar], $ch;"), []>, 531 Requires<[hasPTX<80>, hasSM<90>]>; 532 def NAME # _MC_CH: NVPTXInst<(outs), 533 (ins rc:$dst, rc:$mbar, Int64Regs:$src, Int32Regs:$size, Int16Regs:$mc, Int64Regs:$ch), 534 !strconcat(CpAsyncBulkStr<1, 1>.G2S, " [$dst], [$src], $size, [$mbar], $mc, $ch;"), []>, 535 Requires<[hasPTX<80>, hasSM<90>]>; 536} 537defm CP_ASYNC_BULK_G2S : CP_ASYNC_BULK_G2S<Int64Regs>; 538defm CP_ASYNC_BULK_G2S_SHARED32 : CP_ASYNC_BULK_G2S<Int32Regs>; 539 540multiclass CP_ASYNC_BULK_CTA_TO_CLUSTER<NVPTXRegClass rc> { 541 def NAME: NVPTXInst<(outs), 542 (ins rc:$dst, rc:$mbar, rc:$src, Int32Regs:$size), 543 
!strconcat(CpAsyncBulkStr<0, 0>.C2C, " [$dst], [$src], $size, [$mbar];"), 544 [(int_nvvm_cp_async_bulk_shared_cta_to_cluster rc:$dst, rc:$mbar, rc:$src, Int32Regs:$size)]>, 545 Requires<[hasPTX<80>, hasSM<90>]>; 546} 547defm CP_ASYNC_BULK_CTA_TO_CLUSTER : CP_ASYNC_BULK_CTA_TO_CLUSTER<Int64Regs>; 548defm CP_ASYNC_BULK_CTA_TO_CLUSTER_SHARED32 : CP_ASYNC_BULK_CTA_TO_CLUSTER<Int32Regs>; 549 550//------------------------------ 551// Bulk Copy Prefetch Functions 552//------------------------------ 553def CP_ASYNC_BULK_PREFETCH : NVPTXInst<(outs), 554 (ins Int64Regs:$src, Int32Regs:$size), 555 "cp.async.bulk.prefetch.L2.global [$src], $size;", []>, 556 Requires<[hasPTX<80>, hasSM<90>]>; 557 558def CP_ASYNC_BULK_PREFETCH_CH : NVPTXInst<(outs), 559 (ins Int64Regs:$src, Int32Regs:$size, Int64Regs:$ch), 560 "cp.async.bulk.prefetch.L2.global.L2::cache_hint [$src], $size, $ch;", []>, 561 Requires<[hasPTX<80>, hasSM<90>]>; 562//------------------------------------- 563// TMA Async Bulk Tensor Copy Functions 564//------------------------------------- 565 566// From Global to Shared memory (G2S) 567class G2S_STRINGS<int dim, string mode, bit mc, bit ch, bit is_shared32 = 0> { 568 string prefix = "cp.async.bulk.tensor"; 569 string dir = "shared::cluster.global"; 570 string completion = "mbarrier::complete_tx::bytes"; 571 string inst_name = prefix 572 # "." # dim # "d" 573 # "." # dir 574 # "." # mode 575 # "." 
# completion 576 # !if(mc, ".multicast::cluster", "") 577 # !if(ch, ".L2::cache_hint", ""); 578 string intr_name = "CP_ASYNC_BULK_TENSOR_G2S_" 579 # dim # "D" 580 # !if(is_shared32, "_SHARED32", "") 581 # !if(!eq(mode, "tile"), "_TILE", "_IM2COL"); 582} 583 584multiclass CP_ASYNC_BULK_TENSOR_G2S_INTR<int dim, bit is_shared32, string mode> { 585 defvar dims_dag = !dag(ins, !listsplat(Int32Regs, dim), !foreach(i, !range(dim), "d" # i)); 586 defvar dims_str = !interleave(!foreach(i, !range(dim), "$d" # i), ", "); 587 defvar asm_str_default = " [$dst], [$tmap, {{" # dims_str # "}}], [$mbar]"; 588 defvar rc = !if(is_shared32, Int32Regs, Int64Regs); 589 590 defvar num_im2col = !if(!ge(dim, 3), !add(dim, -2), 0); 591 defvar im2col_dag = !if(!eq(mode, "im2col"), 592 !dag(ins, !listsplat(Int16Regs, num_im2col), !foreach(i, !range(num_im2col), "im2col" # i)), 593 (ins)); 594 defvar im2col_str = !interleave(!foreach(i, !range(num_im2col), "$im2col" # i), ", "); 595 defvar im2col_asm_str = ", {{" # im2col_str # "}}"; 596 597 defvar asm_str = !if(!eq(mode, "im2col"), 598 !strconcat(asm_str_default, im2col_asm_str), asm_str_default); 599 600 def NAME: NVPTXInst<(outs), 601 !con((ins rc:$dst, rc:$mbar, Int64Regs:$tmap), dims_dag, im2col_dag), 602 !strconcat(G2S_STRINGS<dim, mode, 0, 0>.inst_name, asm_str, ";"), []>, 603 Requires<[hasPTX<80>, hasSM<90>]>; 604 def NAME # _MC: NVPTXInst<(outs), 605 !con((ins rc:$dst, rc:$mbar, Int64Regs:$tmap), dims_dag, im2col_dag, (ins Int16Regs:$mc)), 606 !strconcat(G2S_STRINGS<dim, mode, 1, 0>.inst_name, asm_str, ", $mc;"), []>, 607 Requires<[hasPTX<80>, hasSM<90>]>; 608 def NAME # _CH: NVPTXInst<(outs), 609 !con((ins rc:$dst, rc:$mbar, Int64Regs:$tmap), dims_dag, im2col_dag, (ins Int64Regs:$ch)), 610 !strconcat(G2S_STRINGS<dim, mode, 0, 1>.inst_name, asm_str, ", $ch;"), []>, 611 Requires<[hasPTX<80>, hasSM<90>]>; 612 def NAME # _MC_CH: NVPTXInst<(outs), 613 !con((ins rc:$dst, rc:$mbar, Int64Regs:$tmap), dims_dag, im2col_dag, (ins Int16Regs:$mc, 
Int64Regs:$ch)), 614 !strconcat(G2S_STRINGS<dim, mode, 1, 1>.inst_name, asm_str, ", $mc, $ch;"), []>, 615 Requires<[hasPTX<80>, hasSM<90>]>; 616} 617 618foreach dim = [1, 2, 3, 4, 5] in { 619 foreach shared32 = [true, false] in { 620 foreach mode = !if(!ge(dim, 3), ["tile", "im2col"], ["tile"]) in { 621 defm G2S_STRINGS<dim, mode, 0, 0, shared32>.intr_name : 622 CP_ASYNC_BULK_TENSOR_G2S_INTR<dim, shared32, mode>; 623 } 624 } 625} 626 627// From Shared to Global memory (S2G) 628class S2G_STRINGS<int dim, string mode, bit ch, 629 bit is_shared32 = 0, bit is_reduce = 0> { 630 string dir = "global.shared::cta"; 631 string completion = "bulk_group"; 632 string inst_name = !if(is_reduce, "cp.reduce", "cp") 633 # ".async.bulk.tensor" 634 # "." # dim # "d" 635 # "." # dir 636 # "." # mode 637 # "." # completion 638 # !if(ch, ".L2::cache_hint", ""); 639 string intr_name = "CP_ASYNC_BULK_TENSOR_" 640 # !if(is_reduce, "RED_", "S2G_") 641 # dim # "D" 642 # !if(is_shared32, "_SHARED32", "") 643 # !if(!eq(mode, "tile"), "_TILE", "_IM2COL"); 644} 645 646multiclass CP_ASYNC_BULK_TENSOR_S2G_INTR<int dim, bit shared32, string mode> { 647 defvar dims_dag = !dag(ins, !listsplat(Int32Regs, dim), !foreach(i, !range(dim), "d" # i)); 648 defvar dims_str = !interleave(!foreach(i, !range(dim), "$d" # i), ", "); 649 defvar asm_str = " [$tmap, {{" # dims_str # "}}], [$src]"; 650 defvar rc = !if(shared32, Int32Regs, Int64Regs); 651 652 def NAME: NVPTXInst<(outs), 653 !con((ins rc:$src, Int64Regs:$tmap), dims_dag), 654 !strconcat(S2G_STRINGS<dim, mode, 0>.inst_name, asm_str, ";"), []>, 655 Requires<[hasPTX<80>, hasSM<90>]>; 656 def NAME # _CH: NVPTXInst<(outs), 657 !con((ins rc:$src, Int64Regs:$tmap), dims_dag, (ins Int64Regs:$ch)), 658 !strconcat(S2G_STRINGS<dim, mode, 1>.inst_name, asm_str, ", $ch;"), []>, 659 Requires<[hasPTX<80>, hasSM<90>]>; 660} 661 662def TMAReductionFlags : Operand<i32> { 663 let PrintMethod = "printTmaReductionMode"; 664} 665 666// TMA Copy from Shared to Global memory 
// with Reduction
multiclass CP_ASYNC_BULK_TENSOR_REDUCE_INTR<int dim, bit shared32, string mode> {
  // Coordinate operands: one Int32Regs input per dimension, $d0 .. $d<dim-1>.
  defvar coord_ins = !dag(ins, !listsplat(Int32Regs, dim),
                          !foreach(i, !range(dim), "d" # i));
  defvar coord_list = !interleave(!foreach(i, !range(dim), "$d" # i), ", ");
  defvar operand_str = " [$tmap, {{" # coord_list # "}}], [$src]";

  // $src uses a 32-bit register class when shared32 is set, 64-bit otherwise.
  defvar src_rc = !if(shared32, Int32Regs, Int64Regs);

  // The ${red_op} modifier is printed between these two opcode pieces.
  defvar opc_head = "cp.reduce.async.bulk.tensor" # "." # dim # "d"
                    # ".global.shared::cta";
  defvar opc_tail = "." # mode # ".bulk_group";

  // Form without a cache hint.
  def NAME
      : NVPTXInst<(outs),
                  !con((ins src_rc:$src, Int64Regs:$tmap), coord_ins,
                       (ins TMAReductionFlags:$red_op)),
                  opc_head # "${red_op}" # opc_tail # operand_str # ";",
                  []>,
        Requires<[hasPTX<80>, hasSM<90>]>;

  // Form with an extra 64-bit $ch operand and the .L2::cache_hint qualifier.
  def NAME # _CH
      : NVPTXInst<(outs),
                  !con((ins src_rc:$src, Int64Regs:$tmap), coord_ins,
                       (ins Int64Regs:$ch, TMAReductionFlags:$red_op)),
                  opc_head # "${red_op}" # opc_tail # ".L2::cache_hint"
                    # operand_str # ", $ch;",
                  []>,
        Requires<[hasPTX<80>, hasSM<90>]>;
}

// Instantiate the S2G copy and S2G reduce records for every dimension,
// source-address width and mode. The second mode is only generated for 3 or
// more dimensions.
foreach dim = [1, 2, 3, 4, 5] in {
  foreach shared32 = [true, false] in {
    foreach mode = !if(!ge(dim, 3), ["tile", "im2col_no_offs"], ["tile"]) in {
      defm S2G_STRINGS<dim, mode, 0, shared32>.intr_name
          : CP_ASYNC_BULK_TENSOR_S2G_INTR<dim, shared32, mode>;
      defm S2G_STRINGS<dim, mode, 0, shared32, 1>.intr_name
          : CP_ASYNC_BULK_TENSOR_REDUCE_INTR<dim, shared32, mode>;
    }
  }
}

// TMA Prefetch from Global memory to L2 cache
class PREFETCH_STRINGS<int dim, string mode, bit ch> {
  string prefix = "cp.async.bulk.prefetch.tensor";
  string dir = "L2.global";
  // <prefix>.<dim>d.<dir>.<mode>, plus ".L2::cache_hint" when ch is set.
  string inst_name = prefix # "." # dim # "d" # "." # dir # "." # mode
                     # !if(ch, ".L2::cache_hint", "");
  // Record-name stem; any mode other than "tile" is spelled _IM2COL.
  string intr_name = "CP_ASYNC_BULK_TENSOR_PREFETCH_" # dim # "D"
                     # !if(!eq(mode, "tile"), "_TILE", "_IM2COL");
}

multiclass CP_ASYNC_BULK_TENSOR_PREFETCH_INTR<int dim, string mode> {
  defvar is_im2col = !eq(mode, "im2col");

  // Coordinate operands: one Int32Regs input per dimension, $d0 .. $d<dim-1>.
  defvar coord_ins = !dag(ins, !listsplat(Int32Regs, dim),
                          !foreach(i, !range(dim), "d" # i));
  defvar coord_list = !interleave(!foreach(i, !range(dim), "$d" # i), ", ");
  defvar tile_operands = " [$tmap, {{" # coord_list # "}}]";

  // im2col mode adds (dim - 2) Int16Regs operands named $im2col<i>; the count
  // is 0 below three dimensions.
  defvar n_offsets = !if(!ge(dim, 3), !add(dim, -2), 0);
  defvar offset_ins = !if(is_im2col,
                          !dag(ins, !listsplat(Int16Regs, n_offsets),
                               !foreach(i, !range(n_offsets), "im2col" # i)),
                          (ins));
  defvar offset_list =
      !interleave(!foreach(i, !range(n_offsets), "$im2col" # i), ", ");
  defvar offset_operands = ", {{" # offset_list # "}}";

  defvar operand_str = !if(is_im2col,
                           !strconcat(tile_operands, offset_operands),
                           tile_operands);

  // Form without a cache hint.
  def NAME
      : NVPTXInst<(outs),
                  !con((ins Int64Regs:$tmap), coord_ins, offset_ins),
                  PREFETCH_STRINGS<dim, mode, 0>.inst_name # operand_str # ";",
                  []>,
        Requires<[hasPTX<80>, hasSM<90>]>;

  // Form with an extra 64-bit $ch operand and the .L2::cache_hint qualifier.
  def NAME # _CH
      : NVPTXInst<(outs),
                  !con((ins Int64Regs:$tmap), coord_ins, offset_ins,
                       (ins Int64Regs:$ch)),
                  PREFETCH_STRINGS<dim, mode, 1>.inst_name # operand_str
                    # ", $ch;",
                  []>,
        Requires<[hasPTX<80>, hasSM<90>]>;
}

// "im2col" prefetch records are only generated for 3 or more dimensions.
foreach dim = [1, 2, 3, 4, 5] in {
  foreach mode = !if(!ge(dim, 3), ["tile", "im2col"], ["tile"]) in {
    defm PREFETCH_STRINGS<dim, mode, 0>.intr_name
        : CP_ASYNC_BULK_TENSOR_PREFETCH_INTR<dim, mode>;
  }
}

//-----------------------------------
// MBarrier Functions
//-----------------------------------

// Every MBARRIER_* multiclass below emits a _32 and a _64 record that differ
// only in the register class / value type of $addr. AddrSpace is pasted
// verbatim into the opcode ("" or ".shared" at the instantiation sites).

multiclass MBARRIER_INIT<string AddrSpace, Intrinsic Intrin> {
  defvar asm_text = "mbarrier.init" # AddrSpace # ".b64 [$addr], $count;";
  def _32 : NVPTXInst<(outs), (ins Int32Regs:$addr, Int32Regs:$count),
                      asm_text,
                      [(Intrin i32:$addr, i32:$count)]>,
            Requires<[hasPTX<70>, hasSM<80>]>;
  def _64 : NVPTXInst<(outs), (ins Int64Regs:$addr, Int32Regs:$count),
                      asm_text,
                      [(Intrin i64:$addr, i32:$count)]>,
            Requires<[hasPTX<70>, hasSM<80>]>;
}

defm MBARRIER_INIT : MBARRIER_INIT<"", int_nvvm_mbarrier_init>;
defm MBARRIER_INIT_SHARED
    : MBARRIER_INIT<".shared", int_nvvm_mbarrier_init_shared>;

multiclass MBARRIER_INVAL<string AddrSpace, Intrinsic Intrin> {
  defvar asm_text = "mbarrier.inval" # AddrSpace # ".b64 [$addr];";
  def _32 : NVPTXInst<(outs), (ins Int32Regs:$addr),
                      asm_text,
                      [(Intrin i32:$addr)]>,
            Requires<[hasPTX<70>, hasSM<80>]>;
  def _64 : NVPTXInst<(outs), (ins Int64Regs:$addr),
                      asm_text,
                      [(Intrin i64:$addr)]>,
            Requires<[hasPTX<70>, hasSM<80>]>;
}

defm MBARRIER_INVAL : MBARRIER_INVAL<"", int_nvvm_mbarrier_inval>;
defm MBARRIER_INVAL_SHARED
    : MBARRIER_INVAL<".shared", int_nvvm_mbarrier_inval_shared>;

multiclass MBARRIER_ARRIVE<string AddrSpace, Intrinsic Intrin> {
  defvar asm_text = "mbarrier.arrive" # AddrSpace # ".b64 $state, [$addr];";
  def _32 : NVPTXInst<(outs Int64Regs:$state), (ins Int32Regs:$addr),
                      asm_text,
                      [(set i64:$state, (Intrin i32:$addr))]>,
            Requires<[hasPTX<70>, hasSM<80>]>;
  def _64 : NVPTXInst<(outs Int64Regs:$state), (ins Int64Regs:$addr),
                      asm_text,
                      [(set i64:$state, (Intrin i64:$addr))]>,
            Requires<[hasPTX<70>, hasSM<80>]>;
}

defm MBARRIER_ARRIVE : MBARRIER_ARRIVE<"", int_nvvm_mbarrier_arrive>;
defm MBARRIER_ARRIVE_SHARED
    : MBARRIER_ARRIVE<".shared", int_nvvm_mbarrier_arrive_shared>;

multiclass MBARRIER_ARRIVE_NOCOMPLETE<string AddrSpace, Intrinsic Intrin> {
  defvar asm_text = "mbarrier.arrive.noComplete" # AddrSpace
                    # ".b64 $state, [$addr], $count;";
  def _32 : NVPTXInst<(outs Int64Regs:$state),
                      (ins Int32Regs:$addr, Int32Regs:$count),
                      asm_text,
                      [(set i64:$state, (Intrin i32:$addr, i32:$count))]>,
            Requires<[hasPTX<70>, hasSM<80>]>;
  def _64 : NVPTXInst<(outs Int64Regs:$state),
                      (ins Int64Regs:$addr, Int32Regs:$count),
                      asm_text,
                      [(set i64:$state, (Intrin i64:$addr, i32:$count))]>,
            Requires<[hasPTX<70>, hasSM<80>]>;
}

defm MBARRIER_ARRIVE_NOCOMPLETE
    : MBARRIER_ARRIVE_NOCOMPLETE<"", int_nvvm_mbarrier_arrive_noComplete>;
defm MBARRIER_ARRIVE_NOCOMPLETE_SHARED
    : MBARRIER_ARRIVE_NOCOMPLETE<".shared",
                                 int_nvvm_mbarrier_arrive_noComplete_shared>;

multiclass MBARRIER_ARRIVE_DROP<string AddrSpace, Intrinsic Intrin> {
  defvar asm_text = "mbarrier.arrive_drop" # AddrSpace
                    # ".b64 $state, [$addr];";
  def _32 : NVPTXInst<(outs Int64Regs:$state), (ins Int32Regs:$addr),
                      asm_text,
                      [(set i64:$state, (Intrin i32:$addr))]>,
            Requires<[hasPTX<70>, hasSM<80>]>;
  def _64 : NVPTXInst<(outs Int64Regs:$state), (ins Int64Regs:$addr),
                      asm_text,
                      [(set i64:$state, (Intrin i64:$addr))]>,
            Requires<[hasPTX<70>, hasSM<80>]>;
}

defm MBARRIER_ARRIVE_DROP
    : MBARRIER_ARRIVE_DROP<"", int_nvvm_mbarrier_arrive_drop>;
defm MBARRIER_ARRIVE_DROP_SHARED
    : MBARRIER_ARRIVE_DROP<".shared", int_nvvm_mbarrier_arrive_drop_shared>;

multiclass MBARRIER_ARRIVE_DROP_NOCOMPLETE<string AddrSpace, Intrinsic Intrin> {
  defvar asm_text = "mbarrier.arrive_drop.noComplete" # AddrSpace
                    # ".b64 $state, [$addr], $count;";
  def _32 : NVPTXInst<(outs Int64Regs:$state),
                      (ins Int32Regs:$addr, Int32Regs:$count),
                      asm_text,
                      [(set i64:$state, (Intrin i32:$addr, i32:$count))]>,
            Requires<[hasPTX<70>, hasSM<80>]>;
  def _64 : NVPTXInst<(outs Int64Regs:$state),
                      (ins Int64Regs:$addr, Int32Regs:$count),
                      asm_text,
                      [(set i64:$state, (Intrin i64:$addr, i32:$count))]>,
            Requires<[hasPTX<70>, hasSM<80>]>;
}

defm MBARRIER_ARRIVE_DROP_NOCOMPLETE
    : MBARRIER_ARRIVE_DROP_NOCOMPLETE<"",
                                      int_nvvm_mbarrier_arrive_drop_noComplete>;
defm MBARRIER_ARRIVE_DROP_NOCOMPLETE_SHARED
    : MBARRIER_ARRIVE_DROP_NOCOMPLETE<
          ".shared", int_nvvm_mbarrier_arrive_drop_noComplete_shared>;

multiclass MBARRIER_TEST_WAIT<string AddrSpace, Intrinsic Intrin> {
  defvar asm_text = "mbarrier.test_wait" # AddrSpace
                    # ".b64 $res, [$addr], $state;";
  def _32 : NVPTXInst<(outs Int1Regs:$res),
                      (ins Int32Regs:$addr, Int64Regs:$state),
                      asm_text,
                      [(set i1:$res, (Intrin i32:$addr, i64:$state))]>,
            Requires<[hasPTX<70>, hasSM<80>]>;
  def _64 : NVPTXInst<(outs Int1Regs:$res),
                      (ins Int64Regs:$addr, Int64Regs:$state),
                      asm_text,
                      [(set i1:$res, (Intrin i64:$addr, i64:$state))]>,
            Requires<[hasPTX<70>, hasSM<80>]>;
}

defm MBARRIER_TEST_WAIT
    : MBARRIER_TEST_WAIT<"", int_nvvm_mbarrier_test_wait>;
defm MBARRIER_TEST_WAIT_SHARED
    : MBARRIER_TEST_WAIT<".shared", int_nvvm_mbarrier_test_wait_shared>;

// Takes a 64-bit $state value and produces a 32-bit result; unlike the
// multiclasses above there is no address operand and no address-space variant.
class MBARRIER_PENDING_COUNT<Intrinsic Intrin>
    : NVPTXInst<(outs Int32Regs:$res), (ins Int64Regs:$state),
                "mbarrier.pending_count.b64 $res, $state;",
                [(set i32:$res, (Intrin i64:$state))]>,
      Requires<[hasPTX<70>, hasSM<80>]>;

def MBARRIER_PENDING_COUNT
    : MBARRIER_PENDING_COUNT<int_nvvm_mbarrier_pending_count>;

//-----------------------------------
// Math Functions
//-----------------------------------

// Map min(1.0, max(0.0, x)) to sat(x).
// Note that max(0.0, min(x, 1.0)) cannot be mapped to sat(x): when x is NaN,
// max(0.0, min(x, 1.0)) is 1.0 while sat(x) is 0.
// Same story for fmax, fmin.
// f32: int_nvvm_fmin_f(1.0, int_nvvm_fmax_f(0.0, a)) in each of the four
// operand orders selects a CVT_f32_f32 with the CvtSAT mode.
def : Pat<(int_nvvm_fmin_f immFloat1, (int_nvvm_fmax_f immFloat0, f32:$a)),
          (CVT_f32_f32 $a, CvtSAT)>;
def : Pat<(int_nvvm_fmin_f immFloat1, (int_nvvm_fmax_f f32:$a, immFloat0)),
          (CVT_f32_f32 $a, CvtSAT)>;
def : Pat<(int_nvvm_fmin_f (int_nvvm_fmax_f immFloat0, f32:$a), immFloat1),
          (CVT_f32_f32 $a, CvtSAT)>;
def : Pat<(int_nvvm_fmin_f (int_nvvm_fmax_f f32:$a, immFloat0), immFloat1),
          (CVT_f32_f32 $a, CvtSAT)>;

// f64: the same four operand orders, selecting CVT_f64_f64.
def : Pat<(int_nvvm_fmin_d immDouble1, (int_nvvm_fmax_d immDouble0, f64:$a)),
          (CVT_f64_f64 $a, CvtSAT)>;
def : Pat<(int_nvvm_fmin_d immDouble1, (int_nvvm_fmax_d f64:$a, immDouble0)),
          (CVT_f64_f64 $a, CvtSAT)>;
def : Pat<(int_nvvm_fmin_d (int_nvvm_fmax_d immDouble0, f64:$a), immDouble1),
          (CVT_f64_f64 $a, CvtSAT)>;
def : Pat<(int_nvvm_fmin_d (int_nvvm_fmax_d f64:$a, immDouble0), immDouble1),
          (CVT_f64_f64 $a, CvtSAT)>;


// One-source intrinsic instruction: $dst = IntOP($src0).
// OpcStr is the complete assembly string (opcode and operands), because some
// users need full control over it, e.g. INT_PTX_RECIP.
class F_MATH_1<string OpcStr, NVPTXRegClass target_regclass,
               NVPTXRegClass src_regclass, Intrinsic IntOP,
               list<Predicate> Preds = []>
    : NVPTXInst<(outs target_regclass:$dst),
                (ins src_regclass:$src0),
                OpcStr,
                [(set target_regclass:$dst, (IntOP src_regclass:$src0))]>,
      Requires<Preds>;

// We need a full string for OpcStr here because we need to deal with cases
// like INT_PTX_NATIVE_POWR_F.
class F_MATH_2<string OpcStr, NVPTXRegClass t_regclass,
               NVPTXRegClass s0_regclass, NVPTXRegClass s1_regclass,
               Intrinsic IntOP, list<Predicate> Preds = []>
    : NVPTXInst<(outs t_regclass:$dst),
                (ins s0_regclass:$src0, s1_regclass:$src1),
                OpcStr,
                [(set t_regclass:$dst,
                      (IntOP s0_regclass:$src0, s1_regclass:$src1))]>,
      Requires<Preds>;

// Three-source intrinsic instruction: $dst = IntOP($src0, $src1, $src2).
// OpcStr is the complete assembly string.
class F_MATH_3<string OpcStr, NVPTXRegClass t_regclass,
               NVPTXRegClass s0_regclass, NVPTXRegClass s1_regclass,
               NVPTXRegClass s2_regclass, Intrinsic IntOP,
               list<Predicate> Preds = []>
    : NVPTXInst<(outs t_regclass:$dst),
                (ins s0_regclass:$src0, s1_regclass:$src1, s2_regclass:$src2),
                OpcStr,
                [(set t_regclass:$dst,
                      (IntOP s0_regclass:$src0, s1_regclass:$src1,
                             s2_regclass:$src2))]>,
      Requires<Preds>;

//
// MISC
//

def INT_NVVM_PRMT : F_MATH_3<"prmt.b32 \t$dst, $src0, $src1, $src2;",
    Int32Regs, Int32Regs, Int32Regs, Int32Regs, int_nvvm_prmt>;

// nanosleep with an immediate (_I) or a register (_R) operand.
def INT_NVVM_NANOSLEEP_I
    : NVPTXInst<(outs), (ins i32imm:$i),
                "nanosleep.u32 \t$i;",
                [(int_nvvm_nanosleep imm:$i)]>,
      Requires<[hasPTX<63>, hasSM<70>]>;
def INT_NVVM_NANOSLEEP_R
    : NVPTXInst<(outs), (ins Int32Regs:$i),
                "nanosleep.u32 \t$i;",
                [(int_nvvm_nanosleep i32:$i)]>,
      Requires<[hasPTX<63>, hasSM<70>]>;

//
// Min Max
//

def INT_NVVM_FMIN_F : F_MATH_2<"min.f32 \t$dst, $src0, $src1;",
    Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmin_f>;
def INT_NVVM_FMIN_FTZ_F : F_MATH_2<"min.ftz.f32 \t$dst, $src0, $src1;",
    Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmin_ftz_f>;
def INT_NVVM_FMIN_NAN_F : F_MATH_2<"min.NaN.f32 \t$dst, $src0, $src1;",
    Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmin_nan_f,
    [hasPTX<70>, hasSM<80>]>;
def INT_NVVM_FMIN_FTZ_NAN_F : F_MATH_2<"min.ftz.NaN.f32 \t$dst, $src0, $src1;",
    Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmin_ftz_nan_f,
    [hasPTX<70>, hasSM<80>]>;
def INT_NVVM_FMIN_XORSIGN_ABS_F
    : F_MATH_2<"min.xorsign.abs.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs,
               int_nvvm_fmin_xorsign_abs_f,
               [hasPTX<72>, hasSM<86>]>;
def INT_NVVM_FMIN_FTZ_XORSIGN_ABS_F
    : F_MATH_2<"min.ftz.xorsign.abs.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs,
               int_nvvm_fmin_ftz_xorsign_abs_f,
               [hasPTX<72>, hasSM<86>]>;
def INT_NVVM_FMIN_NAN_XORSIGN_ABS_F
    : F_MATH_2<"min.NaN.xorsign.abs.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs,
               int_nvvm_fmin_nan_xorsign_abs_f,
               [hasPTX<72>, hasSM<86>]>;
def INT_NVVM_FMIN_FTZ_NAN_XORSIGN_ABS_F
    : F_MATH_2<"min.ftz.NaN.xorsign.abs.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs,
               int_nvvm_fmin_ftz_nan_xorsign_abs_f,
               [hasPTX<72>, hasSM<86>]>;

def INT_NVVM_FMAX_F : F_MATH_2<"max.f32 \t$dst, $src0, $src1;",
    Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmax_f>;
def INT_NVVM_FMAX_FTZ_F : F_MATH_2<"max.ftz.f32 \t$dst, $src0, $src1;",
    Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmax_ftz_f>;
def INT_NVVM_FMAX_NAN_F : F_MATH_2<"max.NaN.f32 \t$dst, $src0, $src1;",
    Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmax_nan_f,
    [hasPTX<70>, hasSM<80>]>;
def INT_NVVM_FMAX_FTZ_NAN_F : F_MATH_2<"max.ftz.NaN.f32 \t$dst, $src0, $src1;",
    Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmax_ftz_nan_f,
    [hasPTX<70>, hasSM<80>]>;
def INT_NVVM_FMAX_XORSIGN_ABS_F
    : F_MATH_2<"max.xorsign.abs.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs,
               int_nvvm_fmax_xorsign_abs_f,
               [hasPTX<72>, hasSM<86>]>;
def INT_NVVM_FMAX_FTZ_XORSIGN_ABS_F
    : F_MATH_2<"max.ftz.xorsign.abs.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs,
               int_nvvm_fmax_ftz_xorsign_abs_f,
               [hasPTX<72>, hasSM<86>]>;
def INT_NVVM_FMAX_NAN_XORSIGN_ABS_F
    : F_MATH_2<"max.NaN.xorsign.abs.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs,
               int_nvvm_fmax_nan_xorsign_abs_f,
               [hasPTX<72>, hasSM<86>]>;
def INT_NVVM_FMAX_FTZ_NAN_XORSIGN_ABS_F
    : F_MATH_2<"max.ftz.NaN.xorsign.abs.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs,
               int_nvvm_fmax_ftz_nan_xorsign_abs_f,
               [hasPTX<72>, hasSM<86>]>;

def INT_NVVM_FMIN_D : F_MATH_2<"min.f64 \t$dst, $src0, $src1;",
    Float64Regs, Float64Regs, Float64Regs, int_nvvm_fmin_d>;
def INT_NVVM_FMAX_D : F_MATH_2<"max.f64 \t$dst, $src0, $src1;",
    Float64Regs, Float64Regs, Float64Regs, int_nvvm_fmax_d>;

//
// Min Max f16, f16x2, bf16, bf16x2
//

// One row of the MIN_MAX table: record-name suffix, intrinsic, register class
// and required predicates (default: PTX 7.0 / sm_80).
class MIN_MAX_TUPLE<string V, Intrinsic I, NVPTXRegClass RC,
                    list<Predicate> Preds = [hasPTX<70>, hasSM<80>]> {
  string Variant = V;
  Intrinsic Intr = I;
  NVPTXRegClass RegClass = RC;
  list<Predicate> Predicates = Preds;
}

// Emits one F_MATH_2 record per table row. IntName ("min" or "max") is the
// opcode stem and also picks the fmin or fmax intrinsic of each row; the
// opcode modifiers are the row's Variant with every "_" turned into ".".
multiclass MIN_MAX<string IntName> {
  defvar IsMin = !eq(IntName, "min");

  foreach T = [
    // f16 (Int16Regs)
    MIN_MAX_TUPLE<"_f16",
        !if(IsMin, int_nvvm_fmin_f16, int_nvvm_fmax_f16),
        Int16Regs>,
    MIN_MAX_TUPLE<"_ftz_f16",
        !if(IsMin, int_nvvm_fmin_ftz_f16, int_nvvm_fmax_ftz_f16),
        Int16Regs>,
    MIN_MAX_TUPLE<"_NaN_f16",
        !if(IsMin, int_nvvm_fmin_nan_f16, int_nvvm_fmax_nan_f16),
        Int16Regs>,
    MIN_MAX_TUPLE<"_ftz_NaN_f16",
        !if(IsMin, int_nvvm_fmin_ftz_nan_f16, int_nvvm_fmax_ftz_nan_f16),
        Int16Regs>,
    MIN_MAX_TUPLE<"_xorsign_abs_f16",
        !if(IsMin, int_nvvm_fmin_xorsign_abs_f16,
                   int_nvvm_fmax_xorsign_abs_f16),
        Int16Regs, [hasPTX<72>, hasSM<86>]>,
    MIN_MAX_TUPLE<"_ftz_xorsign_abs_f16",
        !if(IsMin, int_nvvm_fmin_ftz_xorsign_abs_f16,
                   int_nvvm_fmax_ftz_xorsign_abs_f16),
        Int16Regs, [hasPTX<72>, hasSM<86>]>,
    MIN_MAX_TUPLE<"_NaN_xorsign_abs_f16",
        !if(IsMin, int_nvvm_fmin_nan_xorsign_abs_f16,
                   int_nvvm_fmax_nan_xorsign_abs_f16),
        Int16Regs, [hasPTX<72>, hasSM<86>]>,
    MIN_MAX_TUPLE<"_ftz_NaN_xorsign_abs_f16",
        !if(IsMin, int_nvvm_fmin_ftz_nan_xorsign_abs_f16,
                   int_nvvm_fmax_ftz_nan_xorsign_abs_f16),
        Int16Regs, [hasPTX<72>, hasSM<86>]>,

    // f16x2 (Int32Regs)
    MIN_MAX_TUPLE<"_f16x2",
        !if(IsMin, int_nvvm_fmin_f16x2, int_nvvm_fmax_f16x2),
        Int32Regs>,
    MIN_MAX_TUPLE<"_ftz_f16x2",
        !if(IsMin, int_nvvm_fmin_ftz_f16x2, int_nvvm_fmax_ftz_f16x2),
        Int32Regs>,
    MIN_MAX_TUPLE<"_NaN_f16x2",
        !if(IsMin, int_nvvm_fmin_nan_f16x2, int_nvvm_fmax_nan_f16x2),
        Int32Regs>,
    MIN_MAX_TUPLE<"_ftz_NaN_f16x2",
        !if(IsMin, int_nvvm_fmin_ftz_nan_f16x2, int_nvvm_fmax_ftz_nan_f16x2),
        Int32Regs>,
    MIN_MAX_TUPLE<"_xorsign_abs_f16x2",
        !if(IsMin, int_nvvm_fmin_xorsign_abs_f16x2,
                   int_nvvm_fmax_xorsign_abs_f16x2),
        Int32Regs, [hasPTX<72>, hasSM<86>]>,
    MIN_MAX_TUPLE<"_ftz_xorsign_abs_f16x2",
        !if(IsMin, int_nvvm_fmin_ftz_xorsign_abs_f16x2,
                   int_nvvm_fmax_ftz_xorsign_abs_f16x2),
        Int32Regs, [hasPTX<72>, hasSM<86>]>,
    MIN_MAX_TUPLE<"_NaN_xorsign_abs_f16x2",
        !if(IsMin, int_nvvm_fmin_nan_xorsign_abs_f16x2,
                   int_nvvm_fmax_nan_xorsign_abs_f16x2),
        Int32Regs, [hasPTX<72>, hasSM<86>]>,
    MIN_MAX_TUPLE<"_ftz_NaN_xorsign_abs_f16x2",
        !if(IsMin, int_nvvm_fmin_ftz_nan_xorsign_abs_f16x2,
                   int_nvvm_fmax_ftz_nan_xorsign_abs_f16x2),
        Int32Regs, [hasPTX<72>, hasSM<86>]>,

    // bf16 (Int16Regs)
    MIN_MAX_TUPLE<"_bf16",
        !if(IsMin, int_nvvm_fmin_bf16, int_nvvm_fmax_bf16),
        Int16Regs>,
    MIN_MAX_TUPLE<"_NaN_bf16",
        !if(IsMin, int_nvvm_fmin_nan_bf16, int_nvvm_fmax_nan_bf16),
        Int16Regs>,
    MIN_MAX_TUPLE<"_xorsign_abs_bf16",
        !if(IsMin, int_nvvm_fmin_xorsign_abs_bf16,
                   int_nvvm_fmax_xorsign_abs_bf16),
        Int16Regs, [hasPTX<72>, hasSM<86>]>,
    MIN_MAX_TUPLE<"_NaN_xorsign_abs_bf16",
        !if(IsMin, int_nvvm_fmin_nan_xorsign_abs_bf16,
                   int_nvvm_fmax_nan_xorsign_abs_bf16),
        Int16Regs, [hasPTX<72>, hasSM<86>]>,

    // bf16x2 (Int32Regs)
    MIN_MAX_TUPLE<"_bf16x2",
        !if(IsMin, int_nvvm_fmin_bf16x2, int_nvvm_fmax_bf16x2),
        Int32Regs>,
    MIN_MAX_TUPLE<"_NaN_bf16x2",
        !if(IsMin, int_nvvm_fmin_nan_bf16x2, int_nvvm_fmax_nan_bf16x2),
        Int32Regs>,
    MIN_MAX_TUPLE<"_xorsign_abs_bf16x2",
        !if(IsMin, int_nvvm_fmin_xorsign_abs_bf16x2,
                   int_nvvm_fmax_xorsign_abs_bf16x2),
        Int32Regs, [hasPTX<72>, hasSM<86>]>,
    MIN_MAX_TUPLE<"_NaN_xorsign_abs_bf16x2",
        !if(IsMin, int_nvvm_fmin_nan_xorsign_abs_bf16x2,
                   int_nvvm_fmax_nan_xorsign_abs_bf16x2),
        Int32Regs, [hasPTX<72>, hasSM<86>]>
  ] in {
    def T.Variant
        : F_MATH_2<IntName # !subst("_", ".", T.Variant)
                     # " \t$dst, $src0, $src1;",
                   T.RegClass, T.RegClass, T.RegClass, T.Intr, T.Predicates>;
  }
}

defm INT_NVVM_FMIN : MIN_MAX<"min">;
// NOTE(review): "FMAN" looks like a typo for "FMAX". It is kept because this
// prefix becomes part of the generated record names (INT_NVVM_FMAN_f16, ...);
// confirm that nothing refers to those names before renaming.
defm INT_NVVM_FMAN : MIN_MAX<"max">;

//
// Multiplication
//

def INT_NVVM_MULHI_S : F_MATH_2<"mul.hi.s16 \t$dst, $src0, $src1;",
    Int16Regs, Int16Regs, Int16Regs, int_nvvm_mulhi_s>;
def INT_NVVM_MULHI_US : F_MATH_2<"mul.hi.u16 \t$dst, $src0, $src1;",
    Int16Regs, Int16Regs, Int16Regs, int_nvvm_mulhi_us>;
def INT_NVVM_MULHI_I : F_MATH_2<"mul.hi.s32 \t$dst, $src0, $src1;",
    Int32Regs, Int32Regs, Int32Regs, int_nvvm_mulhi_i>;
def INT_NVVM_MULHI_UI : F_MATH_2<"mul.hi.u32 \t$dst, $src0, $src1;",
    Int32Regs, Int32Regs, Int32Regs, int_nvvm_mulhi_ui>;
def INT_NVVM_MULHI_LL : F_MATH_2<"mul.hi.s64 \t$dst, $src0, $src1;",
    Int64Regs, Int64Regs, Int64Regs, int_nvvm_mulhi_ll>;
def INT_NVVM_MULHI_ULL : F_MATH_2<"mul.hi.u64 \t$dst, $src0, $src1;",
    Int64Regs, Int64Regs, Int64Regs, int_nvvm_mulhi_ull>;

def INT_NVVM_MUL_RN_FTZ_F : F_MATH_2<"mul.rn.ftz.f32 \t$dst, $src0, $src1;",
    Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rn_ftz_f>;
def INT_NVVM_MUL_RN_F : F_MATH_2<"mul.rn.f32 \t$dst, $src0, $src1;",
    Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rn_f>;
def INT_NVVM_MUL_RZ_FTZ_F : F_MATH_2<"mul.rz.ftz.f32 \t$dst, $src0, $src1;",
    Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rz_ftz_f>;
def INT_NVVM_MUL_RZ_F : F_MATH_2<"mul.rz.f32 \t$dst, $src0, $src1;",
    Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rz_f>;
def INT_NVVM_MUL_RM_FTZ_F : F_MATH_2<"mul.rm.ftz.f32 \t$dst, $src0, $src1;",
    Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rm_ftz_f>;
def INT_NVVM_MUL_RM_F : F_MATH_2<"mul.rm.f32 \t$dst, $src0, $src1;",
    Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rm_f>;
def INT_NVVM_MUL_RP_FTZ_F : F_MATH_2<"mul.rp.ftz.f32 \t$dst, $src0, $src1;",
    Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rp_ftz_f>;
def INT_NVVM_MUL_RP_F : F_MATH_2<"mul.rp.f32 \t$dst, $src0, $src1;",
    Float32Regs, Float32Regs, Float32Regs, int_nvvm_mul_rp_f>;

def INT_NVVM_MUL_RN_D : F_MATH_2<"mul.rn.f64 \t$dst, $src0, $src1;",
    Float64Regs, Float64Regs, Float64Regs, int_nvvm_mul_rn_d>;
def INT_NVVM_MUL_RZ_D : F_MATH_2<"mul.rz.f64 \t$dst, $src0, $src1;",
    Float64Regs, Float64Regs, Float64Regs, int_nvvm_mul_rz_d>;
def INT_NVVM_MUL_RM_D : F_MATH_2<"mul.rm.f64 \t$dst, $src0, $src1;",
    Float64Regs, Float64Regs, Float64Regs, int_nvvm_mul_rm_d>;
def INT_NVVM_MUL_RP_D : F_MATH_2<"mul.rp.f64 \t$dst, $src0, $src1;",
    Float64Regs, Float64Regs, Float64Regs, int_nvvm_mul_rp_d>;

def INT_NVVM_MUL24_I : F_MATH_2<"mul24.lo.s32 \t$dst, $src0, $src1;",
    Int32Regs, Int32Regs, Int32Regs, int_nvvm_mul24_i>;
def INT_NVVM_MUL24_UI : F_MATH_2<"mul24.lo.u32 \t$dst, $src0, $src1;",
    Int32Regs, Int32Regs, Int32Regs, int_nvvm_mul24_ui>;

//
// Div
//

def INT_NVVM_DIV_APPROX_FTZ_F
    : F_MATH_2<"div.approx.ftz.f32 \t$dst, $src0, $src1;",
               Float32Regs, Float32Regs, Float32Regs,
               int_nvvm_div_approx_ftz_f>;
def INT_NVVM_DIV_APPROX_F : F_MATH_2<"div.approx.f32 \t$dst, $src0, $src1;",
    Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_approx_f>;

def INT_NVVM_DIV_RN_FTZ_F : F_MATH_2<"div.rn.ftz.f32 \t$dst, $src0, $src1;",
    Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rn_ftz_f>;
def INT_NVVM_DIV_RN_F : F_MATH_2<"div.rn.f32 \t$dst, $src0, $src1;",
    Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rn_f>;
def INT_NVVM_DIV_RZ_FTZ_F : F_MATH_2<"div.rz.ftz.f32 \t$dst, $src0, $src1;",
    Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rz_ftz_f>;
def INT_NVVM_DIV_RZ_F : F_MATH_2<"div.rz.f32 \t$dst, $src0, $src1;",
    Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rz_f>;
def INT_NVVM_DIV_RM_FTZ_F : F_MATH_2<"div.rm.ftz.f32 \t$dst, $src0, $src1;",
    Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rm_ftz_f>;
def INT_NVVM_DIV_RM_F : F_MATH_2<"div.rm.f32 \t$dst, $src0, $src1;",
    Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rm_f>;
def INT_NVVM_DIV_RP_FTZ_F : F_MATH_2<"div.rp.ftz.f32 \t$dst, $src0, $src1;",
    Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rp_ftz_f>;
def INT_NVVM_DIV_RP_F : F_MATH_2<"div.rp.f32 \t$dst, $src0, $src1;",
    Float32Regs, Float32Regs, Float32Regs, int_nvvm_div_rp_f>;

def INT_NVVM_DIV_RN_D : F_MATH_2<"div.rn.f64 \t$dst, $src0, $src1;",
    Float64Regs, Float64Regs, Float64Regs, int_nvvm_div_rn_d>;
def INT_NVVM_DIV_RZ_D : F_MATH_2<"div.rz.f64 \t$dst, $src0, $src1;",
    Float64Regs, Float64Regs, Float64Regs, int_nvvm_div_rz_d>;
def INT_NVVM_DIV_RM_D : F_MATH_2<"div.rm.f64 \t$dst, $src0, $src1;",
    Float64Regs, Float64Regs, Float64Regs, int_nvvm_div_rm_d>;
def INT_NVVM_DIV_RP_D : F_MATH_2<"div.rp.f64 \t$dst, $src0, $src1;",
    Float64Regs, Float64Regs, Float64Regs, int_nvvm_div_rp_d>;

// div.full intrinsics select the existing FDIV32 instructions, in
// register/register and register/immediate form.
def : Pat<(int_nvvm_div_full f32:$a, f32:$b),
          (FDIV32rr $a, $b)>;

def : Pat<(int_nvvm_div_full f32:$a, fpimm:$b),
          (FDIV32ri $a, f32imm:$b)>;

def : Pat<(int_nvvm_div_full_ftz f32:$a, f32:$b),
          (FDIV32rr_ftz $a, $b)>;

def : Pat<(int_nvvm_div_full_ftz f32:$a, fpimm:$b),
          (FDIV32ri_ftz $a, f32imm:$b)>;

//
// Sad
//

def INT_NVVM_SAD_S : F_MATH_3<"sad.s16 \t$dst, $src0, $src1, $src2;",
    Int16Regs, Int16Regs, Int16Regs, Int16Regs, int_nvvm_sad_s>;
def INT_NVVM_SAD_US : F_MATH_3<"sad.u16 \t$dst, $src0, $src1, $src2;",
    Int16Regs, Int16Regs, Int16Regs, Int16Regs, int_nvvm_sad_us>;
def INT_NVVM_SAD_I : F_MATH_3<"sad.s32 \t$dst, $src0, $src1, $src2;",
    Int32Regs, Int32Regs, Int32Regs, Int32Regs, int_nvvm_sad_i>;
def INT_NVVM_SAD_UI : F_MATH_3<"sad.u32 \t$dst, $src0, $src1, $src2;",
    Int32Regs, Int32Regs, Int32Regs, Int32Regs, int_nvvm_sad_ui>;
def INT_NVVM_SAD_LL : F_MATH_3<"sad.s64 \t$dst, $src0, $src1, $src2;",
    Int64Regs, Int64Regs, Int64Regs, Int64Regs, int_nvvm_sad_ll>;
def INT_NVVM_SAD_ULL : F_MATH_3<"sad.u64 \t$dst, $src0, $src1, $src2;",
    Int64Regs, Int64Regs, Int64Regs, Int64Regs, int_nvvm_sad_ull>;

//
// Floor Ceil
//

// floor selects a same-type CVT with the CvtRMI mode, ceil with CvtRPI.
def : Pat<(int_nvvm_floor_ftz_f f32:$a), (CVT_f32_f32 $a, CvtRMI_FTZ)>;
def : Pat<(int_nvvm_floor_f f32:$a), (CVT_f32_f32 $a, CvtRMI)>;
def : Pat<(int_nvvm_floor_d f64:$a), (CVT_f64_f64 $a, CvtRMI)>;

def : Pat<(int_nvvm_ceil_ftz_f f32:$a), (CVT_f32_f32 $a, CvtRPI_FTZ)>;
def : Pat<(int_nvvm_ceil_f f32:$a), (CVT_f32_f32 $a, CvtRPI)>;
def : Pat<(int_nvvm_ceil_d f64:$a), (CVT_f64_f64 $a, CvtRPI)>;

//
// Abs
//

def INT_NVVM_FABS_FTZ_F : F_MATH_1<"abs.ftz.f32 \t$dst, $src0;",
    Float32Regs, Float32Regs, int_nvvm_fabs_ftz_f>;
def INT_NVVM_FABS_F : F_MATH_1<"abs.f32 \t$dst, $src0;",
    Float32Regs, Float32Regs, int_nvvm_fabs_f>;

def INT_NVVM_FABS_D : F_MATH_1<"abs.f64 \t$dst, $src0;",
    Float64Regs, Float64Regs, int_nvvm_fabs_d>;

//
// copysign
//
def fcopysign_nvptx : SDNode<"NVPTXISD::FCOPYSIGN", SDTFPBinOp>;

// Note the operand order: the pattern binds the node's first operand to $src1
// and its second to $src0, so the two are swapped in the printed instruction.
def COPYSIGN_F
    : NVPTXInst<(outs Float32Regs:$dst),
                (ins Float32Regs:$src0, Float32Regs:$src1),
                "copysign.f32 \t$dst, $src0, $src1;",
                [(set f32:$dst, (fcopysign_nvptx f32:$src1, f32:$src0))]>;

def COPYSIGN_D
    : NVPTXInst<(outs Float64Regs:$dst),
                (ins Float64Regs:$src0, Float64Regs:$src1),
                "copysign.f64 \t$dst, $src0, $src1;",
                [(set f64:$dst, (fcopysign_nvptx f64:$src1, f64:$src0))]>;

//
// Abs, Neg bf16, bf16x2
//

def INT_NVVM_ABS_BF16 : F_MATH_1<"abs.bf16 \t$dst, $src0;",
    Int16Regs, Int16Regs, int_nvvm_abs_bf16, [hasPTX<70>, hasSM<80>]>;
def INT_NVVM_ABS_BF16X2 : F_MATH_1<"abs.bf16x2 \t$dst, $src0;",
    Int32Regs, Int32Regs, int_nvvm_abs_bf16x2, [hasPTX<70>, hasSM<80>]>;
def INT_NVVM_NEG_BF16 : F_MATH_1<"neg.bf16 \t$dst, $src0;",
    Int16Regs, Int16Regs, int_nvvm_neg_bf16, [hasPTX<70>, hasSM<80>]>;
def INT_NVVM_NEG_BF16X2 : F_MATH_1<"neg.bf16x2 \t$dst, $src0;",
    Int32Regs, Int32Regs, int_nvvm_neg_bf16x2, [hasPTX<70>, hasSM<80>]>;

//
// Round
//

def : Pat<(int_nvvm_round_ftz_f f32:$a), (CVT_f32_f32 $a, CvtRNI_FTZ)>;
def : Pat<(int_nvvm_round_f f32:$a), (CVT_f32_f32 $a, CvtRNI)>;
def : Pat<(int_nvvm_round_d f64:$a), (CVT_f64_f64 $a, CvtRNI)>;

//
// Trunc
//

def : Pat<(int_nvvm_trunc_ftz_f f32:$a), (CVT_f32_f32 $a, CvtRZI_FTZ)>;
def : Pat<(int_nvvm_trunc_f f32:$a), (CVT_f32_f32 $a, CvtRZI)>;
def : Pat<(int_nvvm_trunc_d f64:$a), (CVT_f64_f64 $a, CvtRZI)>;

//
// Saturate
//

def : Pat<(int_nvvm_saturate_ftz_f f32:$a), (CVT_f32_f32 $a, CvtSAT_FTZ)>;
def : Pat<(int_nvvm_saturate_f f32:$a), (CVT_f32_f32 $a, CvtSAT)>;
def : Pat<(int_nvvm_saturate_d f64:$a), (CVT_f64_f64 $a, CvtSAT)>;

//
// Exp2 Log2
//

def INT_NVVM_EX2_APPROX_FTZ_F : F_MATH_1<"ex2.approx.ftz.f32 \t$dst, $src0;",
    Float32Regs, Float32Regs, int_nvvm_ex2_approx_ftz_f>;
def INT_NVVM_EX2_APPROX_F : F_MATH_1<"ex2.approx.f32 \t$dst, $src0;",
    Float32Regs, Float32Regs, int_nvvm_ex2_approx_f>;
def INT_NVVM_EX2_APPROX_D : F_MATH_1<"ex2.approx.f64 \t$dst, $src0;",
    Float64Regs, Float64Regs, int_nvvm_ex2_approx_d>;

def INT_NVVM_EX2_APPROX_F16 : F_MATH_1<"ex2.approx.f16 \t$dst, $src0;",
    Int16Regs, Int16Regs, int_nvvm_ex2_approx_f16, [hasPTX<70>, hasSM<75>]>;
def INT_NVVM_EX2_APPROX_F16X2 : F_MATH_1<"ex2.approx.f16x2 \t$dst, $src0;",
    Int32Regs, Int32Regs, int_nvvm_ex2_approx_f16x2, [hasPTX<70>, hasSM<75>]>;

// The generic fexp2 node selects the approximate instructions; for f32 the
// FTZ or non-FTZ form is chosen by predicate.
def : Pat<(fexp2 f32:$a), (INT_NVVM_EX2_APPROX_FTZ_F $a)>,
      Requires<[doF32FTZ]>;
def : Pat<(fexp2 f32:$a), (INT_NVVM_EX2_APPROX_F $a)>,
      Requires<[doNoF32FTZ]>;
def : Pat<(fexp2 f16:$a), (INT_NVVM_EX2_APPROX_F16 $a)>,
      Requires<[useFP16Math]>;
def : Pat<(fexp2 v2f16:$a), (INT_NVVM_EX2_APPROX_F16X2 $a)>,
      Requires<[useFP16Math]>;

def INT_NVVM_LG2_APPROX_FTZ_F : F_MATH_1<"lg2.approx.ftz.f32 \t$dst, $src0;",
    Float32Regs, Float32Regs, int_nvvm_lg2_approx_ftz_f>;
def INT_NVVM_LG2_APPROX_F : F_MATH_1<"lg2.approx.f32 \t$dst, $src0;",
    Float32Regs, Float32Regs, int_nvvm_lg2_approx_f>;
def INT_NVVM_LG2_APPROX_D : F_MATH_1<"lg2.approx.f64 \t$dst, $src0;",
    Float64Regs, Float64Regs, int_nvvm_lg2_approx_d>;

def : Pat<(flog2 f32:$a), (INT_NVVM_LG2_APPROX_FTZ_F $a)>,
      Requires<[doF32FTZ]>;
def : Pat<(flog2 f32:$a), (INT_NVVM_LG2_APPROX_F $a)>,
      Requires<[doNoF32FTZ]>;

//
// Sin Cos
//

def INT_NVVM_SIN_APPROX_FTZ_F : F_MATH_1<"sin.approx.ftz.f32 \t$dst, $src0;",
    Float32Regs, Float32Regs, int_nvvm_sin_approx_ftz_f>;
def INT_NVVM_SIN_APPROX_F : F_MATH_1<"sin.approx.f32 \t$dst, $src0;",
    Float32Regs, Float32Regs, int_nvvm_sin_approx_f>;

def INT_NVVM_COS_APPROX_FTZ_F : F_MATH_1<"cos.approx.ftz.f32 \t$dst, $src0;",
    Float32Regs, Float32Regs, int_nvvm_cos_approx_ftz_f>;
def INT_NVVM_COS_APPROX_F : F_MATH_1<"cos.approx.f32 \t$dst, $src0;",
    Float32Regs, Float32Regs, int_nvvm_cos_approx_f>;

//
// Fma
//

// One row of the FMA table: record-name suffix, intrinsic, register class and
// required predicates (none by default).
class FMA_TUPLE<string V, Intrinsic I, NVPTXRegClass RC,
                list<Predicate> Preds = []> {
  string Variant = V;
  Intrinsic Intr = I;
  NVPTXRegClass RegClass = RC;
  list<Predicate> Predicates = Preds;
}

// Emits one F_MATH_3 record per table row. The opcode is "fma" followed by the
// row's Variant with every "_" turned into ".".
multiclass FMA_INST {
  foreach T = [
    // f64
    FMA_TUPLE<"_rn_f64", int_nvvm_fma_rn_d, Float64Regs>,
    FMA_TUPLE<"_rz_f64", int_nvvm_fma_rz_d, Float64Regs>,
    FMA_TUPLE<"_rm_f64", int_nvvm_fma_rm_d, Float64Regs>,
    FMA_TUPLE<"_rp_f64", int_nvvm_fma_rp_d, Float64Regs>,

    // f32
    FMA_TUPLE<"_rn_ftz_f32", int_nvvm_fma_rn_ftz_f, Float32Regs>,
    FMA_TUPLE<"_rn_f32", int_nvvm_fma_rn_f, Float32Regs>,
    FMA_TUPLE<"_rz_ftz_f32", int_nvvm_fma_rz_ftz_f, Float32Regs>,
    FMA_TUPLE<"_rz_f32", int_nvvm_fma_rz_f, Float32Regs>,
    FMA_TUPLE<"_rm_f32", int_nvvm_fma_rm_f, Float32Regs>,
    FMA_TUPLE<"_rm_ftz_f32", int_nvvm_fma_rm_ftz_f, Float32Regs>,
    FMA_TUPLE<"_rp_f32", int_nvvm_fma_rp_f, Float32Regs>,
    FMA_TUPLE<"_rp_ftz_f32", int_nvvm_fma_rp_ftz_f, Float32Regs>,

    // f16 (Int16Regs)
    FMA_TUPLE<"_rn_f16", int_nvvm_fma_rn_f16,
              Int16Regs, [hasPTX<42>, hasSM<53>]>,
    FMA_TUPLE<"_rn_ftz_f16", int_nvvm_fma_rn_ftz_f16,
              Int16Regs, [hasPTX<42>, hasSM<53>]>,
    FMA_TUPLE<"_rn_sat_f16", int_nvvm_fma_rn_sat_f16,
              Int16Regs, [hasPTX<42>, hasSM<53>]>,
    FMA_TUPLE<"_rn_ftz_sat_f16", int_nvvm_fma_rn_ftz_sat_f16,
              Int16Regs, [hasPTX<42>, hasSM<53>]>,
    FMA_TUPLE<"_rn_relu_f16", int_nvvm_fma_rn_relu_f16,
              Int16Regs, [hasPTX<70>, hasSM<80>]>,
    FMA_TUPLE<"_rn_ftz_relu_f16", int_nvvm_fma_rn_ftz_relu_f16,
              Int16Regs, [hasPTX<70>, hasSM<80>]>,

    // bf16 (Int16Regs)
    FMA_TUPLE<"_rn_bf16", int_nvvm_fma_rn_bf16,
              Int16Regs, [hasPTX<70>, hasSM<80>]>,
    FMA_TUPLE<"_rn_ftz_bf16", int_nvvm_fma_rn_ftz_bf16,
              Int16Regs, [hasPTX<70>, hasSM<80>]>,
    FMA_TUPLE<"_rn_sat_bf16", int_nvvm_fma_rn_sat_bf16,
              Int16Regs, [hasPTX<70>, hasSM<80>]>,
    FMA_TUPLE<"_rn_ftz_sat_bf16", int_nvvm_fma_rn_ftz_sat_bf16,
              Int16Regs, [hasPTX<70>, hasSM<80>]>,
    FMA_TUPLE<"_rn_relu_bf16", int_nvvm_fma_rn_relu_bf16,
              Int16Regs, [hasPTX<70>, hasSM<80>]>,
    FMA_TUPLE<"_rn_ftz_relu_bf16", int_nvvm_fma_rn_ftz_relu_bf16,
              Int16Regs, [hasPTX<70>, hasSM<80>]>,

    // f16x2 (Int32Regs)
    FMA_TUPLE<"_rn_f16x2", int_nvvm_fma_rn_f16x2,
              Int32Regs, [hasPTX<42>, hasSM<53>]>,
    FMA_TUPLE<"_rn_ftz_f16x2", int_nvvm_fma_rn_ftz_f16x2,
              Int32Regs, [hasPTX<42>, hasSM<53>]>,
    FMA_TUPLE<"_rn_sat_f16x2", int_nvvm_fma_rn_sat_f16x2,
              Int32Regs, [hasPTX<42>, hasSM<53>]>,
    FMA_TUPLE<"_rn_ftz_sat_f16x2", int_nvvm_fma_rn_ftz_sat_f16x2,
              Int32Regs, [hasPTX<42>, hasSM<53>]>,
    FMA_TUPLE<"_rn_relu_f16x2", int_nvvm_fma_rn_relu_f16x2,
              Int32Regs, [hasPTX<70>, hasSM<80>]>,
    FMA_TUPLE<"_rn_ftz_relu_f16x2", int_nvvm_fma_rn_ftz_relu_f16x2,
              Int32Regs, [hasPTX<70>, hasSM<80>]>,

    // bf16x2 (Int32Regs)
    FMA_TUPLE<"_rn_bf16x2", int_nvvm_fma_rn_bf16x2,
              Int32Regs, [hasPTX<70>, hasSM<80>]>,
    FMA_TUPLE<"_rn_relu_bf16x2", int_nvvm_fma_rn_relu_bf16x2,
              Int32Regs, [hasPTX<70>, hasSM<80>]>
  ] in {
    def T.Variant
        : F_MATH_3<"fma" # !subst("_", ".", T.Variant)
                     # " \t$dst, $src0, $src1, $src2;",
                   T.RegClass, T.RegClass, T.RegClass, T.RegClass,
                   T.Intr, T.Predicates>;
  }
}

defm INT_NVVM_FMA : FMA_INST;

//
// Rcp
//

def INT_NVVM_RCP_RN_FTZ_F : F_MATH_1<"rcp.rn.ftz.f32 \t$dst, $src0;",
    Float32Regs, Float32Regs, int_nvvm_rcp_rn_ftz_f>;
def INT_NVVM_RCP_RN_F : F_MATH_1<"rcp.rn.f32 \t$dst, $src0;",
    Float32Regs, Float32Regs, int_nvvm_rcp_rn_f>;
def INT_NVVM_RCP_RZ_FTZ_F : F_MATH_1<"rcp.rz.ftz.f32 \t$dst, $src0;",
    Float32Regs, Float32Regs, int_nvvm_rcp_rz_ftz_f>;
def INT_NVVM_RCP_RZ_F : F_MATH_1<"rcp.rz.f32 \t$dst, $src0;",
    Float32Regs, Float32Regs, int_nvvm_rcp_rz_f>;
def INT_NVVM_RCP_RM_FTZ_F : F_MATH_1<"rcp.rm.ftz.f32 \t$dst, $src0;",
    Float32Regs, Float32Regs, int_nvvm_rcp_rm_ftz_f>;
def INT_NVVM_RCP_RM_F : F_MATH_1<"rcp.rm.f32 \t$dst, $src0;",
    Float32Regs, Float32Regs, int_nvvm_rcp_rm_f>;
def INT_NVVM_RCP_RP_FTZ_F : F_MATH_1<"rcp.rp.ftz.f32 \t$dst, $src0;",
    Float32Regs, Float32Regs, int_nvvm_rcp_rp_ftz_f>;
def INT_NVVM_RCP_RP_F : F_MATH_1<"rcp.rp.f32 \t$dst, $src0;",
    Float32Regs, Float32Regs, int_nvvm_rcp_rp_f>;

def INT_NVVM_RCP_RN_D : F_MATH_1<"rcp.rn.f64 \t$dst, $src0;",
    Float64Regs, Float64Regs, int_nvvm_rcp_rn_d>;
def INT_NVVM_RCP_RZ_D : F_MATH_1<"rcp.rz.f64 \t$dst, $src0;",
    Float64Regs, Float64Regs, int_nvvm_rcp_rz_d>;
def INT_NVVM_RCP_RM_D : F_MATH_1<"rcp.rm.f64 \t$dst, $src0;",
    Float64Regs, Float64Regs, int_nvvm_rcp_rm_d>;
def INT_NVVM_RCP_RP_D : F_MATH_1<"rcp.rp.f64 \t$dst, $src0;",
    Float64Regs, Float64Regs, int_nvvm_rcp_rp_d>;

def INT_NVVM_RCP_APPROX_FTZ_F : F_MATH_1<"rcp.approx.ftz.f32 \t$dst, $src0;",
    Float32Regs, Float32Regs, int_nvvm_rcp_approx_ftz_f>;
def INT_NVVM_RCP_APPROX_FTZ_D : F_MATH_1<"rcp.approx.ftz.f64 \t$dst, $src0;",
    Float64Regs, Float64Regs, int_nvvm_rcp_approx_ftz_d>;

//
// Sqrt
//

def INT_NVVM_SQRT_RN_FTZ_F : F_MATH_1<"sqrt.rn.ftz.f32 \t$dst, $src0;",
    Float32Regs, Float32Regs, int_nvvm_sqrt_rn_ftz_f>;
def INT_NVVM_SQRT_RN_F : F_MATH_1<"sqrt.rn.f32 \t$dst, $src0;",
    Float32Regs, Float32Regs, int_nvvm_sqrt_rn_f>;
def INT_NVVM_SQRT_RZ_FTZ_F : F_MATH_1<"sqrt.rz.ftz.f32 \t$dst, $src0;",
    Float32Regs, Float32Regs, int_nvvm_sqrt_rz_ftz_f>;
def INT_NVVM_SQRT_RZ_F : F_MATH_1<"sqrt.rz.f32 \t$dst, $src0;",
    Float32Regs, Float32Regs, int_nvvm_sqrt_rz_f>;
def INT_NVVM_SQRT_RM_FTZ_F : F_MATH_1<"sqrt.rm.ftz.f32 \t$dst, $src0;",
    Float32Regs, Float32Regs, int_nvvm_sqrt_rm_ftz_f>;
def INT_NVVM_SQRT_RM_F : F_MATH_1<"sqrt.rm.f32 \t$dst, $src0;",
    Float32Regs, Float32Regs, int_nvvm_sqrt_rm_f>;
def INT_NVVM_SQRT_RP_FTZ_F : F_MATH_1<"sqrt.rp.ftz.f32 \t$dst, $src0;",
    Float32Regs, Float32Regs, int_nvvm_sqrt_rp_ftz_f>;
def INT_NVVM_SQRT_RP_F : F_MATH_1<"sqrt.rp.f32 \t$dst, $src0;",
    Float32Regs, Float32Regs, int_nvvm_sqrt_rp_f>;
def INT_NVVM_SQRT_APPROX_FTZ_F : F_MATH_1<"sqrt.approx.ftz.f32 \t$dst, $src0;",
    Float32Regs, Float32Regs, int_nvvm_sqrt_approx_ftz_f>;
def INT_NVVM_SQRT_APPROX_F : F_MATH_1<"sqrt.approx.f32 \t$dst, $src0;",
    Float32Regs, Float32Regs, int_nvvm_sqrt_approx_f>;

def INT_NVVM_SQRT_RN_D : F_MATH_1<"sqrt.rn.f64 \t$dst, $src0;",
    Float64Regs, Float64Regs, int_nvvm_sqrt_rn_d>;
def INT_NVVM_SQRT_RZ_D : F_MATH_1<"sqrt.rz.f64 \t$dst, $src0;",
    Float64Regs, Float64Regs, int_nvvm_sqrt_rz_d>;
def INT_NVVM_SQRT_RM_D : F_MATH_1<"sqrt.rm.f64 \t$dst, $src0;",
    Float64Regs, Float64Regs, int_nvvm_sqrt_rm_d>;
def INT_NVVM_SQRT_RP_D : F_MATH_1<"sqrt.rp.f64 \t$dst, $src0;",
    Float64Regs, Float64Regs, int_nvvm_sqrt_rp_d>;

// nvvm_sqrt intrinsic: the instruction is chosen by the do_SQRTF32_RN and
// doF32FTZ predicates; the last pattern carries no predicate.
def : Pat<(int_nvvm_sqrt_f f32:$a), (INT_NVVM_SQRT_RN_FTZ_F $a)>,
      Requires<[doF32FTZ, do_SQRTF32_RN]>;
def : Pat<(int_nvvm_sqrt_f f32:$a), (INT_NVVM_SQRT_RN_F $a)>,
      Requires<[do_SQRTF32_RN]>;
def : Pat<(int_nvvm_sqrt_f f32:$a), (INT_NVVM_SQRT_APPROX_FTZ_F $a)>,
      Requires<[doF32FTZ]>;
def : Pat<(int_nvvm_sqrt_f f32:$a), (INT_NVVM_SQRT_APPROX_F $a)>;

//
// Rsqrt
//

def INT_NVVM_RSQRT_APPROX_FTZ_F
    : F_MATH_1<"rsqrt.approx.ftz.f32 \t$dst, $src0;",
               Float32Regs, Float32Regs, int_nvvm_rsqrt_approx_ftz_f>;
def INT_NVVM_RSQRT_APPROX_FTZ_D
    : F_MATH_1<"rsqrt.approx.ftz.f64 \t$dst, $src0;",
               Float64Regs, Float64Regs, int_nvvm_rsqrt_approx_ftz_d>;

def INT_NVVM_RSQRT_APPROX_F : F_MATH_1<"rsqrt.approx.f32 \t$dst, $src0;",
    Float32Regs, Float32Regs, int_nvvm_rsqrt_approx_f>;
def INT_NVVM_RSQRT_APPROX_D : F_MATH_1<"rsqrt.approx.f64 \t$dst, $src0;",
    Float64Regs, Float64Regs, int_nvvm_rsqrt_approx_d>;

// 1.0f / sqrt_approx -> rsqrt_approx
def : Pat<(fdiv FloatConst1, (int_nvvm_sqrt_approx_f f32:$a)),
          (INT_NVVM_RSQRT_APPROX_F $a)>,
      Requires<[doRsqrtOpt]>;
def : Pat<(fdiv FloatConst1, (int_nvvm_sqrt_approx_ftz_f f32:$a)),
          (INT_NVVM_RSQRT_APPROX_FTZ_F $a)>,
      Requires<[doRsqrtOpt]>;
// Same for int_nvvm_sqrt_f when a non-precise sqrt is requested.
def : Pat<(fdiv FloatConst1, (int_nvvm_sqrt_f f32:$a)),
          (INT_NVVM_RSQRT_APPROX_F $a)>,
      Requires<[doRsqrtOpt, do_SQRTF32_APPROX, doNoF32FTZ]>;
def : Pat<(fdiv FloatConst1, (int_nvvm_sqrt_f f32:$a)),
          (INT_NVVM_RSQRT_APPROX_FTZ_F $a)>,
      Requires<[doRsqrtOpt, do_SQRTF32_APPROX, doF32FTZ]>;

// And for the generic fsqrt node, under the same predicates.
def : Pat<(fdiv FloatConst1, (fsqrt f32:$a)),
          (INT_NVVM_RSQRT_APPROX_F $a)>,
      Requires<[doRsqrtOpt, do_SQRTF32_APPROX, doNoF32FTZ]>;
def : Pat<(fdiv FloatConst1, (fsqrt f32:$a)),
          (INT_NVVM_RSQRT_APPROX_FTZ_F $a)>,
      Requires<[doRsqrtOpt, do_SQRTF32_APPROX, doF32FTZ]>;
//
// Add
//

def INT_NVVM_ADD_RN_FTZ_F : F_MATH_2<"add.rn.ftz.f32 \t$dst, $src0, $src1;",
    Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rn_ftz_f>;
def INT_NVVM_ADD_RN_F : F_MATH_2<"add.rn.f32 \t$dst, $src0, $src1;",
    Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rn_f>;
def INT_NVVM_ADD_RZ_FTZ_F : F_MATH_2<"add.rz.ftz.f32 \t$dst, $src0, $src1;",
    Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rz_ftz_f>;
def INT_NVVM_ADD_RZ_F : F_MATH_2<"add.rz.f32 \t$dst, $src0, $src1;",
    Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rz_f>;
def INT_NVVM_ADD_RM_FTZ_F : F_MATH_2<"add.rm.ftz.f32 \t$dst, $src0, $src1;",
    Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rm_ftz_f>;
// The name and body of the next record follow on the next line of the file.
def
INT_NVVM_ADD_RM_F : F_MATH_2<"add.rm.f32 \t$dst, $src0, $src1;", 1569 Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rm_f>; 1570def INT_NVVM_ADD_RP_FTZ_F : F_MATH_2<"add.rp.ftz.f32 \t$dst, $src0, $src1;", 1571 Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rp_ftz_f>; 1572def INT_NVVM_ADD_RP_F : F_MATH_2<"add.rp.f32 \t$dst, $src0, $src1;", 1573 Float32Regs, Float32Regs, Float32Regs, int_nvvm_add_rp_f>; 1574 1575def INT_NVVM_ADD_RN_D : F_MATH_2<"add.rn.f64 \t$dst, $src0, $src1;", 1576 Float64Regs, Float64Regs, Float64Regs, int_nvvm_add_rn_d>; 1577def INT_NVVM_ADD_RZ_D : F_MATH_2<"add.rz.f64 \t$dst, $src0, $src1;", 1578 Float64Regs, Float64Regs, Float64Regs, int_nvvm_add_rz_d>; 1579def INT_NVVM_ADD_RM_D : F_MATH_2<"add.rm.f64 \t$dst, $src0, $src1;", 1580 Float64Regs, Float64Regs, Float64Regs, int_nvvm_add_rm_d>; 1581def INT_NVVM_ADD_RP_D : F_MATH_2<"add.rp.f64 \t$dst, $src0, $src1;", 1582 Float64Regs, Float64Regs, Float64Regs, int_nvvm_add_rp_d>; 1583 1584// 1585// BFIND 1586// 1587 1588foreach t = [I32RT, I64RT] in { 1589 foreach sign = ["s", "u"] in { 1590 defvar flo_intrin = !cast<Intrinsic>("int_nvvm_flo_" # sign); 1591 def BFIND_ # sign # t.Size 1592 : NVPTXInst<(outs Int32Regs:$dst), (ins t.RC:$src), 1593 "bfind." # sign # t.Size # " \t$dst, $src;", 1594 [(set i32:$dst, (flo_intrin t.Ty:$src, 0))]>; 1595 1596 def BFIND_SHIFTAMT_ # sign # t.Size 1597 : NVPTXInst<(outs Int32Regs:$dst), (ins t.RC:$src), 1598 "bfind.shiftamt." 
# sign # t.Size # " \t$dst, $src;", 1599 [(set i32:$dst, (flo_intrin t.Ty:$src, -1))]>; 1600 } 1601} 1602 1603// 1604// Convert 1605// 1606 1607def : Pat<(int_nvvm_d2f_rn_ftz f64:$a), 1608 (CVT_f32_f64 $a, CvtRN_FTZ)>; 1609def : Pat<(int_nvvm_d2f_rn f64:$a), 1610 (CVT_f32_f64 $a, CvtRN)>; 1611def : Pat<(int_nvvm_d2f_rz_ftz f64:$a), 1612 (CVT_f32_f64 $a, CvtRZ_FTZ)>; 1613def : Pat<(int_nvvm_d2f_rz f64:$a), 1614 (CVT_f32_f64 $a, CvtRZ)>; 1615def : Pat<(int_nvvm_d2f_rm_ftz f64:$a), 1616 (CVT_f32_f64 $a, CvtRM_FTZ)>; 1617def : Pat<(int_nvvm_d2f_rm f64:$a), 1618 (CVT_f32_f64 $a, CvtRM)>; 1619def : Pat<(int_nvvm_d2f_rp_ftz f64:$a), 1620 (CVT_f32_f64 $a, CvtRP_FTZ)>; 1621def : Pat<(int_nvvm_d2f_rp f64:$a), 1622 (CVT_f32_f64 $a, CvtRP)>; 1623 1624def : Pat<(int_nvvm_d2i_rn f64:$a), 1625 (CVT_s32_f64 $a, CvtRNI)>; 1626def : Pat<(int_nvvm_d2i_rz f64:$a), 1627 (CVT_s32_f64 $a, CvtRZI)>; 1628def : Pat<(int_nvvm_d2i_rm f64:$a), 1629 (CVT_s32_f64 $a, CvtRMI)>; 1630def : Pat<(int_nvvm_d2i_rp f64:$a), 1631 (CVT_s32_f64 $a, CvtRPI)>; 1632 1633def : Pat<(int_nvvm_d2ui_rn f64:$a), 1634 (CVT_u32_f64 $a, CvtRNI)>; 1635def : Pat<(int_nvvm_d2ui_rz f64:$a), 1636 (CVT_u32_f64 $a, CvtRZI)>; 1637def : Pat<(int_nvvm_d2ui_rm f64:$a), 1638 (CVT_u32_f64 $a, CvtRMI)>; 1639def : Pat<(int_nvvm_d2ui_rp f64:$a), 1640 (CVT_u32_f64 $a, CvtRPI)>; 1641 1642def : Pat<(int_nvvm_i2d_rn i32:$a), 1643 (CVT_f64_s32 $a, CvtRN)>; 1644def : Pat<(int_nvvm_i2d_rz i32:$a), 1645 (CVT_f64_s32 $a, CvtRZ)>; 1646def : Pat<(int_nvvm_i2d_rm i32:$a), 1647 (CVT_f64_s32 $a, CvtRM)>; 1648def : Pat<(int_nvvm_i2d_rp i32:$a), 1649 (CVT_f64_s32 $a, CvtRP)>; 1650 1651def : Pat<(int_nvvm_ui2d_rn i32:$a), 1652 (CVT_f64_u32 $a, CvtRN)>; 1653def : Pat<(int_nvvm_ui2d_rz i32:$a), 1654 (CVT_f64_u32 $a, CvtRZ)>; 1655def : Pat<(int_nvvm_ui2d_rm i32:$a), 1656 (CVT_f64_u32 $a, CvtRM)>; 1657def : Pat<(int_nvvm_ui2d_rp i32:$a), 1658 (CVT_f64_u32 $a, CvtRP)>; 1659 1660def : Pat<(int_nvvm_f2i_rn_ftz f32:$a), 1661 (CVT_s32_f32 $a, CvtRNI_FTZ)>; 
// f32 -> s32 (continued)
def : Pat<(int_nvvm_f2i_rn f32:$a), (CVT_s32_f32 $a, CvtRNI)>;
def : Pat<(int_nvvm_f2i_rz_ftz f32:$a), (CVT_s32_f32 $a, CvtRZI_FTZ)>;
def : Pat<(int_nvvm_f2i_rz f32:$a), (CVT_s32_f32 $a, CvtRZI)>;
def : Pat<(int_nvvm_f2i_rm_ftz f32:$a), (CVT_s32_f32 $a, CvtRMI_FTZ)>;
def : Pat<(int_nvvm_f2i_rm f32:$a), (CVT_s32_f32 $a, CvtRMI)>;
def : Pat<(int_nvvm_f2i_rp_ftz f32:$a), (CVT_s32_f32 $a, CvtRPI_FTZ)>;
def : Pat<(int_nvvm_f2i_rp f32:$a), (CVT_s32_f32 $a, CvtRPI)>;

// f32 -> u32
def : Pat<(int_nvvm_f2ui_rn_ftz f32:$a), (CVT_u32_f32 $a, CvtRNI_FTZ)>;
def : Pat<(int_nvvm_f2ui_rn f32:$a), (CVT_u32_f32 $a, CvtRNI)>;
def : Pat<(int_nvvm_f2ui_rz_ftz f32:$a), (CVT_u32_f32 $a, CvtRZI_FTZ)>;
def : Pat<(int_nvvm_f2ui_rz f32:$a), (CVT_u32_f32 $a, CvtRZI)>;
def : Pat<(int_nvvm_f2ui_rm_ftz f32:$a), (CVT_u32_f32 $a, CvtRMI_FTZ)>;
def : Pat<(int_nvvm_f2ui_rm f32:$a), (CVT_u32_f32 $a, CvtRMI)>;
def : Pat<(int_nvvm_f2ui_rp_ftz f32:$a), (CVT_u32_f32 $a, CvtRPI_FTZ)>;
def : Pat<(int_nvvm_f2ui_rp f32:$a), (CVT_u32_f32 $a, CvtRPI)>;

// s32 -> f32
def : Pat<(int_nvvm_i2f_rn i32:$a), (CVT_f32_s32 $a, CvtRN)>;
def : Pat<(int_nvvm_i2f_rz i32:$a), (CVT_f32_s32 $a, CvtRZ)>;
def : Pat<(int_nvvm_i2f_rm i32:$a), (CVT_f32_s32 $a, CvtRM)>;
def : Pat<(int_nvvm_i2f_rp i32:$a), (CVT_f32_s32 $a, CvtRP)>;

// u32 -> f32
def : Pat<(int_nvvm_ui2f_rn i32:$a), (CVT_f32_u32 $a, CvtRN)>;
def : Pat<(int_nvvm_ui2f_rz i32:$a), (CVT_f32_u32 $a, CvtRZ)>;
def : Pat<(int_nvvm_ui2f_rm i32:$a), (CVT_f32_u32 $a, CvtRM)>;
def : Pat<(int_nvvm_ui2f_rp i32:$a), (CVT_f32_u32 $a, CvtRP)>;

// Two f32 values -> bf16x2
def : Pat<(int_nvvm_ff2bf16x2_rn f32:$a, f32:$b),
          (CVT_bf16x2_f32 $a, $b, CvtRN)>;
def : Pat<(int_nvvm_ff2bf16x2_rn_relu f32:$a, f32:$b),
          (CVT_bf16x2_f32 $a, $b, CvtRN_RELU)>;
def : Pat<(int_nvvm_ff2bf16x2_rz f32:$a, f32:$b),
          (CVT_bf16x2_f32 $a, $b, CvtRZ)>;
def : Pat<(int_nvvm_ff2bf16x2_rz_relu f32:$a, f32:$b),
          (CVT_bf16x2_f32 $a, $b, CvtRZ_RELU)>;

// Two f32 values -> f16x2
def : Pat<(int_nvvm_ff2f16x2_rn f32:$a, f32:$b),
          (CVT_f16x2_f32 $a, $b, CvtRN)>;
def : Pat<(int_nvvm_ff2f16x2_rn_relu f32:$a, f32:$b),
          (CVT_f16x2_f32 $a, $b, CvtRN_RELU)>;
def : Pat<(int_nvvm_ff2f16x2_rz f32:$a, f32:$b),
          (CVT_f16x2_f32 $a, $b, CvtRZ)>;
def : Pat<(int_nvvm_ff2f16x2_rz_relu f32:$a, f32:$b),
          (CVT_f16x2_f32 $a, $b, CvtRZ_RELU)>;

// f32 -> bf16
def : Pat<(int_nvvm_f2bf16_rn f32:$a), (CVT_bf16_f32 $a, CvtRN)>;
def : Pat<(int_nvvm_f2bf16_rn_relu f32:$a), (CVT_bf16_f32 $a, CvtRN_RELU)>;
def : Pat<(int_nvvm_f2bf16_rz f32:$a), (CVT_bf16_f32 $a, CvtRZ)>;
def : Pat<(int_nvvm_f2bf16_rz_relu f32:$a), (CVT_bf16_f32 $a, CvtRZ_RELU)>;

// Build an f64 register from two i32 registers with a single mov.b64.
def INT_NVVM_LOHI_I2D
    : F_MATH_2<"mov.b64 \t$dst, {{$src0, $src1}};",
               Float64Regs, Int32Regs, Int32Regs, int_nvvm_lohi_i2d>;

// Split an f64 register with mov.b64 and keep one 32-bit half. The other
// half lands in a scratch register declared inside the emitted block.
// D2I_LO keeps the first element of the destination pair.
def INT_NVVM_D2I_LO
    : F_MATH_1<"{{\n\t"
               # ".reg .b32 %temp; \n\t"
               # "mov.b64 \t{$dst, %temp}, $src0;\n\t"
               # "}}",
               Int32Regs, Float64Regs, int_nvvm_d2i_lo>;
// D2I_HI keeps the second element of the destination pair.
def INT_NVVM_D2I_HI
    : F_MATH_1<"{{\n\t"
               # ".reg .b32 %temp; \n\t"
               # "mov.b64 \t{%temp, $dst}, $src0;\n\t"
               # "}}",
               Int32Regs, Float64Regs, int_nvvm_d2i_hi>;

// f32 -> s64
def : Pat<(int_nvvm_f2ll_rn_ftz f32:$a), (CVT_s64_f32 $a, CvtRNI_FTZ)>;
def : Pat<(int_nvvm_f2ll_rn f32:$a), (CVT_s64_f32 $a, CvtRNI)>;
def : Pat<(int_nvvm_f2ll_rz_ftz f32:$a), (CVT_s64_f32 $a, CvtRZI_FTZ)>;
def : Pat<(int_nvvm_f2ll_rz f32:$a), (CVT_s64_f32 $a, CvtRZI)>;
def : Pat<(int_nvvm_f2ll_rm_ftz f32:$a), (CVT_s64_f32 $a, CvtRMI_FTZ)>;
def : Pat<(int_nvvm_f2ll_rm f32:$a), (CVT_s64_f32 $a, CvtRMI)>;
def : Pat<(int_nvvm_f2ll_rp_ftz f32:$a), (CVT_s64_f32 $a, CvtRPI_FTZ)>;
def : Pat<(int_nvvm_f2ll_rp f32:$a), (CVT_s64_f32 $a, CvtRPI)>;

// f32 -> u64
def : Pat<(int_nvvm_f2ull_rn_ftz f32:$a), (CVT_u64_f32 $a, CvtRNI_FTZ)>;
def : Pat<(int_nvvm_f2ull_rn f32:$a), (CVT_u64_f32 $a, CvtRNI)>;
def : Pat<(int_nvvm_f2ull_rz_ftz f32:$a), (CVT_u64_f32 $a, CvtRZI_FTZ)>;
def : Pat<(int_nvvm_f2ull_rz f32:$a), (CVT_u64_f32 $a, CvtRZI)>;
def : Pat<(int_nvvm_f2ull_rm_ftz f32:$a), (CVT_u64_f32 $a, CvtRMI_FTZ)>;
def : Pat<(int_nvvm_f2ull_rm f32:$a), (CVT_u64_f32 $a, CvtRMI)>;
def : Pat<(int_nvvm_f2ull_rp_ftz f32:$a), (CVT_u64_f32 $a, CvtRPI_FTZ)>;
def : Pat<(int_nvvm_f2ull_rp f32:$a), (CVT_u64_f32 $a, CvtRPI)>;

// f64 -> s64
def : Pat<(int_nvvm_d2ll_rn f64:$a), (CVT_s64_f64 $a, CvtRNI)>;
def : Pat<(int_nvvm_d2ll_rz f64:$a), (CVT_s64_f64 $a, CvtRZI)>;
def : Pat<(int_nvvm_d2ll_rm f64:$a), (CVT_s64_f64 $a, CvtRMI)>;
def : Pat<(int_nvvm_d2ll_rp f64:$a), (CVT_s64_f64 $a, CvtRPI)>;

// f64 -> u64
def : Pat<(int_nvvm_d2ull_rn f64:$a), (CVT_u64_f64 $a, CvtRNI)>;
def : Pat<(int_nvvm_d2ull_rz f64:$a), (CVT_u64_f64 $a, CvtRZI)>;
def : Pat<(int_nvvm_d2ull_rm f64:$a), (CVT_u64_f64 $a, CvtRMI)>;
def : Pat<(int_nvvm_d2ull_rp f64:$a), (CVT_u64_f64 $a, CvtRPI)>;

// s64 -> f32
def : Pat<(int_nvvm_ll2f_rn i64:$a), (CVT_f32_s64 $a, CvtRN)>;
def : Pat<(int_nvvm_ll2f_rz i64:$a), (CVT_f32_s64 $a, CvtRZ)>;
def : Pat<(int_nvvm_ll2f_rm i64:$a), (CVT_f32_s64 $a, CvtRM)>;
def : Pat<(int_nvvm_ll2f_rp i64:$a), (CVT_f32_s64 $a, CvtRP)>;

// u64 -> f32
def : Pat<(int_nvvm_ull2f_rn i64:$a), (CVT_f32_u64 $a, CvtRN)>;
def : Pat<(int_nvvm_ull2f_rz i64:$a), (CVT_f32_u64 $a, CvtRZ)>;
def : Pat<(int_nvvm_ull2f_rm i64:$a), (CVT_f32_u64 $a, CvtRM)>;
def : Pat<(int_nvvm_ull2f_rp i64:$a), (CVT_f32_u64 $a, CvtRP)>;

// s64 -> f64
def : Pat<(int_nvvm_ll2d_rn i64:$a), (CVT_f64_s64 $a, CvtRN)>;
def : Pat<(int_nvvm_ll2d_rz i64:$a), (CVT_f64_s64 $a, CvtRZ)>;
def : Pat<(int_nvvm_ll2d_rm i64:$a), (CVT_f64_s64 $a, CvtRM)>;
def : Pat<(int_nvvm_ll2d_rp i64:$a), (CVT_f64_s64 $a, CvtRP)>;

// u64 -> f64
def : Pat<(int_nvvm_ull2d_rn i64:$a), (CVT_f64_u64 $a, CvtRN)>;
def : Pat<(int_nvvm_ull2d_rz i64:$a), (CVT_f64_u64 $a, CvtRZ)>;
def : Pat<(int_nvvm_ull2d_rm i64:$a), (CVT_f64_u64 $a, CvtRM)>;
def : Pat<(int_nvvm_ull2d_rp i64:$a), (CVT_f64_u64 $a, CvtRP)>;

// f32 -> f16
def : Pat<(int_nvvm_f2h_rn_ftz f32:$a), (CVT_f16_f32 $a, CvtRN_FTZ)>;
def : Pat<(int_nvvm_f2h_rn f32:$a), (CVT_f16_f32 $a, CvtRN)>;

// Two f32 values -> e4m3x2 / e5m2x2
def : Pat<(int_nvvm_ff_to_e4m3x2_rn f32:$a, f32:$b),
          (CVT_e4m3x2_f32 $a, $b, CvtRN)>;
def : Pat<(int_nvvm_ff_to_e4m3x2_rn_relu f32:$a, f32:$b),
          (CVT_e4m3x2_f32 $a, $b, CvtRN_RELU)>;
def : Pat<(int_nvvm_ff_to_e5m2x2_rn f32:$a, f32:$b),
          (CVT_e5m2x2_f32 $a, $b, CvtRN)>;
def : Pat<(int_nvvm_ff_to_e5m2x2_rn_relu f32:$a, f32:$b),
          (CVT_e5m2x2_f32 $a, $b, CvtRN_RELU)>;

// f16x2 (held in an Int32Regs register) -> e4m3x2 / e5m2x2
def : Pat<(int_nvvm_f16x2_to_e4m3x2_rn Int32Regs:$a),
          (CVT_e4m3x2_f16x2 $a, CvtRN)>;
def : Pat<(int_nvvm_f16x2_to_e4m3x2_rn_relu Int32Regs:$a),
          (CVT_e4m3x2_f16x2 $a, CvtRN_RELU)>;
def : Pat<(int_nvvm_f16x2_to_e5m2x2_rn Int32Regs:$a),
          (CVT_e5m2x2_f16x2 $a, CvtRN)>;
def : Pat<(int_nvvm_f16x2_to_e5m2x2_rn_relu Int32Regs:$a),
          (CVT_e5m2x2_f16x2 $a, CvtRN_RELU)>;

// e4m3x2 / e5m2x2 (held in an Int16Regs register) -> f16x2
def : Pat<(int_nvvm_e4m3x2_to_f16x2_rn Int16Regs:$a),
          (CVT_f16x2_e4m3x2 $a, CvtRN)>;
def : Pat<(int_nvvm_e4m3x2_to_f16x2_rn_relu Int16Regs:$a),
          (CVT_f16x2_e4m3x2 $a, CvtRN_RELU)>;
def : Pat<(int_nvvm_e5m2x2_to_f16x2_rn Int16Regs:$a),
          (CVT_f16x2_e5m2x2 $a, CvtRN)>;
def : Pat<(int_nvvm_e5m2x2_to_f16x2_rn_relu Int16Regs:$a),
          (CVT_f16x2_e5m2x2 $a, CvtRN_RELU)>;

//
// FNS
//

// Common shape of every fns.b32 instruction: an i32 result plus three inputs
// named $mask, $base and $offset. `ins` supplies the operand list and
// `Operands` the matching int_nvvm_fns dag. Gated on PTX 6.0 and SM 3.0.
class INT_FNS_MBO<dag ins, dag Operands>
    : NVPTXInst<(outs Int32Regs:$dst), ins,
                "fns.b32 \t$dst, $mask, $base, $offset;",
                [(set i32:$dst, Operands)]>,
      Requires<[hasPTX<60>, hasSM<30>]>;

// One record per register/immediate combination. The three-letter suffix
// gives the kind of mask, base and offset in that order (r = register,
// i = immediate).
def INT_FNS_rrr
    : INT_FNS_MBO<(ins Int32Regs:$mask, Int32Regs:$base, Int32Regs:$offset),
                  (int_nvvm_fns i32:$mask, i32:$base, i32:$offset)>;
def INT_FNS_rri
    : INT_FNS_MBO<(ins Int32Regs:$mask, Int32Regs:$base, i32imm:$offset),
                  (int_nvvm_fns i32:$mask, i32:$base, imm:$offset)>;
def INT_FNS_rir
    : INT_FNS_MBO<(ins Int32Regs:$mask, i32imm:$base, Int32Regs:$offset),
                  (int_nvvm_fns i32:$mask, imm:$base, i32:$offset)>;
def INT_FNS_rii
    : INT_FNS_MBO<(ins Int32Regs:$mask, i32imm:$base, i32imm:$offset),
                  (int_nvvm_fns i32:$mask, imm:$base, imm:$offset)>;
def INT_FNS_irr
    : INT_FNS_MBO<(ins i32imm:$mask, Int32Regs:$base, Int32Regs:$offset),
                  (int_nvvm_fns imm:$mask, i32:$base, i32:$offset)>;
def INT_FNS_iri
    : INT_FNS_MBO<(ins i32imm:$mask, Int32Regs:$base, i32imm:$offset),
                  (int_nvvm_fns imm:$mask, i32:$base, imm:$offset)>;
def INT_FNS_iir
    : INT_FNS_MBO<(ins i32imm:$mask, i32imm:$base, Int32Regs:$offset),
                  (int_nvvm_fns imm:$mask, imm:$base, i32:$offset)>;
def INT_FNS_iii
    : INT_FNS_MBO<(ins i32imm:$mask, i32imm:$base, i32imm:$offset),
                  (int_nvvm_fns imm:$mask, imm:$base, imm:$offset)>;

//-----------------------------------
// Atomic Functions
//-----------------------------------

// PatFrag wrappers that attach one of the AS_match address-space predicates
// (global / shared / generic) to an atomic node.
class ATOMIC_GLOBAL_CHK<dag ops, dag frag>
    : PatFrag<ops, frag, AS_match.global>;
class ATOMIC_SHARED_CHK<dag ops, dag frag>
    : PatFrag<ops, frag, AS_match.shared>;
class ATOMIC_GENERIC_CHK<dag ops, dag frag>
    : PatFrag<ops, frag, AS_match.generic>;

// Two-operand atomic (address + value) for one pointer width. Emits a
// register-operand record (`reg`) and an immediate-operand record (`imm`).
multiclass F_ATOMIC_2_imp<ValueType ptrT, NVPTXRegClass ptrclass,
                          ValueType regT, NVPTXRegClass regclass,
                          string SpaceStr, string TypeStr, string OpcStr,
                          PatFrag IntOp, Operand IMMType, SDNode IMM,
                          list<Predicate> Pred> {
  // Both records print the same assembly text.
  defvar AtomAsm =
      "atom" # SpaceStr # OpcStr # TypeStr # " \t$dst, [$addr], $b;";

  let mayLoad = 1, mayStore = 1, hasSideEffects = 1 in {
    def reg
        : NVPTXInst<(outs regclass:$dst), (ins ptrclass:$addr, regclass:$b),
                    AtomAsm,
                    [(set (regT regclass:$dst),
                          (IntOp (ptrT ptrclass:$addr), (regT regclass:$b)))]>,
          Requires<Pred>;

    // The immediate form is disabled (Predicate<"false">) when TypeStr is
    // ".f16" or ".bf16"; every other type uses the caller's predicates.
    def imm
        : NVPTXInst<(outs regclass:$dst), (ins ptrclass:$addr, IMMType:$b),
                    AtomAsm,
                    [(set (regT regclass:$dst),
                          (IntOp (ptrT ptrclass:$addr), IMM:$b))]>,
          Requires<!if(!or(!eq(TypeStr, ".f16"), !eq(TypeStr, ".bf16")),
                       [Predicate<"false">], Pred)>;
  }
}

// Instantiates F_ATOMIC_2_imp for 32-bit (p32) and 64-bit (p64) pointers.
multiclass F_ATOMIC_2<ValueType regT, NVPTXRegClass regclass, string SpaceStr,
                      string TypeStr, string OpcStr, PatFrag IntOp,
                      Operand IMMType, SDNode IMM,
                      list<Predicate> Pred = []> {
  defm p32 : F_ATOMIC_2_imp<i32, Int32Regs, regT, regclass, SpaceStr, TypeStr,
                            OpcStr, IntOp, IMMType, IMM, Pred>;
  defm p64 : F_ATOMIC_2_imp<i64, Int64Regs, regT, regclass, SpaceStr, TypeStr,
                            OpcStr, IntOp, IMMType, IMM, Pred>;
}

// has 2 operands, neg the second one
// The emitted block declares a scratch register `temp`, negates $b into it,
// and issues the atomic with `temp` as the value operand. Here TypeStr is
// the bare width ("32" / "64"); the ".s" / ".u" prefixes are added locally.
multiclass F_ATOMIC_2_NEG_imp<ValueType ptrT, NVPTXRegClass ptrclass,
                              ValueType regT, NVPTXRegClass regclass,
                              string SpaceStr, string TypeStr, string OpcStr,
                              PatFrag IntOp, list<Predicate> Pred> {
  let mayLoad = 1, mayStore = 1, hasSideEffects = 1 in {
    def reg
        : NVPTXInst<(outs regclass:$dst), (ins ptrclass:$addr, regclass:$b),
                    "{{ \n\t"
                    # ".reg \t.s" # TypeStr # " temp; \n\t"
                    # "neg.s" # TypeStr # " \ttemp, $b; \n\t"
                    # "atom" # SpaceStr # OpcStr # ".u" # TypeStr
                    # " \t$dst, [$addr], temp; \n\t"
                    # "}}",
                    [(set (regT regclass:$dst),
                          (IntOp (ptrT ptrclass:$addr), (regT regclass:$b)))]>,
          Requires<Pred>;
  }
}

// Instantiates F_ATOMIC_2_NEG_imp for 32-bit (p32) and 64-bit (p64) pointers.
multiclass F_ATOMIC_2_NEG<ValueType regT, NVPTXRegClass regclass,
                          string SpaceStr, string TypeStr, string OpcStr,
                          PatFrag IntOp, list<Predicate> Pred = []> {
  defm p32 : F_ATOMIC_2_NEG_imp<i32, Int32Regs, regT, regclass, SpaceStr,
                                TypeStr, OpcStr, IntOp, Pred>;
  defm p64 : F_ATOMIC_2_NEG_imp<i64, Int64Regs, regT, regclass, SpaceStr,
                                TypeStr, OpcStr, IntOp, Pred>;
}

// has 3 operands
// Emits four records covering every register/immediate combination of the
// two value operands: reg (b, c registers), imm1 (b immediate), imm2
// (c immediate), imm3 (both immediate).
multiclass F_ATOMIC_3_imp<ValueType ptrT, NVPTXRegClass ptrclass,
                          ValueType regT, NVPTXRegClass regclass,
                          string SpaceStr, string TypeStr, string OpcStr,
                          PatFrag IntOp, Operand IMMType,
                          list<Predicate> Pred> {
  // All four records print the same assembly text.
  defvar AtomAsm =
      "atom" # SpaceStr # OpcStr # TypeStr # " \t$dst, [$addr], $b, $c;";

  let mayLoad = 1, mayStore = 1, hasSideEffects = 1 in {
    def reg
        : NVPTXInst<(outs regclass:$dst),
                    (ins ptrclass:$addr, regclass:$b, regclass:$c),
                    AtomAsm,
                    [(set (regT regclass:$dst),
                          (IntOp (ptrT ptrclass:$addr), (regT regclass:$b),
                                 (regT regclass:$c)))]>,
          Requires<Pred>;

    def imm1
        : NVPTXInst<(outs regclass:$dst),
                    (ins ptrclass:$addr, IMMType:$b, regclass:$c),
                    AtomAsm,
                    [(set (regT regclass:$dst),
                          (IntOp (ptrT ptrclass:$addr), imm:$b,
                                 (regT regclass:$c)))]>,
          Requires<Pred>;

    def imm2
        : NVPTXInst<(outs regclass:$dst),
                    (ins ptrclass:$addr, regclass:$b, IMMType:$c),
                    AtomAsm,
                    [(set (regT regclass:$dst),
                          (IntOp (ptrT ptrclass:$addr), (regT regclass:$b),
                                 imm:$c))]>,
          Requires<Pred>;

    def imm3
        : NVPTXInst<(outs regclass:$dst),
                    (ins ptrclass:$addr, IMMType:$b, IMMType:$c),
                    AtomAsm,
                    [(set (regT regclass:$dst),
                          (IntOp (ptrT ptrclass:$addr), imm:$b, imm:$c))]>,
          Requires<Pred>;
  }
}

// Instantiates F_ATOMIC_3_imp for 32-bit (p32) and 64-bit (p64) pointers.
multiclass F_ATOMIC_3<ValueType regT, NVPTXRegClass regclass, string SpaceStr,
                      string TypeStr, string OpcStr, PatFrag IntOp,
                      Operand IMMType, list<Predicate> Pred = []> {
  defm p32 : F_ATOMIC_3_imp<i32, Int32Regs, regT, regclass, SpaceStr, TypeStr,
                            OpcStr, IntOp, IMMType, Pred>;
  defm p64 : F_ATOMIC_3_imp<i64, Int64Regs, regT, regclass, SpaceStr, TypeStr,
                            OpcStr, IntOp, IMMType, Pred>;
}

// atom_add

// Address-space-checked fragments. Suffix _g / _s / _gen selects the
// global / shared / generic check. The fragments without an integer width
// in their name wrap atomic_load_fadd.
def atomic_load_add_i32_g : ATOMIC_GLOBAL_CHK<
    (ops node:$ptr, node:$val), (atomic_load_add_i32 node:$ptr, node:$val)>;
def atomic_load_add_i32_s : ATOMIC_SHARED_CHK<
    (ops node:$ptr, node:$val), (atomic_load_add_i32 node:$ptr, node:$val)>;
def atomic_load_add_i32_gen : ATOMIC_GENERIC_CHK<
    (ops node:$ptr, node:$val), (atomic_load_add_i32 node:$ptr, node:$val)>;
def atomic_load_add_i64_g : ATOMIC_GLOBAL_CHK<
    (ops node:$ptr, node:$val), (atomic_load_add_i64 node:$ptr, node:$val)>;
def atomic_load_add_i64_s : ATOMIC_SHARED_CHK<
    (ops node:$ptr, node:$val), (atomic_load_add_i64 node:$ptr, node:$val)>;
def atomic_load_add_i64_gen : ATOMIC_GENERIC_CHK<
    (ops node:$ptr, node:$val), (atomic_load_add_i64 node:$ptr, node:$val)>;
def atomic_load_add_g : ATOMIC_GLOBAL_CHK<
    (ops node:$ptr, node:$val), (atomic_load_fadd node:$ptr, node:$val)>;
def atomic_load_add_s : ATOMIC_SHARED_CHK<
    (ops node:$ptr, node:$val), (atomic_load_fadd node:$ptr, node:$val)>;
def atomic_load_add_gen : ATOMIC_GENERIC_CHK<
    (ops node:$ptr, node:$val), (atomic_load_fadd node:$ptr, node:$val)>;

// 32-bit integer add. The *_USE_G variant matches the generic fragment but
// prints the ".global" space.
defm INT_PTX_ATOM_ADD_G_32 : F_ATOMIC_2<
    i32, Int32Regs, ".global", ".u32", ".add",
    atomic_load_add_i32_g, i32imm, imm>;
defm INT_PTX_ATOM_ADD_S_32 : F_ATOMIC_2<
    i32, Int32Regs, ".shared", ".u32", ".add",
    atomic_load_add_i32_s, i32imm, imm>;
defm INT_PTX_ATOM_ADD_GEN_32 : F_ATOMIC_2<
    i32, Int32Regs, "", ".u32", ".add",
    atomic_load_add_i32_gen, i32imm, imm>;
defm INT_PTX_ATOM_ADD_GEN_32_USE_G : F_ATOMIC_2<
    i32, Int32Regs, ".global", ".u32", ".add",
    atomic_load_add_i32_gen, i32imm, imm>;

// 64-bit integer add.
defm INT_PTX_ATOM_ADD_G_64 : F_ATOMIC_2<
    i64, Int64Regs, ".global", ".u64", ".add",
    atomic_load_add_i64_g, i64imm, imm>;
defm INT_PTX_ATOM_ADD_S_64 : F_ATOMIC_2<
    i64, Int64Regs, ".shared", ".u64", ".add",
    atomic_load_add_i64_s, i64imm, imm>;
defm INT_PTX_ATOM_ADD_GEN_64 : F_ATOMIC_2<
    i64, Int64Regs, "", ".u64", ".add",
    atomic_load_add_i64_gen, i64imm, imm>;
defm INT_PTX_ATOM_ADD_GEN_64_USE_G : F_ATOMIC_2<
    i64, Int64Regs, ".global", ".u64", ".add",
    atomic_load_add_i64_gen, i64imm, imm>;

// f16 add (.add.noftz), values in Int16Regs; requires SM 7.0 and PTX 6.3.
defm INT_PTX_ATOM_ADD_G_F16 : F_ATOMIC_2<
    f16, Int16Regs, ".global", ".f16", ".add.noftz",
    atomic_load_add_g, f16imm, fpimm, [hasSM<70>, hasPTX<63>]>;
defm INT_PTX_ATOM_ADD_S_F16 : F_ATOMIC_2<
    f16, Int16Regs, ".shared", ".f16", ".add.noftz",
    atomic_load_add_s, f16imm, fpimm, [hasSM<70>, hasPTX<63>]>;
defm INT_PTX_ATOM_ADD_GEN_F16 : F_ATOMIC_2<
    f16, Int16Regs, "", ".f16", ".add.noftz",
    atomic_load_add_gen, f16imm, fpimm, [hasSM<70>, hasPTX<63>]>;

// bf16 add (.add.noftz), values in Int16Regs; requires SM 9.0 and PTX 7.8.
defm INT_PTX_ATOM_ADD_G_BF16 : F_ATOMIC_2<
    bf16, Int16Regs, ".global", ".bf16", ".add.noftz",
    atomic_load_add_g, bf16imm, fpimm, [hasSM<90>, hasPTX<78>]>;
defm INT_PTX_ATOM_ADD_S_BF16 : F_ATOMIC_2<
    bf16, Int16Regs, ".shared", ".bf16", ".add.noftz",
    atomic_load_add_s, bf16imm, fpimm, [hasSM<90>, hasPTX<78>]>;
defm INT_PTX_ATOM_ADD_GEN_BF16 : F_ATOMIC_2<
    bf16, Int16Regs, "", ".bf16", ".add.noftz",
    atomic_load_add_gen, bf16imm, fpimm, [hasSM<90>, hasPTX<78>]>;

// f32 add.
defm INT_PTX_ATOM_ADD_G_F32 : F_ATOMIC_2<
    f32, Float32Regs, ".global", ".f32", ".add",
    atomic_load_add_g, f32imm, fpimm>;
defm INT_PTX_ATOM_ADD_S_F32 : F_ATOMIC_2<
    f32, Float32Regs, ".shared", ".f32", ".add",
    atomic_load_add_s, f32imm, fpimm>;
defm INT_PTX_ATOM_ADD_GEN_F32 : F_ATOMIC_2<
    f32, Float32Regs, "", ".f32", ".add",
    atomic_load_add_gen, f32imm, fpimm>;

// f64 add; gated on hasAtomAddF64.
defm INT_PTX_ATOM_ADD_G_F64 : F_ATOMIC_2<
    f64, Float64Regs, ".global", ".f64", ".add",
    atomic_load_add_g, f64imm, fpimm, [hasAtomAddF64]>;
defm INT_PTX_ATOM_ADD_S_F64 : F_ATOMIC_2<
    f64, Float64Regs, ".shared", ".f64", ".add",
    atomic_load_add_s, f64imm, fpimm, [hasAtomAddF64]>;
defm INT_PTX_ATOM_ADD_GEN_F64 : F_ATOMIC_2<
    f64, Float64Regs, "", ".f64", ".add",
    atomic_load_add_gen, f64imm, fpimm, [hasAtomAddF64]>;

// atom_sub

def atomic_load_sub_i32_g : ATOMIC_GLOBAL_CHK<
    (ops node:$ptr, node:$val), (atomic_load_sub_i32 node:$ptr, node:$val)>;
def atomic_load_sub_i32_s : ATOMIC_SHARED_CHK<
    (ops node:$ptr, node:$val), (atomic_load_sub_i32 node:$ptr, node:$val)>;
def atomic_load_sub_i32_gen : ATOMIC_GENERIC_CHK<
    (ops node:$ptr, node:$val), (atomic_load_sub_i32 node:$ptr, node:$val)>;
def atomic_load_sub_i64_g : ATOMIC_GLOBAL_CHK<
    (ops node:$ptr, node:$val), (atomic_load_sub_i64 node:$ptr, node:$val)>;
def atomic_load_sub_i64_s : ATOMIC_SHARED_CHK<
    (ops node:$ptr, node:$val), (atomic_load_sub_i64 node:$ptr, node:$val)>;
def atomic_load_sub_i64_gen : ATOMIC_GENERIC_CHK<
    (ops node:$ptr, node:$val), (atomic_load_sub_i64 node:$ptr, node:$val)>;

// Subtraction goes through F_ATOMIC_2_NEG with the ".add" opcode, i.e. it is
// emitted as an atomic add of the negated operand.
defm INT_PTX_ATOM_SUB_G_32 : F_ATOMIC_2_NEG<
    i32, Int32Regs, ".global", "32", ".add", atomic_load_sub_i32_g>;
defm INT_PTX_ATOM_SUB_G_64 : F_ATOMIC_2_NEG<
    i64, Int64Regs, ".global", "64", ".add", atomic_load_sub_i64_g>;
defm INT_PTX_ATOM_SUB_GEN_32 : F_ATOMIC_2_NEG<
    i32, Int32Regs, "", "32", ".add", atomic_load_sub_i32_gen>;
defm INT_PTX_ATOM_SUB_GEN_32_USE_G : F_ATOMIC_2_NEG<
    i32, Int32Regs, ".global", "32", ".add", atomic_load_sub_i32_gen>;
defm INT_PTX_ATOM_SUB_S_32 : F_ATOMIC_2_NEG<
    i32, Int32Regs, ".shared", "32", ".add", atomic_load_sub_i32_s>;
defm INT_PTX_ATOM_SUB_S_64 : F_ATOMIC_2_NEG<
    i64, Int64Regs, ".shared", "64", ".add", atomic_load_sub_i64_s>;
defm INT_PTX_ATOM_SUB_GEN_64 : F_ATOMIC_2_NEG<
    i64, Int64Regs, "", "64", ".add", atomic_load_sub_i64_gen>;
defm INT_PTX_ATOM_SUB_GEN_64_USE_G : F_ATOMIC_2_NEG<
    i64, Int64Regs, ".global", "64", ".add", atomic_load_sub_i64_gen>;

// atom_swap

def atomic_swap_i32_g : ATOMIC_GLOBAL_CHK<
    (ops node:$ptr, node:$val), (atomic_swap_i32 node:$ptr, node:$val)>;
def atomic_swap_i32_s : ATOMIC_SHARED_CHK<
    (ops node:$ptr, node:$val), (atomic_swap_i32 node:$ptr, node:$val)>;
def atomic_swap_i32_gen : ATOMIC_GENERIC_CHK<
    (ops node:$ptr, node:$val), (atomic_swap_i32 node:$ptr, node:$val)>;
def atomic_swap_i64_g : ATOMIC_GLOBAL_CHK<
    (ops node:$ptr, node:$val), (atomic_swap_i64 node:$ptr, node:$val)>;
def atomic_swap_i64_s : ATOMIC_SHARED_CHK<
    (ops node:$ptr, node:$val), (atomic_swap_i64 node:$ptr, node:$val)>;
def atomic_swap_i64_gen : ATOMIC_GENERIC_CHK<
    (ops node:$ptr, node:$val), (atomic_swap_i64 node:$ptr, node:$val)>;

// Swap is printed as atom<space>.exch on the untyped .b32 / .b64 types.
defm INT_PTX_ATOM_SWAP_G_32 : F_ATOMIC_2<
    i32, Int32Regs, ".global", ".b32", ".exch",
    atomic_swap_i32_g, i32imm, imm>;
defm INT_PTX_ATOM_SWAP_S_32 : F_ATOMIC_2<
    i32, Int32Regs, ".shared", ".b32", ".exch",
    atomic_swap_i32_s, i32imm, imm>;
defm INT_PTX_ATOM_SWAP_GEN_32 : F_ATOMIC_2<
    i32, Int32Regs, "", ".b32", ".exch",
    atomic_swap_i32_gen, i32imm, imm>;
defm INT_PTX_ATOM_SWAP_GEN_32_USE_G : F_ATOMIC_2<
    i32, Int32Regs, ".global", ".b32", ".exch",
    atomic_swap_i32_gen, i32imm, imm>;
defm INT_PTX_ATOM_SWAP_G_64 : F_ATOMIC_2<
    i64, Int64Regs, ".global", ".b64", ".exch",
    atomic_swap_i64_g, i64imm, imm>;
defm INT_PTX_ATOM_SWAP_S_64 : F_ATOMIC_2<
    i64, Int64Regs, ".shared", ".b64", ".exch",
    atomic_swap_i64_s, i64imm, imm>;
defm INT_PTX_ATOM_SWAP_GEN_64 : F_ATOMIC_2<
    i64, Int64Regs, "", ".b64", ".exch",
    atomic_swap_i64_gen, i64imm, imm>;
defm INT_PTX_ATOM_SWAP_GEN_64_USE_G : F_ATOMIC_2<
    i64, Int64Regs, ".global", ".b64", ".exch",
    atomic_swap_i64_gen, i64imm, imm>;

// atom_max

def atomic_load_max_i32_g : ATOMIC_GLOBAL_CHK<
    (ops node:$ptr, node:$val), (atomic_load_max_i32 node:$ptr, node:$val)>;
def atomic_load_max_i32_s : ATOMIC_SHARED_CHK<
    (ops node:$ptr, node:$val), (atomic_load_max_i32 node:$ptr, node:$val)>;
def atomic_load_max_i32_gen : ATOMIC_GENERIC_CHK<
    (ops node:$ptr, node:$val), (atomic_load_max_i32 node:$ptr, node:$val)>;
def atomic_load_max_i64_g : ATOMIC_GLOBAL_CHK<
    (ops node:$ptr, node:$val), (atomic_load_max_i64 node:$ptr, node:$val)>;
def atomic_load_max_i64_s : ATOMIC_SHARED_CHK<
    (ops node:$ptr, node:$val), (atomic_load_max_i64 node:$ptr, node:$val)>;
def atomic_load_max_i64_gen : ATOMIC_GENERIC_CHK<
    (ops node:$ptr, node:$val), (atomic_load_max_i64 node:$ptr, node:$val)>;
def atomic_load_umax_i32_g : ATOMIC_GLOBAL_CHK<
    (ops node:$ptr, node:$val), (atomic_load_umax_i32 node:$ptr, node:$val)>;
def atomic_load_umax_i32_s : ATOMIC_SHARED_CHK<
    (ops node:$ptr, node:$val), (atomic_load_umax_i32 node:$ptr, node:$val)>;
def atomic_load_umax_i32_gen : ATOMIC_GENERIC_CHK<
    (ops node:$ptr, node:$val), (atomic_load_umax_i32 node:$ptr, node:$val)>;
def atomic_load_umax_i64_g : ATOMIC_GLOBAL_CHK<
    (ops node:$ptr, node:$val), (atomic_load_umax_i64 node:$ptr, node:$val)>;
def atomic_load_umax_i64_s : ATOMIC_SHARED_CHK<
    (ops node:$ptr, node:$val), (atomic_load_umax_i64 node:$ptr, node:$val)>;
def atomic_load_umax_i64_gen : ATOMIC_GENERIC_CHK<
    (ops node:$ptr, node:$val), (atomic_load_umax_i64 node:$ptr, node:$val)>;

// Signed max uses the .s32 / .s64 types, unsigned max the .u32 / .u64 types.
// Every 64-bit variant carries the hasSM<32> predicate.
defm INT_PTX_ATOM_LOAD_MAX_G_32 : F_ATOMIC_2<
    i32, Int32Regs, ".global", ".s32", ".max",
    atomic_load_max_i32_g, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_MAX_S_32 : F_ATOMIC_2<
    i32, Int32Regs, ".shared", ".s32", ".max",
    atomic_load_max_i32_s, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_MAX_GEN_32 : F_ATOMIC_2<
    i32, Int32Regs, "", ".s32", ".max",
    atomic_load_max_i32_gen, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_MAX_GEN_32_USE_G : F_ATOMIC_2<
    i32, Int32Regs, ".global", ".s32", ".max",
    atomic_load_max_i32_gen, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_MAX_G_64 : F_ATOMIC_2<
    i64, Int64Regs, ".global", ".s64", ".max",
    atomic_load_max_i64_g, i64imm, imm, [hasSM<32>]>;
defm INT_PTX_ATOM_LOAD_MAX_S_64 : F_ATOMIC_2<
    i64, Int64Regs, ".shared", ".s64", ".max",
    atomic_load_max_i64_s, i64imm, imm, [hasSM<32>]>;
defm INT_PTX_ATOM_LOAD_MAX_GEN_64 : F_ATOMIC_2<
    i64, Int64Regs, "", ".s64", ".max",
    atomic_load_max_i64_gen, i64imm, imm, [hasSM<32>]>;
defm INT_PTX_ATOM_LOAD_MAX_GEN_64_USE_G : F_ATOMIC_2<
    i64, Int64Regs, ".global", ".s64", ".max",
    atomic_load_max_i64_gen, i64imm, imm, [hasSM<32>]>;
defm INT_PTX_ATOM_LOAD_UMAX_G_32 : F_ATOMIC_2<
    i32, Int32Regs, ".global", ".u32", ".max",
    atomic_load_umax_i32_g, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_UMAX_S_32 : F_ATOMIC_2<
    i32, Int32Regs, ".shared", ".u32", ".max",
    atomic_load_umax_i32_s, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_UMAX_GEN_32 : F_ATOMIC_2<
    i32, Int32Regs, "", ".u32", ".max",
    atomic_load_umax_i32_gen, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_UMAX_GEN_32_USE_G : F_ATOMIC_2<
    i32, Int32Regs, ".global", ".u32", ".max",
    atomic_load_umax_i32_gen, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_UMAX_G_64 : F_ATOMIC_2<
    i64, Int64Regs, ".global", ".u64", ".max",
    atomic_load_umax_i64_g, i64imm, imm, [hasSM<32>]>;
defm INT_PTX_ATOM_LOAD_UMAX_S_64 : F_ATOMIC_2<
    i64, Int64Regs, ".shared", ".u64", ".max",
    atomic_load_umax_i64_s, i64imm, imm, [hasSM<32>]>;
defm INT_PTX_ATOM_LOAD_UMAX_GEN_64 : F_ATOMIC_2<
    i64, Int64Regs, "", ".u64", ".max",
    atomic_load_umax_i64_gen, i64imm, imm, [hasSM<32>]>;
defm INT_PTX_ATOM_LOAD_UMAX_GEN_64_USE_G : F_ATOMIC_2<
    i64, Int64Regs, ".global", ".u64", ".max",
    atomic_load_umax_i64_gen, i64imm, imm, [hasSM<32>]>;

// atom_min

def atomic_load_min_i32_g : ATOMIC_GLOBAL_CHK<
    (ops node:$ptr, node:$val), (atomic_load_min_i32 node:$ptr, node:$val)>;
def atomic_load_min_i32_s : ATOMIC_SHARED_CHK<
    (ops node:$ptr, node:$val), (atomic_load_min_i32 node:$ptr, node:$val)>;
def atomic_load_min_i32_gen : ATOMIC_GENERIC_CHK<
    (ops node:$ptr, node:$val), (atomic_load_min_i32 node:$ptr, node:$val)>;
def atomic_load_min_i64_g : ATOMIC_GLOBAL_CHK<
    (ops node:$ptr, node:$val), (atomic_load_min_i64 node:$ptr, node:$val)>;
def atomic_load_min_i64_s : ATOMIC_SHARED_CHK<
    (ops node:$ptr, node:$val), (atomic_load_min_i64 node:$ptr, node:$val)>;
def atomic_load_min_i64_gen : ATOMIC_GENERIC_CHK<
    (ops node:$ptr, node:$val), (atomic_load_min_i64 node:$ptr, node:$val)>;
def atomic_load_umin_i32_g : ATOMIC_GLOBAL_CHK<
    (ops node:$ptr, node:$val), (atomic_load_umin_i32 node:$ptr, node:$val)>;
def atomic_load_umin_i32_s : ATOMIC_SHARED_CHK<
    (ops node:$ptr, node:$val), (atomic_load_umin_i32 node:$ptr, node:$val)>;
def atomic_load_umin_i32_gen : ATOMIC_GENERIC_CHK<
    (ops node:$ptr, node:$val), (atomic_load_umin_i32 node:$ptr, node:$val)>;
def atomic_load_umin_i64_g : ATOMIC_GLOBAL_CHK<
    (ops node:$ptr, node:$val), (atomic_load_umin_i64 node:$ptr, node:$val)>;
def atomic_load_umin_i64_s : ATOMIC_SHARED_CHK<
    (ops node:$ptr, node:$val), (atomic_load_umin_i64 node:$ptr, node:$val)>;
def atomic_load_umin_i64_gen : ATOMIC_GENERIC_CHK<
    (ops node:$ptr, node:$val), (atomic_load_umin_i64 node:$ptr, node:$val)>;

// Signed min uses the .s32 / .s64 types, unsigned min the .u32 / .u64 types.
// Every 64-bit variant carries the hasSM<32> predicate.
defm INT_PTX_ATOM_LOAD_MIN_G_32 : F_ATOMIC_2<
    i32, Int32Regs, ".global", ".s32", ".min",
    atomic_load_min_i32_g, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_MIN_S_32 : F_ATOMIC_2<
    i32, Int32Regs, ".shared", ".s32", ".min",
    atomic_load_min_i32_s, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_MIN_GEN_32 : F_ATOMIC_2<
    i32, Int32Regs, "", ".s32", ".min",
    atomic_load_min_i32_gen, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_MIN_GEN_32_USE_G : F_ATOMIC_2<
    i32, Int32Regs, ".global", ".s32", ".min",
    atomic_load_min_i32_gen, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_MIN_G_64 : F_ATOMIC_2<
    i64, Int64Regs, ".global", ".s64", ".min",
    atomic_load_min_i64_g, i64imm, imm, [hasSM<32>]>;
defm INT_PTX_ATOM_LOAD_MIN_S_64 : F_ATOMIC_2<
    i64, Int64Regs, ".shared", ".s64", ".min",
    atomic_load_min_i64_s, i64imm, imm, [hasSM<32>]>;
defm INT_PTX_ATOM_LOAD_MIN_GEN_64 : F_ATOMIC_2<
    i64, Int64Regs, "", ".s64", ".min",
    atomic_load_min_i64_gen, i64imm, imm, [hasSM<32>]>;
defm INT_PTX_ATOM_LOAD_MIN_GEN_64_USE_G : F_ATOMIC_2<
    i64, Int64Regs, ".global", ".s64", ".min",
    atomic_load_min_i64_gen, i64imm, imm, [hasSM<32>]>;
defm INT_PTX_ATOM_LOAD_UMIN_G_32 : F_ATOMIC_2<
    i32, Int32Regs, ".global", ".u32", ".min",
    atomic_load_umin_i32_g, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_UMIN_S_32 : F_ATOMIC_2<
    i32, Int32Regs, ".shared", ".u32", ".min",
    atomic_load_umin_i32_s, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_UMIN_GEN_32 : F_ATOMIC_2<
    i32, Int32Regs, "", ".u32", ".min",
    atomic_load_umin_i32_gen, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_UMIN_GEN_32_USE_G : F_ATOMIC_2<
    i32, Int32Regs, ".global", ".u32", ".min",
    atomic_load_umin_i32_gen, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_UMIN_G_64 : F_ATOMIC_2<
    i64, Int64Regs, ".global", ".u64", ".min",
    atomic_load_umin_i64_g, i64imm, imm, [hasSM<32>]>;
defm INT_PTX_ATOM_LOAD_UMIN_S_64 : F_ATOMIC_2<
    i64, Int64Regs, ".shared", ".u64", ".min",
    atomic_load_umin_i64_s, i64imm, imm, [hasSM<32>]>;
defm INT_PTX_ATOM_LOAD_UMIN_GEN_64 : F_ATOMIC_2<
    i64, Int64Regs, "", ".u64", ".min",
    atomic_load_umin_i64_gen, i64imm, imm, [hasSM<32>]>;
defm INT_PTX_ATOM_LOAD_UMIN_GEN_64_USE_G : F_ATOMIC_2<
    i64, Int64Regs, ".global", ".u64", ".min",
    atomic_load_umin_i64_gen, i64imm, imm, [hasSM<32>]>;

// atom_inc  atom_dec

// These fragments wrap the int_nvvm_atomic_load_{inc,dec}_32 intrinsics
// rather than generic atomic nodes.
def atomic_load_inc_32_g : ATOMIC_GLOBAL_CHK<
    (ops node:$ptr, node:$val),
    (int_nvvm_atomic_load_inc_32 node:$ptr, node:$val)>;
def atomic_load_inc_32_s : ATOMIC_SHARED_CHK<
    (ops node:$ptr, node:$val),
    (int_nvvm_atomic_load_inc_32 node:$ptr, node:$val)>;
def atomic_load_inc_32_gen : ATOMIC_GENERIC_CHK<
    (ops node:$ptr, node:$val),
    (int_nvvm_atomic_load_inc_32 node:$ptr, node:$val)>;
def atomic_load_dec_32_g : ATOMIC_GLOBAL_CHK<
    (ops node:$ptr, node:$val),
    (int_nvvm_atomic_load_dec_32 node:$ptr, node:$val)>;
def atomic_load_dec_32_s : ATOMIC_SHARED_CHK<
    (ops node:$ptr, node:$val),
    (int_nvvm_atomic_load_dec_32 node:$ptr, node:$val)>;
def atomic_load_dec_32_gen : ATOMIC_GENERIC_CHK<
    (ops node:$ptr, node:$val),
    (int_nvvm_atomic_load_dec_32 node:$ptr, node:$val)>;

defm INT_PTX_ATOM_INC_G_32 : F_ATOMIC_2<
    i32, Int32Regs, ".global", ".u32", ".inc",
    atomic_load_inc_32_g, i32imm, imm>;
defm
INT_PTX_ATOM_INC_S_32 : F_ATOMIC_2<i32, Int32Regs, ".shared", ".u32", ".inc", 2271 atomic_load_inc_32_s, i32imm, imm>; 2272defm INT_PTX_ATOM_INC_GEN_32 : F_ATOMIC_2<i32, Int32Regs, "", ".u32", ".inc", 2273 atomic_load_inc_32_gen, i32imm, imm>; 2274defm INT_PTX_ATOM_INC_GEN_32_USE_G : F_ATOMIC_2<i32, Int32Regs, ".global", ".u32", 2275 ".inc", atomic_load_inc_32_gen, i32imm, imm>; 2276defm INT_PTX_ATOM_DEC_G_32 : F_ATOMIC_2<i32, Int32Regs, ".global", ".u32", ".dec", 2277 atomic_load_dec_32_g, i32imm, imm>; 2278defm INT_PTX_ATOM_DEC_S_32 : F_ATOMIC_2<i32, Int32Regs, ".shared", ".u32", ".dec", 2279 atomic_load_dec_32_s, i32imm, imm>; 2280defm INT_PTX_ATOM_DEC_GEN_32 : F_ATOMIC_2<i32, Int32Regs, "", ".u32", ".dec", 2281 atomic_load_dec_32_gen, i32imm, imm>; 2282defm INT_PTX_ATOM_DEC_GEN_32_USE_G : F_ATOMIC_2<i32, Int32Regs, ".global", ".u32", 2283 ".dec", atomic_load_dec_32_gen, i32imm, imm>; 2284 2285// atom_and 2286 2287def atomic_load_and_i32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b), 2288 (atomic_load_and_i32 node:$a, node:$b)>; 2289def atomic_load_and_i32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b), 2290 (atomic_load_and_i32 node:$a, node:$b)>; 2291def atomic_load_and_i32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b), 2292 (atomic_load_and_i32 node:$a, node:$b)>; 2293def atomic_load_and_i64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b), 2294 (atomic_load_and_i64 node:$a, node:$b)>; 2295def atomic_load_and_i64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b), 2296 (atomic_load_and_i64 node:$a, node:$b)>; 2297def atomic_load_and_i64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b), 2298 (atomic_load_and_i64 node:$a, node:$b)>; 2299 2300defm INT_PTX_ATOM_AND_G_32 : F_ATOMIC_2<i32, Int32Regs, ".global", ".b32", ".and", 2301 atomic_load_and_i32_g, i32imm, imm>; 2302defm INT_PTX_ATOM_AND_S_32 : F_ATOMIC_2<i32, Int32Regs, ".shared", ".b32", ".and", 2303 atomic_load_and_i32_s, i32imm, imm>; 2304defm INT_PTX_ATOM_AND_GEN_32 : F_ATOMIC_2<i32, Int32Regs, "", ".b32", ".and", 2305 
atomic_load_and_i32_gen, i32imm, imm>; 2306defm INT_PTX_ATOM_AND_GEN_32_USE_G : F_ATOMIC_2<i32, Int32Regs, ".global", ".b32", 2307 ".and", atomic_load_and_i32_gen, i32imm, imm>; 2308defm INT_PTX_ATOM_AND_G_64 : F_ATOMIC_2<i64, Int64Regs, ".global", ".b64", ".and", 2309 atomic_load_and_i64_g, i64imm, imm, [hasSM<32>]>; 2310defm INT_PTX_ATOM_AND_S_64 : F_ATOMIC_2<i64, Int64Regs, ".shared", ".b64", ".and", 2311 atomic_load_and_i64_s, i64imm, imm, [hasSM<32>]>; 2312defm INT_PTX_ATOM_AND_GEN_64 : F_ATOMIC_2<i64, Int64Regs, "", ".b64", ".and", 2313 atomic_load_and_i64_gen, i64imm, imm, [hasSM<32>]>; 2314defm INT_PTX_ATOM_AND_GEN_64_USE_G : F_ATOMIC_2<i64, Int64Regs, ".global", ".b64", 2315 ".and", atomic_load_and_i64_gen, i64imm, imm, [hasSM<32>]>; 2316 2317// atom_or 2318 2319def atomic_load_or_i32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b), 2320 (atomic_load_or_i32 node:$a, node:$b)>; 2321def atomic_load_or_i32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b), 2322 (atomic_load_or_i32 node:$a, node:$b)>; 2323def atomic_load_or_i32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b), 2324 (atomic_load_or_i32 node:$a, node:$b)>; 2325def atomic_load_or_i64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b), 2326 (atomic_load_or_i64 node:$a, node:$b)>; 2327def atomic_load_or_i64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b), 2328 (atomic_load_or_i64 node:$a, node:$b)>; 2329def atomic_load_or_i64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b), 2330 (atomic_load_or_i64 node:$a, node:$b)>; 2331 2332defm INT_PTX_ATOM_OR_G_32 : F_ATOMIC_2<i32, Int32Regs, ".global", ".b32", ".or", 2333 atomic_load_or_i32_g, i32imm, imm>; 2334defm INT_PTX_ATOM_OR_GEN_32 : F_ATOMIC_2<i32, Int32Regs, "", ".b32", ".or", 2335 atomic_load_or_i32_gen, i32imm, imm>; 2336defm INT_PTX_ATOM_OR_GEN_32_USE_G : F_ATOMIC_2<i32, Int32Regs, ".global", ".b32", 2337 ".or", atomic_load_or_i32_gen, i32imm, imm>; 2338defm INT_PTX_ATOM_OR_S_32 : F_ATOMIC_2<i32, Int32Regs, ".shared", ".b32", ".or", 2339 atomic_load_or_i32_s, i32imm, 
imm>; 2340defm INT_PTX_ATOM_OR_G_64 : F_ATOMIC_2<i64, Int64Regs, ".global", ".b64", ".or", 2341 atomic_load_or_i64_g, i64imm, imm, [hasSM<32>]>; 2342defm INT_PTX_ATOM_OR_GEN_64 : F_ATOMIC_2<i64, Int64Regs, "", ".b64", ".or", 2343 atomic_load_or_i64_gen, i64imm, imm, [hasSM<32>]>; 2344defm INT_PTX_ATOM_OR_GEN_64_USE_G : F_ATOMIC_2<i64, Int64Regs, ".global", ".b64", 2345 ".or", atomic_load_or_i64_gen, i64imm, imm, [hasSM<32>]>; 2346defm INT_PTX_ATOM_OR_S_64 : F_ATOMIC_2<i64, Int64Regs, ".shared", ".b64", ".or", 2347 atomic_load_or_i64_s, i64imm, imm, [hasSM<32>]>; 2348 2349// atom_xor 2350 2351def atomic_load_xor_i32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b), 2352 (atomic_load_xor_i32 node:$a, node:$b)>; 2353def atomic_load_xor_i32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b), 2354 (atomic_load_xor_i32 node:$a, node:$b)>; 2355def atomic_load_xor_i32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b), 2356 (atomic_load_xor_i32 node:$a, node:$b)>; 2357def atomic_load_xor_i64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b), 2358 (atomic_load_xor_i64 node:$a, node:$b)>; 2359def atomic_load_xor_i64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b), 2360 (atomic_load_xor_i64 node:$a, node:$b)>; 2361def atomic_load_xor_i64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b), 2362 (atomic_load_xor_i64 node:$a, node:$b)>; 2363 2364defm INT_PTX_ATOM_XOR_G_32 : F_ATOMIC_2<i32, Int32Regs, ".global", ".b32", ".xor", 2365 atomic_load_xor_i32_g, i32imm, imm>; 2366defm INT_PTX_ATOM_XOR_S_32 : F_ATOMIC_2<i32, Int32Regs, ".shared", ".b32", ".xor", 2367 atomic_load_xor_i32_s, i32imm, imm>; 2368defm INT_PTX_ATOM_XOR_GEN_32 : F_ATOMIC_2<i32, Int32Regs, "", ".b32", ".xor", 2369 atomic_load_xor_i32_gen, i32imm, imm>; 2370defm INT_PTX_ATOM_XOR_GEN_32_USE_G : F_ATOMIC_2<i32, Int32Regs, ".global", ".b32", 2371 ".xor", atomic_load_xor_i32_gen, i32imm, imm>; 2372defm INT_PTX_ATOM_XOR_G_64 : F_ATOMIC_2<i64, Int64Regs, ".global", ".b64", ".xor", 2373 atomic_load_xor_i64_g, i64imm, imm, [hasSM<32>]>; 2374defm 
INT_PTX_ATOM_XOR_S_64 : F_ATOMIC_2<i64, Int64Regs, ".shared", ".b64", ".xor", 2375 atomic_load_xor_i64_s, i64imm, imm, [hasSM<32>]>; 2376defm INT_PTX_ATOM_XOR_GEN_64 : F_ATOMIC_2<i64, Int64Regs, "", ".b64", ".xor", 2377 atomic_load_xor_i64_gen, i64imm, imm, [hasSM<32>]>; 2378defm INT_PTX_ATOM_XOR_GEN_64_USE_G : F_ATOMIC_2<i64, Int64Regs, ".global", ".b64", 2379 ".xor", atomic_load_xor_i64_gen, i64imm, imm, [hasSM<32>]>; 2380 2381// atom_cas 2382 2383def atomic_cmp_swap_i16_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b, node:$c), 2384 (atomic_cmp_swap_i16 node:$a, node:$b, node:$c)>; 2385def atomic_cmp_swap_i16_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b, node:$c), 2386 (atomic_cmp_swap_i16 node:$a, node:$b, node:$c)>; 2387def atomic_cmp_swap_i16_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b, node:$c), 2388 (atomic_cmp_swap_i16 node:$a, node:$b, node:$c)>; 2389def atomic_cmp_swap_i32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b, node:$c), 2390 (atomic_cmp_swap_i32 node:$a, node:$b, node:$c)>; 2391def atomic_cmp_swap_i32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b, node:$c), 2392 (atomic_cmp_swap_i32 node:$a, node:$b, node:$c)>; 2393def atomic_cmp_swap_i32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b, node:$c), 2394 (atomic_cmp_swap_i32 node:$a, node:$b, node:$c)>; 2395def atomic_cmp_swap_i64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b, node:$c), 2396 (atomic_cmp_swap_i64 node:$a, node:$b, node:$c)>; 2397def atomic_cmp_swap_i64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b, node:$c), 2398 (atomic_cmp_swap_i64 node:$a, node:$b, node:$c)>; 2399def atomic_cmp_swap_i64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b, node:$c), 2400 (atomic_cmp_swap_i64 node:$a, node:$b, node:$c)>; 2401 2402defm INT_PTX_ATOM_CAS_G_16 : F_ATOMIC_3<i16, Int16Regs, ".global", ".b16", ".cas", 2403 atomic_cmp_swap_i16_g, i16imm, [hasSM<70>, hasPTX<63>]>; 2404defm INT_PTX_ATOM_CAS_S_16 : F_ATOMIC_3<i16, Int16Regs, ".shared", ".b16", ".cas", 2405 atomic_cmp_swap_i16_s, i16imm, [hasSM<70>, hasPTX<63>]>; 
// Remaining atom.cas records.  The 16-bit forms carry the
// [hasSM<70>, hasPTX<63>] predicate list; the 32- and 64-bit forms pass none.
defm INT_PTX_ATOM_CAS_GEN_16
    : F_ATOMIC_3<i16, Int16Regs, "", ".b16", ".cas",
                 atomic_cmp_swap_i16_gen, i16imm, [hasSM<70>, hasPTX<63>]>;
defm INT_PTX_ATOM_CAS_GEN_16_USE_G
    : F_ATOMIC_3<i16, Int16Regs, ".global", ".b16", ".cas",
                 atomic_cmp_swap_i16_gen, i16imm, [hasSM<70>, hasPTX<63>]>;
defm INT_PTX_ATOM_CAS_G_32
    : F_ATOMIC_3<i32, Int32Regs, ".global", ".b32", ".cas",
                 atomic_cmp_swap_i32_g, i32imm>;
defm INT_PTX_ATOM_CAS_S_32
    : F_ATOMIC_3<i32, Int32Regs, ".shared", ".b32", ".cas",
                 atomic_cmp_swap_i32_s, i32imm>;
defm INT_PTX_ATOM_CAS_GEN_32
    : F_ATOMIC_3<i32, Int32Regs, "", ".b32", ".cas",
                 atomic_cmp_swap_i32_gen, i32imm>;
defm INT_PTX_ATOM_CAS_GEN_32_USE_G
    : F_ATOMIC_3<i32, Int32Regs, ".global", ".b32", ".cas",
                 atomic_cmp_swap_i32_gen, i32imm>;
defm INT_PTX_ATOM_CAS_G_64
    : F_ATOMIC_3<i64, Int64Regs, ".global", ".b64", ".cas",
                 atomic_cmp_swap_i64_g, i64imm>;
defm INT_PTX_ATOM_CAS_S_64
    : F_ATOMIC_3<i64, Int64Regs, ".shared", ".b64", ".cas",
                 atomic_cmp_swap_i64_s, i64imm>;
defm INT_PTX_ATOM_CAS_GEN_64
    : F_ATOMIC_3<i64, Int64Regs, "", ".b64", ".cas",
                 atomic_cmp_swap_i64_gen, i64imm>;
defm INT_PTX_ATOM_CAS_GEN_64_USE_G
    : F_ATOMIC_3<i64, Int64Regs, ".global", ".b64", ".cas",
                 atomic_cmp_swap_i64_gen, i64imm>;

// Scoped atomic operations.  These match intrinsics named
//   int_nvvm_atomic_{op}_{space}_{type}_{scope}
// and select the corresponding instruction.
// NOTE: not every combination is implemented:
//  - 'space' is limited to generic, the only one needed to support CUDA.
//  - 'scope' = 'gpu' is the default and is handled by the regular atomic
//    instructions.
// Common instruction shape for the scoped atomics: one $result output of
// class `regclass`, the caller-supplied `ins` dag, and a single pattern that
// sets $result from `Operands`.  `Preds` is attached through Requires<>.
class ATOM23_impl<string AsmStr, ValueType regT, NVPTXRegClass regclass, list<Predicate> Preds,
                  dag ins, dag Operands>
    : NVPTXInst<(outs regclass:$result), ins, AsmStr,
                [(set regT:$result, Operands)]>,
      Requires<Preds>;

// Define instruction variants for all addressing modes.
// Two-operand form: for each of the i16/i32/i64 $src types, emits one record
// with a register $b and one with an immediate $b.
multiclass ATOM2P_impl<string AsmStr, Intrinsic Intr,
                       ValueType regT, NVPTXRegClass regclass, Operand ImmType,
                       SDNode Imm, ValueType ImmTy,
                       list<Predicate> Preds> {
  // Register-operand variants get AddedComplexity = 1.
  let AddedComplexity = 1 in {
    def : ATOM23_impl<AsmStr, regT, regclass, Preds,
                      (ins Int16Regs:$src, regclass:$b),
                      (Intr i16:$src, regT:$b)>;
    def : ATOM23_impl<AsmStr, regT, regclass, Preds,
                      (ins Int32Regs:$src, regclass:$b),
                      (Intr i32:$src, regT:$b)>;
    def : ATOM23_impl<AsmStr, regT, regclass, Preds,
                      (ins Int64Regs:$src, regclass:$b),
                      (Intr i64:$src, regT:$b)>;
  }
  // tablegen can't infer argument types from Intrinsic (though it can
  // from Instruction), so the immediate's type is enforced with an explicit
  // cast to ImmTy.
  def : ATOM23_impl<AsmStr, regT, regclass, Preds,
                    (ins Int16Regs:$src, ImmType:$b),
                    (Intr i16:$src, (ImmTy Imm:$b))>;
  def : ATOM23_impl<AsmStr, regT, regclass, Preds,
                    (ins Int32Regs:$src, ImmType:$b),
                    (Intr i32:$src, (ImmTy Imm:$b))>;
  def : ATOM23_impl<AsmStr, regT, regclass, Preds,
                    (ins Int64Regs:$src, ImmType:$b),
                    (Intr i64:$src, (ImmTy Imm:$b))>;
}

// Three-operand form: for each of the i32/i64 $src types, emits every
// register/immediate combination of $b and $c.
multiclass ATOM3P_impl<string AsmStr, Intrinsic Intr,
                       ValueType regT, NVPTXRegClass regclass,
                       Operand ImmType, SDNode Imm, ValueType ImmTy,
                       list<Predicate> Preds> {
  // Variants for register/immediate permutations of $b and $c.
  // reg, reg: AddedComplexity = 2.
  let AddedComplexity = 2 in {
    def : ATOM23_impl<AsmStr, regT, regclass, Preds,
                      (ins Int32Regs:$src, regclass:$b, regclass:$c),
                      (Intr i32:$src, regT:$b, regT:$c)>;
    def : ATOM23_impl<AsmStr, regT, regclass, Preds,
                      (ins Int64Regs:$src, regclass:$b, regclass:$c),
                      (Intr i64:$src, regT:$b, regT:$c)>;
  }
  // imm, reg and reg, imm: AddedComplexity = 1.
  let AddedComplexity = 1 in {
    def : ATOM23_impl<AsmStr, regT, regclass, Preds,
                      (ins Int32Regs:$src, ImmType:$b, regclass:$c),
                      (Intr i32:$src, (ImmTy Imm:$b), regT:$c)>;
    def : ATOM23_impl<AsmStr, regT, regclass, Preds,
                      (ins Int64Regs:$src, ImmType:$b, regclass:$c),
                      (Intr i64:$src, (ImmTy Imm:$b), regT:$c)>;
    def : ATOM23_impl<AsmStr, regT, regclass, Preds,
                      (ins Int32Regs:$src, regclass:$b, ImmType:$c),
                      (Intr i32:$src, regT:$b, (ImmTy Imm:$c))>;
    def : ATOM23_impl<AsmStr, regT, regclass, Preds,
                      (ins Int64Regs:$src, regclass:$b, ImmType:$c),
                      (Intr i64:$src, regT:$b, (ImmTy Imm:$c))>;
  }
  // imm, imm: no AddedComplexity override.
  def : ATOM23_impl<AsmStr, regT, regclass, Preds,
                    (ins Int32Regs:$src, ImmType:$b, ImmType:$c),
                    (Intr i32:$src, (ImmTy Imm:$b), (ImmTy Imm:$c))>;
  def : ATOM23_impl<AsmStr, regT, regclass, Preds,
                    (ins Int64Regs:$src, ImmType:$b, ImmType:$c),
                    (Intr i64:$src, (ImmTy Imm:$b), (ImmTy Imm:$c))>;
}

// Constructs intrinsic name and instruction asm strings.
// Asm string: "atom", then ".<space>" unless SpaceStr is "gen", then
// ".<scope>" unless ScopeStr is "gpu", then ".<op>.<type>" and the operands.
// Intrinsic: int_nvvm_atomic_<op>_<space>_<inttype>, with "_<scope>" appended
// when ScopeStr is non-empty.
multiclass ATOM2N_impl<string OpStr, string IntTypeStr, string TypeStr,
                       string ScopeStr, string SpaceStr,
                       ValueType regT, NVPTXRegClass regclass, Operand ImmType, SDNode Imm,
                       ValueType ImmTy, list<Predicate> Preds> {
  defm : ATOM2P_impl<
      !strconcat("atom",
                 !if(!eq(SpaceStr, "gen"), "", !strconcat(".", SpaceStr)),
                 !if(!eq(ScopeStr, "gpu"), "", !strconcat(".", ScopeStr)),
                 ".", OpStr, ".", TypeStr,
                 " \t$result, [$src], $b;"),
      !cast<Intrinsic>(
          !strconcat("int_nvvm_atomic_", OpStr,
                     "_", SpaceStr, "_", IntTypeStr,
                     !if(!empty(ScopeStr), "", !strconcat("_", ScopeStr)))),
      regT, regclass, ImmType, Imm, ImmTy, Preds>;
}
// Same as ATOM2N_impl, with an extra $c operand in the asm string.
multiclass ATOM3N_impl<string OpStr, string IntTypeStr, string TypeStr,
                       string ScopeStr, string SpaceStr,
                       ValueType regT, NVPTXRegClass regclass, Operand ImmType, SDNode Imm,
                       ValueType ImmTy, list<Predicate> Preds> {
  defm : ATOM3P_impl<
      !strconcat("atom",
                 !if(!eq(SpaceStr, "gen"), "", !strconcat(".", SpaceStr)),
                 !if(!eq(ScopeStr, "gpu"), "", !strconcat(".", ScopeStr)),
                 ".", OpStr, ".", TypeStr,
                 " \t$result, [$src], $b, $c;"),
      !cast<Intrinsic>(
          !strconcat("int_nvvm_atomic_", OpStr,
                     "_", SpaceStr, "_", IntTypeStr,
                     !if(!empty(ScopeStr), "", !strconcat("_", ScopeStr)))),
      regT, regclass, ImmType, Imm, ImmTy, Preds>;
}

// Constructs variants for different address spaces.
// For now we only need variants for generic space pointers.
// Address-space layer: only the "gen" (generic) space is instantiated.
multiclass ATOM2A_impl<string OpStr, string IntTypeStr, string TypeStr,
                       string ScopeStr, ValueType regT, NVPTXRegClass regclass, Operand ImmType,
                       SDNode Imm, ValueType ImmTy, list<Predicate> Preds> {
  defm _gen_ : ATOM2N_impl<OpStr, IntTypeStr, TypeStr, ScopeStr, "gen",
                           regT, regclass, ImmType, Imm, ImmTy, Preds>;
}
multiclass ATOM3A_impl<string OpStr, string IntTypeStr, string TypeStr,
                       string ScopeStr, ValueType regT, NVPTXRegClass regclass, Operand ImmType,
                       SDNode Imm, ValueType ImmTy, list<Predicate> Preds> {
  defm _gen_ : ATOM3N_impl<OpStr, IntTypeStr, TypeStr, ScopeStr, "gen",
                           regT, regclass, ImmType, Imm, ImmTy, Preds>;
}

// Constructs variants for different scopes of atomic op.
// Both scopes append hasAtomScope to the caller's predicate list.
multiclass ATOM2S_impl<string OpStr, string IntTypeStr, string TypeStr,
                       ValueType regT, NVPTXRegClass regclass, Operand ImmType, SDNode Imm,
                       ValueType ImmTy, list<Predicate> Preds> {
  // .gpu scope is the default and is currently covered by the existing
  // atomics without an explicitly specified scope.
  defm _cta : ATOM2A_impl<OpStr, IntTypeStr, TypeStr, "cta",
                          regT, regclass, ImmType, Imm, ImmTy,
                          !listconcat(Preds, [hasAtomScope])>;
  defm _sys : ATOM2A_impl<OpStr, IntTypeStr, TypeStr, "sys",
                          regT, regclass, ImmType, Imm, ImmTy,
                          !listconcat(Preds, [hasAtomScope])>;
}
multiclass ATOM3S_impl<string OpStr, string IntTypeStr, string TypeStr,
                       ValueType regT, NVPTXRegClass regclass, Operand ImmType, SDNode Imm, ValueType ImmTy,
                       list<Predicate> Preds> {
  // No need to define ".gpu"-scoped atomics.  They do the same thing
  // as the regular, non-scoped atomics defined elsewhere.
  defm _cta : ATOM3A_impl<OpStr, IntTypeStr, TypeStr, "cta",
                          regT, regclass, ImmType, Imm, ImmTy,
                          !listconcat(Preds, [hasAtomScope])>;
  defm _sys : ATOM3A_impl<OpStr, IntTypeStr, TypeStr, "sys",
                          regT, regclass, ImmType, Imm, ImmTy,
                          !listconcat(Preds, [hasAtomScope])>;
}

// atom.add
multiclass ATOM2_add_impl<string OpStr> {
  defm _s32  : ATOM2S_impl<OpStr, "i", "s32", i32, Int32Regs,
                           i32imm, imm, i32, []>;
  defm _u32  : ATOM2S_impl<OpStr, "i", "u32", i32, Int32Regs,
                           i32imm, imm, i32, []>;
  defm _u64  : ATOM2S_impl<OpStr, "i", "u64", i64, Int64Regs,
                           i64imm, imm, i64, []>;
  defm _bf16 : ATOM2S_impl<OpStr, "f", "bf16", bf16, Int16Regs,
                           bf16imm, fpimm, bf16, [hasSM<90>, hasPTX<78>]>;
  defm _f16  : ATOM2S_impl<OpStr, "f", "f16", f16, Int16Regs,
                           f16imm, fpimm, f16, [hasSM<70>, hasPTX<63>]>;
  defm _f32  : ATOM2S_impl<OpStr, "f", "f32", f32, Float32Regs,
                           f32imm, fpimm, f32, []>;
  defm _f64  : ATOM2S_impl<OpStr, "f", "f64", f64, Float64Regs,
                           f64imm, fpimm, f64, [hasAtomAddF64]>;
}

// atom.{and,or,xor}
multiclass ATOM2_bitwise_impl<string OpStr> {
  defm _b32 : ATOM2S_impl<OpStr, "i", "b32", i32, Int32Regs,
                          i32imm, imm, i32, []>;
  defm _b64 : ATOM2S_impl<OpStr, "i", "b64", i64, Int64Regs,
                          i64imm, imm, i64, [hasAtomBitwise64]>;
}

// atom.exch
multiclass ATOM2_exch_impl<string OpStr> {
  defm _b32 : ATOM2S_impl<OpStr, "i", "b32", i32, Int32Regs,
                          i32imm, imm, i32, []>;
  defm _b64 : ATOM2S_impl<OpStr, "i", "b64", i64, Int64Regs,
                          i64imm, imm, i64, []>;
}

// atom.{min,max}
multiclass ATOM2_minmax_impl<string OpStr> {
  defm _s32 : ATOM2S_impl<OpStr, "i", "s32", i32, Int32Regs,
                          i32imm, imm, i32, []>;
  defm _u32 : ATOM2S_impl<OpStr, "i", "u32", i32, Int32Regs,
                          i32imm, imm, i32, []>;
  defm _s64 : ATOM2S_impl<OpStr, "i", "s64", i64, Int64Regs,
                          i64imm, imm, i64, [hasAtomMinMax64]>;
  defm _u64 : ATOM2S_impl<OpStr, "i", "u64", i64, Int64Regs,
                          i64imm, imm, i64, [hasAtomMinMax64]>;
}

// atom.{inc,dec}
multiclass ATOM2_incdec_impl<string OpStr> {
  defm _u32 : ATOM2S_impl<OpStr, "i", "u32", i32, Int32Regs,
                          i32imm, imm, i32, []>;
}

// atom.cas
multiclass ATOM3_cas_impl<string OpStr> {
  defm _b16 : ATOM3S_impl<OpStr, "i", "b16", i16, Int16Regs,
                          i16imm, imm, i16, []>;
  defm _b32 : ATOM3S_impl<OpStr, "i", "b32", i32, Int32Regs,
                          i32imm, imm, i32, []>;
  defm _b64 : ATOM3S_impl<OpStr, "i", "b64", i64, Int64Regs,
                          i64imm, imm, i64, []>;
}

// One instantiation per scoped atomic operation.
defm INT_PTX_SATOM_ADD  : ATOM2_add_impl<"add">;
defm INT_PTX_SATOM_AND  : ATOM2_bitwise_impl<"and">;
defm INT_PTX_SATOM_CAS  : ATOM3_cas_impl<"cas">;
defm INT_PTX_SATOM_DEC  : ATOM2_incdec_impl<"dec">;
defm INT_PTX_SATOM_EXCH : ATOM2_exch_impl<"exch">;
defm INT_PTX_SATOM_INC  : ATOM2_incdec_impl<"inc">;
defm INT_PTX_SATOM_MAX  : ATOM2_minmax_impl<"max">;
defm INT_PTX_SATOM_MIN  : ATOM2_minmax_impl<"min">;
defm INT_PTX_SATOM_OR   : ATOM2_bitwise_impl<"or">;
defm INT_PTX_SATOM_XOR  : ATOM2_bitwise_impl<"xor">;

//-----------------------------------
// Support for ldu on sm_20 or later
//-----------------------------------

// Don't annotate ldu instructions as mayLoad, as they load from memory that is
// read-only in a kernel.

// Scalar

// Scalar ldu.global records, one per address operand form.  TyStr supplies
// everything after "ldu.global." (type suffix and operand list).  No
// selection patterns are attached; every record requires hasLDU.
multiclass LDU_G<string TyStr, NVPTXRegClass regclass> {
  def areg   : NVPTXInst<(outs regclass:$result), (ins Int32Regs:$src),
                         "ldu.global." # TyStr, []>,
               Requires<[hasLDU]>;
  def areg64 : NVPTXInst<(outs regclass:$result), (ins Int64Regs:$src),
                         "ldu.global." # TyStr, []>,
               Requires<[hasLDU]>;
  def avar   : NVPTXInst<(outs regclass:$result), (ins imemAny:$src),
                         "ldu.global." # TyStr, []>,
               Requires<[hasLDU]>;
  def ari    : NVPTXInst<(outs regclass:$result), (ins MEMri:$src),
                         "ldu.global." # TyStr, []>,
               Requires<[hasLDU]>;
  def ari64  : NVPTXInst<(outs regclass:$result), (ins MEMri64:$src),
                         "ldu.global." # TyStr, []>,
               Requires<[hasLDU]>;
}

defm INT_PTX_LDU_GLOBAL_i8
    : LDU_G<"u8 \t$result, [$src];", Int16Regs>;
defm INT_PTX_LDU_GLOBAL_i16
    : LDU_G<"u16 \t$result, [$src];", Int16Regs>;
defm INT_PTX_LDU_GLOBAL_i32
    : LDU_G<"u32 \t$result, [$src];", Int32Regs>;
defm INT_PTX_LDU_GLOBAL_i64
    : LDU_G<"u64 \t$result, [$src];", Int64Regs>;
defm INT_PTX_LDU_GLOBAL_f32
    : LDU_G<"f32 \t$result, [$src];", Float32Regs>;
defm INT_PTX_LDU_GLOBAL_f64
    : LDU_G<"f64 \t$result, [$src];", Float64Regs>;

// vector

// Elementized vector ldu
// Two-result form: each record has outputs $dst1 and $dst2.
multiclass VLDU_G_ELE_V2<string TyStr, NVPTXRegClass regclass> {
  def _areg32 : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
                          (ins Int32Regs:$src), "ldu.global." # TyStr, []>;
  def _areg64 : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
                          (ins Int64Regs:$src), "ldu.global." # TyStr, []>;
  def _ari32  : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
                          (ins MEMri:$src), "ldu.global." # TyStr, []>;
  def _ari64  : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
                          (ins MEMri64:$src), "ldu.global." # TyStr, []>;
  def _avar   : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
                          (ins imemAny:$src), "ldu.global." # TyStr, []>;
}

// Four-result form: each record has outputs $dst1 .. $dst4.
multiclass VLDU_G_ELE_V4<string TyStr, NVPTXRegClass regclass> {
  def _areg32 : NVPTXInst<(outs regclass:$dst1, regclass:$dst2,
                                regclass:$dst3, regclass:$dst4),
                          (ins Int32Regs:$src), "ldu.global." # TyStr, []>;
  def _areg64 : NVPTXInst<(outs regclass:$dst1, regclass:$dst2,
                                regclass:$dst3, regclass:$dst4),
                          (ins Int64Regs:$src), "ldu.global." # TyStr, []>;
  def _ari32  : NVPTXInst<(outs regclass:$dst1, regclass:$dst2,
                                regclass:$dst3, regclass:$dst4),
                          (ins MEMri:$src), "ldu.global." # TyStr, []>;
  def _ari64  : NVPTXInst<(outs regclass:$dst1, regclass:$dst2,
                                regclass:$dst3, regclass:$dst4),
                          (ins MEMri64:$src), "ldu.global." # TyStr, []>;
  def _avar   : NVPTXInst<(outs regclass:$dst1, regclass:$dst2,
                                regclass:$dst3, regclass:$dst4),
                          (ins imemAny:$src), "ldu.global." # TyStr, []>;
}

defm INT_PTX_LDU_G_v2i8_ELE : VLDU_G_ELE_V2<
    "v2.u8 \t{{$dst1, $dst2}}, [$src];", Int16Regs>;
defm INT_PTX_LDU_G_v2i16_ELE : VLDU_G_ELE_V2<
    "v2.u16 \t{{$dst1, $dst2}}, [$src];", Int16Regs>;
defm INT_PTX_LDU_G_v2i32_ELE : VLDU_G_ELE_V2<
    "v2.u32 \t{{$dst1, $dst2}}, [$src];", Int32Regs>;
defm INT_PTX_LDU_G_v2f32_ELE : VLDU_G_ELE_V2<
    "v2.f32 \t{{$dst1, $dst2}}, [$src];", Float32Regs>;
defm INT_PTX_LDU_G_v2i64_ELE : VLDU_G_ELE_V2<
    "v2.u64 \t{{$dst1, $dst2}}, [$src];", Int64Regs>;
defm INT_PTX_LDU_G_v2f64_ELE : VLDU_G_ELE_V2<
    "v2.f64 \t{{$dst1, $dst2}}, [$src];", Float64Regs>;
defm INT_PTX_LDU_G_v4i8_ELE : VLDU_G_ELE_V4<
    "v4.u8 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Int16Regs>;
defm INT_PTX_LDU_G_v4i16_ELE : VLDU_G_ELE_V4<
    "v4.u16 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Int16Regs>;
defm INT_PTX_LDU_G_v4i32_ELE : VLDU_G_ELE_V4<
    "v4.u32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Int32Regs>;
defm INT_PTX_LDU_G_v4f16_ELE : VLDU_G_ELE_V4<
    "v4.b16 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Int16Regs>;
defm INT_PTX_LDU_G_v4f16x2_ELE : VLDU_G_ELE_V4<
    "v4.b32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Int32Regs>;
defm INT_PTX_LDU_G_v4f32_ELE : VLDU_G_ELE_V4<
    "v4.f32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Float32Regs>;


//-----------------------------------
// Support for ldg on sm_35 or later
//-----------------------------------

// Don't annotate ld.global.nc as mayLoad, because these loads go through the
// non-coherent texture cache, and therefore the values read must be read-only
// during the lifetime of the kernel.

// Scalar ld.global.nc records, one per address operand form.  TyStr supplies
// everything after "ld.global.nc.".  No selection patterns are attached;
// every record requires hasLDG.
multiclass LDG_G<string TyStr, NVPTXRegClass regclass> {
  def areg   : NVPTXInst<(outs regclass:$result), (ins Int32Regs:$src),
                         "ld.global.nc." # TyStr, []>,
               Requires<[hasLDG]>;
  def areg64 : NVPTXInst<(outs regclass:$result), (ins Int64Regs:$src),
                         "ld.global.nc." # TyStr, []>,
               Requires<[hasLDG]>;
  def avar   : NVPTXInst<(outs regclass:$result), (ins imemAny:$src),
                         "ld.global.nc." # TyStr, []>,
               Requires<[hasLDG]>;
  def ari    : NVPTXInst<(outs regclass:$result), (ins MEMri:$src),
                         "ld.global.nc." # TyStr, []>,
               Requires<[hasLDG]>;
  def ari64  : NVPTXInst<(outs regclass:$result), (ins MEMri64:$src),
                         "ld.global.nc." # TyStr, []>,
               Requires<[hasLDG]>;
}

defm INT_PTX_LDG_GLOBAL_i8  : LDG_G<"u8 \t$result, [$src];", Int16Regs>;
defm INT_PTX_LDG_GLOBAL_i16 : LDG_G<"u16 \t$result, [$src];", Int16Regs>;
defm INT_PTX_LDG_GLOBAL_i32 : LDG_G<"u32 \t$result, [$src];", Int32Regs>;
defm INT_PTX_LDG_GLOBAL_i64 : LDG_G<"u64 \t$result, [$src];", Int64Regs>;
defm INT_PTX_LDG_GLOBAL_f32 : LDG_G<"f32 \t$result, [$src];", Float32Regs>;
defm INT_PTX_LDG_GLOBAL_f64 : LDG_G<"f64 \t$result, [$src];", Float64Regs>;

// vector

// Elementized vector ldg
// Two-result form: each record has outputs $dst1 and $dst2.  TyStr supplies
// everything after "ld.global.nc."; no selection patterns are attached.
multiclass VLDG_G_ELE_V2<string TyStr, NVPTXRegClass regclass> {
  def _areg32 : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
                          (ins Int32Regs:$src), "ld.global.nc." # TyStr, []>;
  def _areg64 : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
                          (ins Int64Regs:$src), "ld.global.nc." # TyStr, []>;
  def _ari32  : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
                          (ins MEMri:$src), "ld.global.nc." # TyStr, []>;
  def _ari64  : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
                          (ins MEMri64:$src), "ld.global.nc." # TyStr, []>;
  def _avar   : NVPTXInst<(outs regclass:$dst1, regclass:$dst2),
                          (ins imemAny:$src), "ld.global.nc." # TyStr, []>;
}

// Four-result form: each record has outputs $dst1 .. $dst4.
multiclass VLDG_G_ELE_V4<string TyStr, NVPTXRegClass regclass> {
  def _areg32 : NVPTXInst<(outs regclass:$dst1, regclass:$dst2,
                                regclass:$dst3, regclass:$dst4),
                          (ins Int32Regs:$src), "ld.global.nc." # TyStr, []>;
  def _areg64 : NVPTXInst<(outs regclass:$dst1, regclass:$dst2,
                                regclass:$dst3, regclass:$dst4),
                          (ins Int64Regs:$src), "ld.global.nc." # TyStr, []>;
  def _ari32  : NVPTXInst<(outs regclass:$dst1, regclass:$dst2,
                                regclass:$dst3, regclass:$dst4),
                          (ins MEMri:$src), "ld.global.nc." # TyStr, []>;
  def _ari64  : NVPTXInst<(outs regclass:$dst1, regclass:$dst2,
                                regclass:$dst3, regclass:$dst4),
                          (ins MEMri64:$src), "ld.global.nc." # TyStr, []>;
  def _avar   : NVPTXInst<(outs regclass:$dst1, regclass:$dst2,
                                regclass:$dst3, regclass:$dst4),
                          (ins imemAny:$src), "ld.global.nc." # TyStr, []>;
}

// FIXME: 8-bit LDG should be fixed once LDG/LDU nodes are made into proper loads.
// Elementized ldg instantiations. The 8-bit variants use Int16Regs for their
// results, exactly like the 16-bit ones.
defm INT_PTX_LDG_G_v2i8_ELE : VLDG_G_ELE_V2<
    "v2.u8 \t{{$dst1, $dst2}}, [$src];", Int16Regs>;
defm INT_PTX_LDG_G_v2i16_ELE : VLDG_G_ELE_V2<
    "v2.u16 \t{{$dst1, $dst2}}, [$src];", Int16Regs>;
defm INT_PTX_LDG_G_v2i32_ELE : VLDG_G_ELE_V2<
    "v2.u32 \t{{$dst1, $dst2}}, [$src];", Int32Regs>;
defm INT_PTX_LDG_G_v2f32_ELE : VLDG_G_ELE_V2<
    "v2.f32 \t{{$dst1, $dst2}}, [$src];", Float32Regs>;
defm INT_PTX_LDG_G_v2i64_ELE : VLDG_G_ELE_V2<
    "v2.u64 \t{{$dst1, $dst2}}, [$src];", Int64Regs>;
defm INT_PTX_LDG_G_v2f64_ELE : VLDG_G_ELE_V2<
    "v2.f64 \t{{$dst1, $dst2}}, [$src];", Float64Regs>;
defm INT_PTX_LDG_G_v4i8_ELE : VLDG_G_ELE_V4<
    "v4.u8 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Int16Regs>;
defm INT_PTX_LDG_G_v4i16_ELE : VLDG_G_ELE_V4<
    "v4.u16 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Int16Regs>;
defm INT_PTX_LDG_G_v4i32_ELE : VLDG_G_ELE_V4<
    "v4.u32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Int32Regs>;
defm INT_PTX_LDG_G_v4f32_ELE : VLDG_G_ELE_V4<
    "v4.f32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Float32Regs>;


// Emits "cvta.<Str>" in a 32-bit form (record named after the defm itself) and
// a 64-bit form (suffix _64). Neither carries a selection pattern.
// NOTE(review): going by the multiclass name this is the non-generic ->
// generic direction; confirm against the PTX cvta documentation.
multiclass NG_TO_G<string Str> {
  def "" : NVPTXInst<(outs Int32Regs:$result), (ins Int32Regs:$src),
                     "cvta." # Str # ".u32 \t$result, $src;",
                     []>;
  def _64 : NVPTXInst<(outs Int64Regs:$result), (ins Int64Regs:$src),
                      "cvta." # Str # ".u64 \t$result, $src;",
                      []>;
}

// Same shape as NG_TO_G, but emits "cvta.to.<Str>".
multiclass G_TO_NG<string Str> {
  def "" : NVPTXInst<(outs Int32Regs:$result), (ins Int32Regs:$src),
                     "cvta.to." # Str # ".u32 \t$result, $src;",
                     []>;
  def _64 : NVPTXInst<(outs Int64Regs:$result), (ins Int64Regs:$src),
                      "cvta.to." # Str # ".u64 \t$result, $src;",
                      []>;
}

defm cvta_local     : NG_TO_G<"local">;
defm cvta_shared    : NG_TO_G<"shared">;
defm cvta_global    : NG_TO_G<"global">;
defm cvta_const     : NG_TO_G<"const">;

defm cvta_to_local  : G_TO_NG<"local">;
defm cvta_to_shared : G_TO_NG<"shared">;
defm cvta_to_global : G_TO_NG<"global">;
defm cvta_to_const  : G_TO_NG<"const">;

// nvvm.ptr.param.to.gen
defm cvta_param : NG_TO_G<"param">;

def : Pat<(int_nvvm_ptr_param_to_gen i32:$src), (cvta_param $src)>;
def : Pat<(int_nvvm_ptr_param_to_gen i64:$src), (cvta_param_64 $src)>;

// nvvm.ptr.gen.to.param
// Selected to the source register itself; no instruction is emitted.
def : Pat<(int_nvvm_ptr_gen_to_param i32:$src), (i32 Int32Regs:$src)>;
def : Pat<(int_nvvm_ptr_gen_to_param i64:$src), (i64 Int64Regs:$src)>;

// nvvm.move intrinsics
// One "mov" per intrinsic; the pointer flavour exists in a 32-bit and a 64-bit
// version that both match int_nvvm_move_ptr.
def nvvm_move_i16 :
  NVPTXInst<(outs Int16Regs:$r), (ins Int16Regs:$s),
            "mov.b16 \t$r, $s;",
            [(set i16:$r, (int_nvvm_move_i16 i16:$s))]>;
def nvvm_move_i32 :
  NVPTXInst<(outs Int32Regs:$r), (ins Int32Regs:$s),
            "mov.b32 \t$r, $s;",
            [(set i32:$r, (int_nvvm_move_i32 i32:$s))]>;
def nvvm_move_i64 :
  NVPTXInst<(outs Int64Regs:$r), (ins Int64Regs:$s),
            "mov.b64 \t$r, $s;",
            [(set i64:$r, (int_nvvm_move_i64 i64:$s))]>;
def nvvm_move_float :
  NVPTXInst<(outs Float32Regs:$r), (ins Float32Regs:$s),
            "mov.f32 \t$r, $s;",
            [(set f32:$r, (int_nvvm_move_float f32:$s))]>;
def nvvm_move_double :
  NVPTXInst<(outs Float64Regs:$r), (ins Float64Regs:$s),
            "mov.f64 \t$r, $s;",
            [(set f64:$r, (int_nvvm_move_double f64:$s))]>;
def nvvm_move_ptr32 :
  NVPTXInst<(outs Int32Regs:$r), (ins Int32Regs:$s),
            "mov.u32 \t$r, $s;",
            [(set i32:$r, (int_nvvm_move_ptr i32:$s))]>;
def nvvm_move_ptr64 :
  NVPTXInst<(outs Int64Regs:$r), (ins Int64Regs:$s),
            "mov.u64 \t$r, $s;",
            [(set i64:$r, (int_nvvm_move_ptr i64:$s))]>;

// @TODO:
// Are these actually needed, or will we always just see symbols
// copied to registers first?
/*def nvvm_move_sym32 : NVPTXInst<(outs Int32Regs:$r), (ins imem:$s),
                             "mov.u32 \t$r, $s;",
                             [(set Int32Regs:$r,
                               (int_nvvm_move_ptr texternalsym:$s))]>;
def nvvm_move_sym64 : NVPTXInst<(outs Int64Regs:$r), (ins imem:$s),
                             "mov.u64 \t$r, $s;",
                             [(set Int64Regs:$r,
                               (int_nvvm_move_ptr texternalsym:$s))]>;*/

// Moves an imem operand into a 64-bit register; has no selection pattern.
def texsurf_handles :
  NVPTXInst<(outs Int64Regs:$result), (ins imem:$src),
            "mov.u64 \t$result, $src;", []>;

//-----------------------------------
// Compiler Error Warn
// - Just ignore them in codegen
//-----------------------------------

// Each of these prints only a PTX comment; the operand $a is not referenced
// by the assembly string.
def INT_NVVM_COMPILER_WARN_32 :
  NVPTXInst<(outs), (ins Int32Regs:$a),
            "// llvm.nvvm.compiler.warn()",
            [(int_nvvm_compiler_warn i32:$a)]>;
def INT_NVVM_COMPILER_WARN_64 :
  NVPTXInst<(outs), (ins Int64Regs:$a),
            "// llvm.nvvm.compiler.warn()",
            [(int_nvvm_compiler_warn i64:$a)]>;
def INT_NVVM_COMPILER_ERROR_32 :
  NVPTXInst<(outs), (ins Int32Regs:$a),
            "// llvm.nvvm.compiler.error()",
            [(int_nvvm_compiler_error i32:$a)]>;
def INT_NVVM_COMPILER_ERROR_64 :
  NVPTXInst<(outs), (ins Int64Regs:$a),
            "// llvm.nvvm.compiler.error()",
            [(int_nvvm_compiler_error i64:$a)]>;


// isspacep

// Defines _32 and _64 variants of "isspacep.<suffix>", taking the address in
// an Int32Regs / Int64Regs operand and producing an i1 in Int1Regs. Preds is
// attached to both variants through Requires<>.
multiclass ISSPACEP<string suffix, Intrinsic Intr, list<Predicate> Preds = []> {
  defvar AsmStr = "isspacep." # suffix # "\t$d, $a;";

  def _32 : NVPTXInst<(outs Int1Regs:$d), (ins Int32Regs:$a), AsmStr,
                      [(set i1:$d, (Intr i32:$a))]>,
            Requires<Preds>;
  def _64 : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a), AsmStr,
                      [(set i1:$d, (Intr i64:$a))]>,
            Requires<Preds>;
}

defm isspace_const  : ISSPACEP<"const", int_nvvm_isspacep_const, [hasPTX<31>]>;
defm isspace_global : ISSPACEP<"global", int_nvvm_isspacep_global>;
defm isspace_local  : ISSPACEP<"local", int_nvvm_isspacep_local>;
defm isspace_shared : ISSPACEP<"shared", int_nvvm_isspacep_shared>;
defm isspace_shared_cluster
  : ISSPACEP<"shared::cluster", int_nvvm_isspacep_shared_cluster,
             [hasPTX<78>, hasSM<90>]>;

// Special register reads
def MOV_SPECIAL : NVPTXInst<(outs Int32Regs:$d), (ins SpecialRegs:$r),
                            "mov.b32 \t$d, $r;", []>;

// int_nvvm_read_ptx_sreg_envreg<i> is selected to MOV_SPECIAL of ENVREG<i>,
// for i = 0 .. 31.
foreach i = 0...31 in
  def : Pat<(!cast<Intrinsic>("int_nvvm_read_ptx_sreg_envreg" # i)),
            (MOV_SPECIAL !cast<Register>("ENVREG" # i))>;


// The two I64toI32 halves of $src are passed to V2I32toI64 with the H half
// first and the L half second.
def : Pat<(int_nvvm_swap_lo_hi_b64 i64:$src),
          (V2I32toI64 (I64toI32H $src), (I64toI32L $src))>;

//-----------------------------------
// Texture Intrinsics
//-----------------------------------

// NOTE: For Fermi support, any new texture/surface/sampler intrinsics must be
//       also defined in NVPTXReplaceImageHandles.cpp

// texmode_independent
let IsTex = true, IsTexModeUnified = false in {
// Texture fetch instructions using handles
//
// Conventions shared by everything below:
//  * A *_base class yields four results ($r, $g, $b, $a) of class `outtype`.
//    Its inputs are the `texsamp` dag, which supplies $t and $s, followed by
//    the coordinate / lod / gradient operands of class `intype` (plus an
//    Int32Regs $l for the array flavours).
//  * Every multiclass defines _RR, _RI, _IR and _II. The two letters give the
//    operand kind of $t and $s respectively: R = Int64Regs, I = i64imm.
//  * Only _RR carries a selection pattern (it matches `intr`); the other three
//    keep the default empty pattern list.

// ---- 1D ----

class TEX_1D_base<string inst, NVPTXRegClass outtype, NVPTXRegClass intype,
                  dag texsamp, list<dag> pattern = []>
  : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
              !con(texsamp, (ins intype:$x)),
              inst # " \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}];",
              pattern>;

multiclass TEX_1D<string inst, NVPTXRegClass outtype, NVPTXRegClass intype,
                  Intrinsic intr> {
  def _RR : TEX_1D_base<
      inst, outtype, intype, (ins Int64Regs:$t, Int64Regs:$s),
      [(set outtype:$r, outtype:$g, outtype:$b, outtype:$a,
            (intr i64:$t, i64:$s, intype:$x))]>;
  def _RI : TEX_1D_base<inst, outtype, intype, (ins Int64Regs:$t, i64imm:$s)>;
  def _IR : TEX_1D_base<inst, outtype, intype, (ins i64imm:$t, Int64Regs:$s)>;
  def _II : TEX_1D_base<inst, outtype, intype, (ins i64imm:$t, i64imm:$s)>;
}

defm TEX_1D_F32_S32 : TEX_1D<
    "tex.1d.v4.f32.s32", Float32Regs, Int32Regs, int_nvvm_tex_1d_v4f32_s32>;
defm TEX_1D_F32_F32 : TEX_1D<
    "tex.1d.v4.f32.f32", Float32Regs, Float32Regs, int_nvvm_tex_1d_v4f32_f32>;
defm TEX_1D_S32_S32 : TEX_1D<
    "tex.1d.v4.s32.s32", Int32Regs, Int32Regs, int_nvvm_tex_1d_v4s32_s32>;
defm TEX_1D_S32_F32 : TEX_1D<
    "tex.1d.v4.s32.f32", Int32Regs, Float32Regs, int_nvvm_tex_1d_v4s32_f32>;
defm TEX_1D_U32_S32 : TEX_1D<
    "tex.1d.v4.u32.s32", Int32Regs, Int32Regs, int_nvvm_tex_1d_v4u32_s32>;
defm TEX_1D_U32_F32 : TEX_1D<
    "tex.1d.v4.u32.f32", Int32Regs, Float32Regs, int_nvvm_tex_1d_v4u32_f32>;

class TEX_1D_LEVEL_base<string inst, NVPTXRegClass outtype,
                        NVPTXRegClass intype, dag texsamp,
                        list<dag> pattern = []>
  : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
              !con(texsamp, (ins intype:$x, intype:$lod)),
              inst # " \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}], $lod;",
              pattern>;

multiclass TEX_1D_LEVEL<string inst, NVPTXRegClass outtype,
                        NVPTXRegClass intype, Intrinsic intr> {
  def _RR : TEX_1D_LEVEL_base<
      inst, outtype, intype, (ins Int64Regs:$t, Int64Regs:$s),
      [(set outtype:$r, outtype:$g, outtype:$b, outtype:$a,
            (intr i64:$t, i64:$s, intype:$x, intype:$lod))]>;
  def _RI : TEX_1D_LEVEL_base<inst, outtype, intype,
                              (ins Int64Regs:$t, i64imm:$s)>;
  def _IR : TEX_1D_LEVEL_base<inst, outtype, intype,
                              (ins i64imm:$t, Int64Regs:$s)>;
  def _II : TEX_1D_LEVEL_base<inst, outtype, intype,
                              (ins i64imm:$t, i64imm:$s)>;
}

defm TEX_1D_F32_F32_LEVEL : TEX_1D_LEVEL<
    "tex.level.1d.v4.f32.f32", Float32Regs, Float32Regs,
    int_nvvm_tex_1d_level_v4f32_f32>;
defm TEX_1D_S32_F32_LEVEL : TEX_1D_LEVEL<
    "tex.level.1d.v4.s32.f32", Int32Regs, Float32Regs,
    int_nvvm_tex_1d_level_v4s32_f32>;
defm TEX_1D_U32_F32_LEVEL : TEX_1D_LEVEL<
    "tex.level.1d.v4.u32.f32", Int32Regs, Float32Regs,
    int_nvvm_tex_1d_level_v4u32_f32>;

class TEX_1D_GRAD_base<string inst, NVPTXRegClass outtype,
                       NVPTXRegClass intype, dag texsamp,
                       list<dag> pattern = []>
  : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
              !con(texsamp, (ins intype:$x, intype:$gradx, intype:$grady)),
              inst # " \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x\\}],"
                   # " \\{$gradx\\}, \\{$grady\\};",
              pattern>;

multiclass TEX_1D_GRAD<string inst, NVPTXRegClass outtype,
                       NVPTXRegClass intype, Intrinsic intr> {
  def _RR : TEX_1D_GRAD_base<
      inst, outtype, intype, (ins Int64Regs:$t, Int64Regs:$s),
      [(set outtype:$r, outtype:$g, outtype:$b, outtype:$a,
            (intr i64:$t, i64:$s, intype:$x, intype:$gradx, intype:$grady))]>;
  def _RI : TEX_1D_GRAD_base<inst, outtype, intype,
                             (ins Int64Regs:$t, i64imm:$s)>;
  def _IR : TEX_1D_GRAD_base<inst, outtype, intype,
                             (ins i64imm:$t, Int64Regs:$s)>;
  def _II : TEX_1D_GRAD_base<inst, outtype, intype,
                             (ins i64imm:$t, i64imm:$s)>;
}

defm TEX_1D_F32_F32_GRAD : TEX_1D_GRAD<
    "tex.grad.1d.v4.f32.f32", Float32Regs, Float32Regs,
    int_nvvm_tex_1d_grad_v4f32_f32>;
defm TEX_1D_S32_F32_GRAD : TEX_1D_GRAD<
    "tex.grad.1d.v4.s32.f32", Int32Regs, Float32Regs,
    int_nvvm_tex_1d_grad_v4s32_f32>;
defm TEX_1D_U32_F32_GRAD : TEX_1D_GRAD<
    "tex.grad.1d.v4.u32.f32", Int32Regs, Float32Regs,
    int_nvvm_tex_1d_grad_v4u32_f32>;

// ---- 1D array ----

class TEX_1D_ARRAY_base<string inst, NVPTXRegClass outtype,
                        NVPTXRegClass intype, dag texsamp,
                        list<dag> pattern = []>
  : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
              !con(texsamp, (ins Int32Regs:$l, intype:$x)),
              inst # " \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$l, $x\\}];",
              pattern>;

multiclass TEX_1D_ARRAY<string inst, NVPTXRegClass outtype,
                        NVPTXRegClass intype, Intrinsic intr> {
  def _RR : TEX_1D_ARRAY_base<
      inst, outtype, intype, (ins Int64Regs:$t, Int64Regs:$s),
      [(set outtype:$r, outtype:$g, outtype:$b, outtype:$a,
            (intr i64:$t, i64:$s, i32:$l, intype:$x))]>;
  def _RI : TEX_1D_ARRAY_base<inst, outtype, intype,
                              (ins Int64Regs:$t, i64imm:$s)>;
  def _IR : TEX_1D_ARRAY_base<inst, outtype, intype,
                              (ins i64imm:$t, Int64Regs:$s)>;
  def _II : TEX_1D_ARRAY_base<inst, outtype, intype,
                              (ins i64imm:$t, i64imm:$s)>;
}

defm TEX_1D_ARRAY_F32_F32 : TEX_1D_ARRAY<
    "tex.a1d.v4.f32.f32", Float32Regs, Float32Regs,
    int_nvvm_tex_1d_array_v4f32_f32>;
defm TEX_1D_ARRAY_F32_S32 : TEX_1D_ARRAY<
    "tex.a1d.v4.f32.s32", Float32Regs, Int32Regs,
    int_nvvm_tex_1d_array_v4f32_s32>;
defm TEX_1D_ARRAY_S32_S32 : TEX_1D_ARRAY<
    "tex.a1d.v4.s32.s32", Int32Regs, Int32Regs,
    int_nvvm_tex_1d_array_v4s32_s32>;
defm TEX_1D_ARRAY_S32_F32 : TEX_1D_ARRAY<
    "tex.a1d.v4.s32.f32", Int32Regs, Float32Regs,
    int_nvvm_tex_1d_array_v4s32_f32>;
defm TEX_1D_ARRAY_U32_S32 : TEX_1D_ARRAY<
    "tex.a1d.v4.u32.s32", Int32Regs, Int32Regs,
    int_nvvm_tex_1d_array_v4u32_s32>;
defm TEX_1D_ARRAY_U32_F32 : TEX_1D_ARRAY<
    "tex.a1d.v4.u32.f32", Int32Regs, Float32Regs,
    int_nvvm_tex_1d_array_v4u32_f32>;

class TEX_1D_ARRAY_LEVEL_base<string inst, NVPTXRegClass outtype,
                              NVPTXRegClass intype, dag texsamp,
                              list<dag> pattern = []>
  : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
              !con(texsamp, (ins Int32Regs:$l, intype:$x, intype:$lod)),
              inst # " \t\\{$r, $g, $b, $a\\},"
                   # " [$t, $s, \\{$l, $x\\}], $lod;",
              pattern>;

multiclass TEX_1D_ARRAY_LEVEL<string inst, NVPTXRegClass outtype,
                              NVPTXRegClass intype, Intrinsic intr> {
  def _RR : TEX_1D_ARRAY_LEVEL_base<
      inst, outtype, intype, (ins Int64Regs:$t, Int64Regs:$s),
      [(set outtype:$r, outtype:$g, outtype:$b, outtype:$a,
            (intr i64:$t, i64:$s, i32:$l, intype:$x, intype:$lod))]>;
  def _RI : TEX_1D_ARRAY_LEVEL_base<inst, outtype, intype,
                                    (ins Int64Regs:$t, i64imm:$s)>;
  def _IR : TEX_1D_ARRAY_LEVEL_base<inst, outtype, intype,
                                    (ins i64imm:$t, Int64Regs:$s)>;
  def _II : TEX_1D_ARRAY_LEVEL_base<inst, outtype, intype,
                                    (ins i64imm:$t, i64imm:$s)>;
}

defm TEX_1D_ARRAY_F32_F32_LEVEL : TEX_1D_ARRAY_LEVEL<
    "tex.level.a1d.v4.f32.f32", Float32Regs, Float32Regs,
    int_nvvm_tex_1d_array_level_v4f32_f32>;
defm TEX_1D_ARRAY_S32_F32_LEVEL : TEX_1D_ARRAY_LEVEL<
    "tex.level.a1d.v4.s32.f32", Int32Regs, Float32Regs,
    int_nvvm_tex_1d_array_level_v4s32_f32>;
defm TEX_1D_ARRAY_U32_F32_LEVEL : TEX_1D_ARRAY_LEVEL<
    "tex.level.a1d.v4.u32.f32", Int32Regs, Float32Regs,
    int_nvvm_tex_1d_array_level_v4u32_f32>;

class TEX_1D_ARRAY_GRAD_base<string inst, NVPTXRegClass outtype,
                             NVPTXRegClass intype, dag texsamp,
                             list<dag> pattern = []>
  : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
              !con(texsamp, (ins Int32Regs:$l, intype:$x,
                                 intype:$gradx, intype:$grady)),
              inst # " \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$l, $x\\}],"
                   # " \\{$gradx\\}, \\{$grady\\};",
              pattern>;

multiclass TEX_1D_ARRAY_GRAD<string inst, NVPTXRegClass outtype,
                             NVPTXRegClass intype, Intrinsic intr> {
  def _RR : TEX_1D_ARRAY_GRAD_base<
      inst, outtype, intype, (ins Int64Regs:$t, Int64Regs:$s),
      [(set outtype:$r, outtype:$g, outtype:$b, outtype:$a,
            (intr i64:$t, i64:$s, i32:$l, intype:$x,
                  intype:$gradx, intype:$grady))]>;
  def _RI : TEX_1D_ARRAY_GRAD_base<inst, outtype, intype,
                                   (ins Int64Regs:$t, i64imm:$s)>;
  def _IR : TEX_1D_ARRAY_GRAD_base<inst, outtype, intype,
                                   (ins i64imm:$t, Int64Regs:$s)>;
  def _II : TEX_1D_ARRAY_GRAD_base<inst, outtype, intype,
                                   (ins i64imm:$t, i64imm:$s)>;
}

defm TEX_1D_ARRAY_F32_F32_GRAD : TEX_1D_ARRAY_GRAD<
    "tex.grad.a1d.v4.f32.f32", Float32Regs, Float32Regs,
    int_nvvm_tex_1d_array_grad_v4f32_f32>;
defm TEX_1D_ARRAY_S32_F32_GRAD : TEX_1D_ARRAY_GRAD<
    "tex.grad.a1d.v4.s32.f32", Int32Regs, Float32Regs,
    int_nvvm_tex_1d_array_grad_v4s32_f32>;
defm TEX_1D_ARRAY_U32_F32_GRAD : TEX_1D_ARRAY_GRAD<
    "tex.grad.a1d.v4.u32.f32", Int32Regs, Float32Regs,
    int_nvvm_tex_1d_array_grad_v4u32_f32>;

// ---- 2D ----

class TEX_2D_base<string inst, NVPTXRegClass outtype, NVPTXRegClass intype,
                  dag texsamp, list<dag> pattern = []>
  : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
              !con(texsamp, (ins intype:$x, intype:$y)),
              inst # " \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x, $y\\}];",
              pattern>;

multiclass TEX_2D<string inst, NVPTXRegClass outtype, NVPTXRegClass intype,
                  Intrinsic intr> {
  def _RR : TEX_2D_base<
      inst, outtype, intype, (ins Int64Regs:$t, Int64Regs:$s),
      [(set outtype:$r, outtype:$g, outtype:$b, outtype:$a,
            (intr i64:$t, i64:$s, intype:$x, intype:$y))]>;
  def _RI : TEX_2D_base<inst, outtype, intype, (ins Int64Regs:$t, i64imm:$s)>;
  def _IR : TEX_2D_base<inst, outtype, intype, (ins i64imm:$t, Int64Regs:$s)>;
  def _II : TEX_2D_base<inst, outtype, intype, (ins i64imm:$t, i64imm:$s)>;
}

defm TEX_2D_F32_F32 : TEX_2D<
    "tex.2d.v4.f32.f32", Float32Regs, Float32Regs, int_nvvm_tex_2d_v4f32_f32>;
defm TEX_2D_F32_S32 : TEX_2D<
    "tex.2d.v4.f32.s32", Float32Regs, Int32Regs, int_nvvm_tex_2d_v4f32_s32>;
defm TEX_2D_S32_S32 : TEX_2D<
    "tex.2d.v4.s32.s32", Int32Regs, Int32Regs, int_nvvm_tex_2d_v4s32_s32>;
defm TEX_2D_S32_F32 : TEX_2D<
    "tex.2d.v4.s32.f32", Int32Regs, Float32Regs, int_nvvm_tex_2d_v4s32_f32>;
defm TEX_2D_U32_S32 : TEX_2D<
    "tex.2d.v4.u32.s32", Int32Regs, Int32Regs, int_nvvm_tex_2d_v4u32_s32>;
defm TEX_2D_U32_F32 : TEX_2D<
    "tex.2d.v4.u32.f32", Int32Regs, Float32Regs, int_nvvm_tex_2d_v4u32_f32>;

class TEX_2D_LEVEL_base<string inst, NVPTXRegClass outtype,
                        NVPTXRegClass intype, dag texsamp,
                        list<dag> pattern = []>
  : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
              !con(texsamp, (ins intype:$x, intype:$y, intype:$lod)),
              inst # " \t\\{$r, $g, $b, $a\\},"
                   # " [$t, $s, \\{$x, $y\\}], $lod;",
              pattern>;

multiclass TEX_2D_LEVEL<string inst, NVPTXRegClass outtype,
                        NVPTXRegClass intype, Intrinsic intr> {
  def _RR : TEX_2D_LEVEL_base<
      inst, outtype, intype, (ins Int64Regs:$t, Int64Regs:$s),
      [(set outtype:$r, outtype:$g, outtype:$b, outtype:$a,
            (intr i64:$t, i64:$s, intype:$x, intype:$y, intype:$lod))]>;
  def _RI : TEX_2D_LEVEL_base<inst, outtype, intype,
                              (ins Int64Regs:$t, i64imm:$s)>;
  def _IR : TEX_2D_LEVEL_base<inst, outtype, intype,
                              (ins i64imm:$t, Int64Regs:$s)>;
  def _II : TEX_2D_LEVEL_base<inst, outtype, intype,
                              (ins i64imm:$t, i64imm:$s)>;
}

defm TEX_2D_F32_F32_LEVEL : TEX_2D_LEVEL<
    "tex.level.2d.v4.f32.f32", Float32Regs, Float32Regs,
    int_nvvm_tex_2d_level_v4f32_f32>;
defm TEX_2D_S32_F32_LEVEL : TEX_2D_LEVEL<
    "tex.level.2d.v4.s32.f32", Int32Regs, Float32Regs,
    int_nvvm_tex_2d_level_v4s32_f32>;
defm TEX_2D_U32_F32_LEVEL : TEX_2D_LEVEL<
    "tex.level.2d.v4.u32.f32", Int32Regs, Float32Regs,
    int_nvvm_tex_2d_level_v4u32_f32>;

class TEX_2D_GRAD_base<string inst, NVPTXRegClass outtype,
                       NVPTXRegClass intype, dag texsamp,
                       list<dag> pattern = []>
  : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
              !con(texsamp, (ins intype:$x, intype:$y,
                                 intype:$gradx0, intype:$gradx1,
                                 intype:$grady0, intype:$grady1)),
              inst # " \t\\{$r, $g, $b, $a\\}, [$t, $s, \\{$x, $y\\}],"
                   # " \\{$gradx0, $gradx1\\}, \\{$grady0, $grady1\\};",
              pattern>;

multiclass TEX_2D_GRAD<string inst, NVPTXRegClass outtype,
                       NVPTXRegClass intype, Intrinsic intr> {
  def _RR : TEX_2D_GRAD_base<
      inst, outtype, intype, (ins Int64Regs:$t, Int64Regs:$s),
      [(set outtype:$r, outtype:$g, outtype:$b, outtype:$a,
            (intr i64:$t, i64:$s, intype:$x, intype:$y,
                  intype:$gradx0, intype:$gradx1,
                  intype:$grady0, intype:$grady1))]>;
  def _RI : TEX_2D_GRAD_base<inst, outtype, intype,
                             (ins Int64Regs:$t, i64imm:$s)>;
  def _IR : TEX_2D_GRAD_base<inst, outtype, intype,
                             (ins i64imm:$t, Int64Regs:$s)>;
  def _II : TEX_2D_GRAD_base<inst, outtype, intype,
                             (ins i64imm:$t, i64imm:$s)>;
}

defm TEX_2D_F32_F32_GRAD : TEX_2D_GRAD<
    "tex.grad.2d.v4.f32.f32", Float32Regs, Float32Regs,
    int_nvvm_tex_2d_grad_v4f32_f32>;
defm TEX_2D_S32_F32_GRAD : TEX_2D_GRAD<
    "tex.grad.2d.v4.s32.f32", Int32Regs, Float32Regs,
    int_nvvm_tex_2d_grad_v4s32_f32>;
defm TEX_2D_U32_F32_GRAD : TEX_2D_GRAD<
    "tex.grad.2d.v4.u32.f32", Int32Regs, Float32Regs,
    int_nvvm_tex_2d_grad_v4u32_f32>;

// ---- 2D array ----
// The coordinate vector is printed with four elements; $y is repeated to fill
// the last slot.
// NOTE(review): presumably a padding element required by the PTX operand
// form - confirm against the PTX ISA before changing it.

class TEX_2D_ARRAY_base<string inst, NVPTXRegClass outtype,
                        NVPTXRegClass intype, dag texsamp,
                        list<dag> pattern = []>
  : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
              !con(texsamp, (ins Int32Regs:$l, intype:$x, intype:$y)),
              inst # " \t\\{$r, $g, $b, $a\\},"
                   # " [$t, $s, \\{$l, $x, $y, $y\\}];",
              pattern>;

multiclass TEX_2D_ARRAY<string inst, NVPTXRegClass outtype,
                        NVPTXRegClass intype, Intrinsic intr> {
  def _RR : TEX_2D_ARRAY_base<
      inst, outtype, intype, (ins Int64Regs:$t, Int64Regs:$s),
      [(set outtype:$r, outtype:$g, outtype:$b, outtype:$a,
            (intr i64:$t, i64:$s, i32:$l, intype:$x, intype:$y))]>;
  def _RI : TEX_2D_ARRAY_base<inst, outtype, intype,
                              (ins Int64Regs:$t, i64imm:$s)>;
  def _IR : TEX_2D_ARRAY_base<inst, outtype, intype,
                              (ins i64imm:$t, Int64Regs:$s)>;
  def _II : TEX_2D_ARRAY_base<inst, outtype, intype,
                              (ins i64imm:$t, i64imm:$s)>;
}

defm TEX_2D_ARRAY_F32_F32 : TEX_2D_ARRAY<
    "tex.a2d.v4.f32.f32", Float32Regs, Float32Regs,
    int_nvvm_tex_2d_array_v4f32_f32>;
defm TEX_2D_ARRAY_F32_S32 : TEX_2D_ARRAY<
    "tex.a2d.v4.f32.s32", Float32Regs, Int32Regs,
    int_nvvm_tex_2d_array_v4f32_s32>;
defm TEX_2D_ARRAY_S32_S32 : TEX_2D_ARRAY<
    "tex.a2d.v4.s32.s32", Int32Regs, Int32Regs,
    int_nvvm_tex_2d_array_v4s32_s32>;
defm TEX_2D_ARRAY_S32_F32 : TEX_2D_ARRAY<
    "tex.a2d.v4.s32.f32", Int32Regs, Float32Regs,
    int_nvvm_tex_2d_array_v4s32_f32>;
defm TEX_2D_ARRAY_U32_S32 : TEX_2D_ARRAY<
    "tex.a2d.v4.u32.s32", Int32Regs, Int32Regs,
    int_nvvm_tex_2d_array_v4u32_s32>;
defm TEX_2D_ARRAY_U32_F32 : TEX_2D_ARRAY<
    "tex.a2d.v4.u32.f32", Int32Regs, Float32Regs,
    int_nvvm_tex_2d_array_v4u32_f32>;

class TEX_2D_ARRAY_LEVEL_base<string inst, NVPTXRegClass outtype,
                              NVPTXRegClass intype, dag texsamp,
                              list<dag> pattern = []>
  : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
              !con(texsamp, (ins Int32Regs:$l, intype:$x, intype:$y,
                                 intype:$lod)),
              inst # " \t\\{$r, $g, $b, $a\\},"
                   # " [$t, $s, \\{$l, $x, $y, $y\\}], $lod;",
              pattern>;

multiclass TEX_2D_ARRAY_LEVEL<string inst, NVPTXRegClass outtype,
                              NVPTXRegClass intype, Intrinsic intr> {
  def _RR : TEX_2D_ARRAY_LEVEL_base<
      inst, outtype, intype, (ins Int64Regs:$t, Int64Regs:$s),
      [(set outtype:$r, outtype:$g, outtype:$b, outtype:$a,
            (intr i64:$t, i64:$s, i32:$l, intype:$x, intype:$y,
                  intype:$lod))]>;
  def _RI : TEX_2D_ARRAY_LEVEL_base<inst, outtype, intype,
                                    (ins Int64Regs:$t, i64imm:$s)>;
  def _IR : TEX_2D_ARRAY_LEVEL_base<inst, outtype, intype,
                                    (ins i64imm:$t, Int64Regs:$s)>;
  def _II : TEX_2D_ARRAY_LEVEL_base<inst, outtype, intype,
                                    (ins i64imm:$t, i64imm:$s)>;
}

defm TEX_2D_ARRAY_F32_F32_LEVEL : TEX_2D_ARRAY_LEVEL<
    "tex.level.a2d.v4.f32.f32", Float32Regs, Float32Regs,
    int_nvvm_tex_2d_array_level_v4f32_f32>;
defm TEX_2D_ARRAY_S32_F32_LEVEL : TEX_2D_ARRAY_LEVEL<
    "tex.level.a2d.v4.s32.f32", Int32Regs, Float32Regs,
    int_nvvm_tex_2d_array_level_v4s32_f32>;
defm TEX_2D_ARRAY_U32_F32_LEVEL : TEX_2D_ARRAY_LEVEL<
    "tex.level.a2d.v4.u32.f32", Int32Regs, Float32Regs,
    int_nvvm_tex_2d_array_level_v4u32_f32>;

class TEX_2D_ARRAY_GRAD_base<string inst, NVPTXRegClass outtype,
                             NVPTXRegClass intype, dag texsamp,
                             list<dag> pattern = []>
  : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
              !con(texsamp, (ins Int32Regs:$l, intype:$x, intype:$y,
                                 intype:$gradx0, intype:$gradx1,
                                 intype:$grady0, intype:$grady1)),
              inst # " \t\\{$r, $g, $b, $a\\},"
                   # " [$t, $s, \\{$l, $x, $y, $y\\}],"
                   # " \\{$gradx0, $gradx1\\}, \\{$grady0, $grady1\\};",
              pattern>;

multiclass TEX_2D_ARRAY_GRAD<string inst, NVPTXRegClass outtype,
                             NVPTXRegClass intype, Intrinsic intr> {
  def _RR : TEX_2D_ARRAY_GRAD_base<
      inst, outtype, intype, (ins Int64Regs:$t, Int64Regs:$s),
      [(set outtype:$r, outtype:$g, outtype:$b, outtype:$a,
            (intr i64:$t, i64:$s, i32:$l, intype:$x, intype:$y,
                  intype:$gradx0, intype:$gradx1,
                  intype:$grady0, intype:$grady1))]>;
  def _RI : TEX_2D_ARRAY_GRAD_base<inst, outtype, intype,
                                   (ins Int64Regs:$t, i64imm:$s)>;
  def _IR : TEX_2D_ARRAY_GRAD_base<inst, outtype, intype,
                                   (ins i64imm:$t, Int64Regs:$s)>;
  def _II : TEX_2D_ARRAY_GRAD_base<inst, outtype, intype,
                                   (ins i64imm:$t, i64imm:$s)>;
}

defm TEX_2D_ARRAY_F32_F32_GRAD : TEX_2D_ARRAY_GRAD<
    "tex.grad.a2d.v4.f32.f32", Float32Regs, Float32Regs,
    int_nvvm_tex_2d_array_grad_v4f32_f32>;
defm TEX_2D_ARRAY_S32_F32_GRAD : TEX_2D_ARRAY_GRAD<
    "tex.grad.a2d.v4.s32.f32", Int32Regs, Float32Regs,
    int_nvvm_tex_2d_array_grad_v4s32_f32>;
defm TEX_2D_ARRAY_U32_F32_GRAD : TEX_2D_ARRAY_GRAD<
    "tex.grad.a2d.v4.u32.f32", Int32Regs, Float32Regs,
    int_nvvm_tex_2d_array_grad_v4u32_f32>;

// ---- 3D ----
// Coordinate and gradient vectors are printed with four elements; the third
// operand ($z, $gradx2, $grady2) is repeated to fill the last slot.

class TEX_3D_base<string inst, NVPTXRegClass outtype, NVPTXRegClass intype,
                  dag texsamp, list<dag> pattern = []>
  : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
              !con(texsamp, (ins intype:$x, intype:$y, intype:$z)),
              inst # " \t\\{$r, $g, $b, $a\\},"
                   # " [$t, $s, \\{$x, $y, $z, $z\\}];",
              pattern>;

multiclass TEX_3D<string inst, NVPTXRegClass outtype, NVPTXRegClass intype,
                  Intrinsic intr> {
  def _RR : TEX_3D_base<
      inst, outtype, intype, (ins Int64Regs:$t, Int64Regs:$s),
      [(set outtype:$r, outtype:$g, outtype:$b, outtype:$a,
            (intr i64:$t, i64:$s, intype:$x, intype:$y, intype:$z))]>;
  def _RI : TEX_3D_base<inst, outtype, intype, (ins Int64Regs:$t, i64imm:$s)>;
  def _IR : TEX_3D_base<inst, outtype, intype, (ins i64imm:$t, Int64Regs:$s)>;
  def _II : TEX_3D_base<inst, outtype, intype, (ins i64imm:$t, i64imm:$s)>;
}

defm TEX_3D_F32_F32 : TEX_3D<
    "tex.3d.v4.f32.f32", Float32Regs, Float32Regs, int_nvvm_tex_3d_v4f32_f32>;
defm TEX_3D_F32_S32 : TEX_3D<
    "tex.3d.v4.f32.s32", Float32Regs, Int32Regs, int_nvvm_tex_3d_v4f32_s32>;
defm TEX_3D_S32_S32 : TEX_3D<
    "tex.3d.v4.s32.s32", Int32Regs, Int32Regs, int_nvvm_tex_3d_v4s32_s32>;
defm TEX_3D_S32_F32 : TEX_3D<
    "tex.3d.v4.s32.f32", Int32Regs, Float32Regs, int_nvvm_tex_3d_v4s32_f32>;
defm TEX_3D_U32_S32 : TEX_3D<
    "tex.3d.v4.u32.s32", Int32Regs, Int32Regs, int_nvvm_tex_3d_v4u32_s32>;
defm TEX_3D_U32_F32 : TEX_3D<
    "tex.3d.v4.u32.f32", Int32Regs, Float32Regs, int_nvvm_tex_3d_v4u32_f32>;

class TEX_3D_LEVEL_base<string inst, NVPTXRegClass outtype,
                        NVPTXRegClass intype, dag texsamp,
                        list<dag> pattern = []>
  : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
              !con(texsamp, (ins intype:$x, intype:$y, intype:$z,
                                 intype:$lod)),
              inst # " \t\\{$r, $g, $b, $a\\},"
                   # " [$t, $s, \\{$x, $y, $z, $z\\}], $lod;",
              pattern>;

multiclass TEX_3D_LEVEL<string inst, NVPTXRegClass outtype,
                        NVPTXRegClass intype, Intrinsic intr> {
  def _RR : TEX_3D_LEVEL_base<
      inst, outtype, intype, (ins Int64Regs:$t, Int64Regs:$s),
      [(set outtype:$r, outtype:$g, outtype:$b, outtype:$a,
            (intr i64:$t, i64:$s, intype:$x, intype:$y, intype:$z,
                  intype:$lod))]>;
  def _RI : TEX_3D_LEVEL_base<inst, outtype, intype,
                              (ins Int64Regs:$t, i64imm:$s)>;
  def _IR : TEX_3D_LEVEL_base<inst, outtype, intype,
                              (ins i64imm:$t, Int64Regs:$s)>;
  def _II : TEX_3D_LEVEL_base<inst, outtype, intype,
                              (ins i64imm:$t, i64imm:$s)>;
}

defm TEX_3D_F32_F32_LEVEL : TEX_3D_LEVEL<
    "tex.level.3d.v4.f32.f32", Float32Regs, Float32Regs,
    int_nvvm_tex_3d_level_v4f32_f32>;
defm TEX_3D_S32_F32_LEVEL : TEX_3D_LEVEL<
    "tex.level.3d.v4.s32.f32", Int32Regs, Float32Regs,
    int_nvvm_tex_3d_level_v4s32_f32>;
defm TEX_3D_U32_F32_LEVEL : TEX_3D_LEVEL<
    "tex.level.3d.v4.u32.f32", Int32Regs, Float32Regs,
    int_nvvm_tex_3d_level_v4u32_f32>;

class TEX_3D_GRAD_base<string inst, NVPTXRegClass outtype,
                       NVPTXRegClass intype, dag texsamp,
                       list<dag> pattern = []>
  : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
              !con(texsamp, (ins intype:$x, intype:$y, intype:$z,
                                 intype:$gradx0, intype:$gradx1,
                                 intype:$gradx2, intype:$grady0,
                                 intype:$grady1, intype:$grady2)),
              inst # " \t\\{$r, $g, $b, $a\\},"
                   # " [$t, $s, \\{$x, $y, $z, $z\\}],"
                   # " \\{$gradx0, $gradx1, $gradx2, $gradx2\\},"
                   # " \\{$grady0, $grady1, $grady2, $grady2\\};",
              pattern>;

multiclass TEX_3D_GRAD<string inst, NVPTXRegClass outtype,
                       NVPTXRegClass intype, Intrinsic intr> {
  def _RR : TEX_3D_GRAD_base<
      inst, outtype, intype, (ins Int64Regs:$t, Int64Regs:$s),
      [(set outtype:$r, outtype:$g, outtype:$b, outtype:$a,
            (intr i64:$t, i64:$s, intype:$x, intype:$y, intype:$z,
                  intype:$gradx0, intype:$gradx1, intype:$gradx2,
                  intype:$grady0, intype:$grady1, intype:$grady2))]>;
  def _RI : TEX_3D_GRAD_base<inst, outtype, intype,
                             (ins Int64Regs:$t, i64imm:$s)>;
  def _IR : TEX_3D_GRAD_base<inst, outtype, intype,
                             (ins i64imm:$t, Int64Regs:$s)>;
  def _II : TEX_3D_GRAD_base<inst, outtype, intype,
                             (ins i64imm:$t, i64imm:$s)>;
}

defm TEX_3D_F32_F32_GRAD : TEX_3D_GRAD<
    "tex.grad.3d.v4.f32.f32", Float32Regs, Float32Regs,
    int_nvvm_tex_3d_grad_v4f32_f32>;
defm TEX_3D_S32_F32_GRAD : TEX_3D_GRAD<
    "tex.grad.3d.v4.s32.f32", Int32Regs, Float32Regs,
    int_nvvm_tex_3d_grad_v4s32_f32>;
defm TEX_3D_U32_F32_GRAD : TEX_3D_GRAD<
    "tex.grad.3d.v4.u32.f32", Int32Regs, Float32Regs,
    int_nvvm_tex_3d_grad_v4u32_f32>;

// ---- Cube ----
// Same operand layout as the 3D forms: {$x, $y, $z, $z}.

class TEX_CUBE_base<string inst, NVPTXRegClass outtype, NVPTXRegClass intype,
                    dag texsamp, list<dag> pattern = []>
  : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
              !con(texsamp, (ins intype:$x, intype:$y, intype:$z)),
              inst # " \t\\{$r, $g, $b, $a\\},"
                   # " [$t, $s, \\{$x, $y, $z, $z\\}];",
              pattern>;

multiclass TEX_CUBE<string inst, NVPTXRegClass outtype, NVPTXRegClass intype,
                    Intrinsic intr> {
  def _RR : TEX_CUBE_base<
      inst, outtype, intype, (ins Int64Regs:$t, Int64Regs:$s),
      [(set outtype:$r, outtype:$g, outtype:$b, outtype:$a,
            (intr i64:$t, i64:$s, intype:$x, intype:$y, intype:$z))]>;
  def _RI : TEX_CUBE_base<inst, outtype, intype,
                          (ins Int64Regs:$t, i64imm:$s)>;
  def _IR : TEX_CUBE_base<inst, outtype, intype,
                          (ins i64imm:$t, Int64Regs:$s)>;
  def _II : TEX_CUBE_base<inst, outtype, intype,
                          (ins i64imm:$t, i64imm:$s)>;
}

defm TEX_CUBE_F32_F32 : TEX_CUBE<
    "tex.cube.v4.f32.f32", Float32Regs, Float32Regs,
    int_nvvm_tex_cube_v4f32_f32>;
defm TEX_CUBE_S32_F32 : TEX_CUBE<
    "tex.cube.v4.s32.f32", Int32Regs, Float32Regs,
    int_nvvm_tex_cube_v4s32_f32>;
defm TEX_CUBE_U32_F32 : TEX_CUBE<
    "tex.cube.v4.u32.f32", Int32Regs, Float32Regs,
    int_nvvm_tex_cube_v4u32_f32>;

class TEX_CUBE_LEVEL_base<string inst, NVPTXRegClass outtype,
                          NVPTXRegClass intype, dag texsamp,
                          list<dag> pattern = []>
  : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
              !con(texsamp, (ins intype:$x, intype:$y, intype:$z,
                                 intype:$lod)),
              inst # " \t\\{$r, $g, $b, $a\\},"
                   # " [$t, $s, \\{$x, $y, $z, $z\\}], $lod;",
              pattern>;

multiclass TEX_CUBE_LEVEL<string inst, NVPTXRegClass outtype,
                          NVPTXRegClass intype, Intrinsic intr> {
  def _RR : TEX_CUBE_LEVEL_base<
      inst, outtype, intype, (ins Int64Regs:$t, Int64Regs:$s),
      [(set outtype:$r, outtype:$g, outtype:$b, outtype:$a,
            (intr i64:$t, i64:$s, intype:$x, intype:$y, intype:$z,
                  intype:$lod))]>;
  def _RI : TEX_CUBE_LEVEL_base<inst, outtype, intype,
                                (ins Int64Regs:$t, i64imm:$s)>;
  def _IR : TEX_CUBE_LEVEL_base<inst, outtype, intype,
                                (ins i64imm:$t, Int64Regs:$s)>;
  def _II : TEX_CUBE_LEVEL_base<inst, outtype, intype,
                                (ins i64imm:$t, i64imm:$s)>;
}

defm TEX_CUBE_F32_F32_LEVEL : TEX_CUBE_LEVEL<
    "tex.level.cube.v4.f32.f32", Float32Regs, Float32Regs,
    int_nvvm_tex_cube_level_v4f32_f32>;
defm TEX_CUBE_S32_F32_LEVEL : TEX_CUBE_LEVEL<
    "tex.level.cube.v4.s32.f32", Int32Regs, Float32Regs,
    int_nvvm_tex_cube_level_v4s32_f32>;
defm TEX_CUBE_U32_F32_LEVEL : TEX_CUBE_LEVEL<
    "tex.level.cube.v4.u32.f32", Int32Regs, Float32Regs,
    int_nvvm_tex_cube_level_v4u32_f32>;

// Cube-array fetch: four results ($r, $g, $b, $a); inputs are the `texsamp`
// dag followed by an Int32Regs $l operand and $x, $y, $z.
class TEX_CUBE_ARRAY_base<string inst, NVPTXRegClass outtype,
                          NVPTXRegClass intype, dag texsamp,
                          list<dag> pattern = []>
  : NVPTXInst<
      (outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
      !con(texsamp, (ins Int32Regs:$l, intype:$x, intype:$y, intype:$z)),
      !strconcat(inst, " \t\\{$r, $g, $b, $a\\},",
                       " [$t, $s, \\{$l, $x, $y, $z\\}];"),
      pattern>;

// One def per register/immediate combination of $t and $s. Only _RR is
// given a selection pattern for `intr`; the others use the default empty
// pattern list.
multiclass TEX_CUBE_ARRAY<string inst, NVPTXRegClass outtype,
                          NVPTXRegClass intype, Intrinsic intr> {
  def _RR : TEX_CUBE_ARRAY_base<
      inst, outtype, intype, (ins Int64Regs:$t, Int64Regs:$s),
      [(set outtype:$r, outtype:$g, outtype:$b, outtype:$a,
            (intr i64:$t, i64:$s, i32:$l, intype:$x, intype:$y,
                  intype:$z))]>;
  def _RI : TEX_CUBE_ARRAY_base<
      inst, outtype, intype, (ins Int64Regs:$t, i64imm:$s)>;
  def _IR : TEX_CUBE_ARRAY_base<
      inst, outtype, intype, (ins i64imm:$t, Int64Regs:$s)>;
  def _II : TEX_CUBE_ARRAY_base<
      inst, outtype, intype, (ins i64imm:$t, i64imm:$s)>;
}

defm TEX_CUBE_ARRAY_F32_F32 : TEX_CUBE_ARRAY<
    "tex.acube.v4.f32.f32", Float32Regs, Float32Regs,
    int_nvvm_tex_cube_array_v4f32_f32>;
defm TEX_CUBE_ARRAY_S32_F32 : TEX_CUBE_ARRAY<
    "tex.acube.v4.s32.f32", Int32Regs, Float32Regs,
    int_nvvm_tex_cube_array_v4s32_f32>;
defm TEX_CUBE_ARRAY_U32_F32 : TEX_CUBE_ARRAY<
    "tex.acube.v4.u32.f32", Int32Regs, Float32Regs,
    int_nvvm_tex_cube_array_v4u32_f32>;

// Cube-array fetch with a trailing $lod operand appended after the address.
class TEX_CUBE_ARRAY_LEVEL_base<string inst, NVPTXRegClass outtype,
                                NVPTXRegClass intype, dag texsamp,
                                list<dag> pattern = []>
  : NVPTXInst<
      (outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
      !con(texsamp, (ins Int32Regs:$l, intype:$x, intype:$y, intype:$z,
                         intype:$lod)),
      !strconcat(inst, " \t\\{$r, $g, $b, $a\\},",
                       " [$t, $s, \\{$l, $x, $y, $z\\}], $lod;"),
      pattern>;

// _RR carries the pattern for `intr`; the immediate forms have none.
multiclass TEX_CUBE_ARRAY_LEVEL<string inst, NVPTXRegClass outtype,
                                NVPTXRegClass intype, Intrinsic intr> {
  def _RR : TEX_CUBE_ARRAY_LEVEL_base<
      inst, outtype, intype, (ins Int64Regs:$t, Int64Regs:$s),
      [(set outtype:$r, outtype:$g, outtype:$b, outtype:$a,
            (intr i64:$t, i64:$s, i32:$l, intype:$x, intype:$y, intype:$z,
                  intype:$lod))]>;
  def _RI : TEX_CUBE_ARRAY_LEVEL_base<
      inst, outtype, intype, (ins Int64Regs:$t, i64imm:$s)>;
  def _IR : TEX_CUBE_ARRAY_LEVEL_base<
      inst, outtype, intype, (ins i64imm:$t, Int64Regs:$s)>;
  def _II : TEX_CUBE_ARRAY_LEVEL_base<
      inst, outtype, intype, (ins i64imm:$t, i64imm:$s)>;
}

defm TEX_CUBE_ARRAY_F32_F32_LEVEL : TEX_CUBE_ARRAY_LEVEL<
    "tex.level.acube.v4.f32.f32", Float32Regs, Float32Regs,
    int_nvvm_tex_cube_array_level_v4f32_f32>;
defm TEX_CUBE_ARRAY_S32_F32_LEVEL : TEX_CUBE_ARRAY_LEVEL<
    "tex.level.acube.v4.s32.f32", Int32Regs, Float32Regs,
    int_nvvm_tex_cube_array_level_v4s32_f32>;
defm TEX_CUBE_ARRAY_U32_F32_LEVEL : TEX_CUBE_ARRAY_LEVEL<
    "tex.level.acube.v4.u32.f32", Int32Regs, Float32Regs,
    int_nvvm_tex_cube_array_level_v4u32_f32>;

// tld4 on a 2D address: four results named $v0..$v3; inputs are the
// `texsamp` dag followed by $x and $y.
class TLD4_2D_base<string inst, NVPTXRegClass outtype, NVPTXRegClass intype,
                   dag texsamp, list<dag> pattern = []>
  : NVPTXInst<
      (outs outtype:$v0, outtype:$v1, outtype:$v2, outtype:$v3),
      !con(texsamp, (ins intype:$x, intype:$y)),
      !strconcat(inst,
                 " \t\\{$v0, $v1, $v2, $v3\\}, [$t, $s, \\{$x, $y\\}];"),
      pattern>;

// _RR carries the pattern for `intr`; the immediate forms have none.
multiclass TLD4_2D<string inst, NVPTXRegClass outtype, NVPTXRegClass intype,
                   Intrinsic intr> {
  def _RR : TLD4_2D_base<
      inst, outtype, intype, (ins Int64Regs:$t, Int64Regs:$s),
      [(set outtype:$v0, outtype:$v1, outtype:$v2, outtype:$v3,
            (intr i64:$t, i64:$s, intype:$x, intype:$y))]>;
  def _RI : TLD4_2D_base<
      inst, outtype, intype, (ins Int64Regs:$t, i64imm:$s)>;
  def _IR : TLD4_2D_base<
      inst, outtype, intype, (ins i64imm:$t, Int64Regs:$s)>;
  def _II : TLD4_2D_base<
      inst, outtype, intype, (ins i64imm:$t, i64imm:$s)>;
}

// tld4 instantiations: one defm per component selector (r, g, b, a) and
// result element type (f32, s32, u32); coordinates are always f32.
defm TLD4_R_2D_F32_F32 : TLD4_2D<
    "tld4.r.2d.v4.f32.f32", Float32Regs, Float32Regs,
    int_nvvm_tld4_r_2d_v4f32_f32>;
defm TLD4_G_2D_F32_F32 : TLD4_2D<
    "tld4.g.2d.v4.f32.f32", Float32Regs, Float32Regs,
    int_nvvm_tld4_g_2d_v4f32_f32>;
defm TLD4_B_2D_F32_F32 : TLD4_2D<
    "tld4.b.2d.v4.f32.f32", Float32Regs, Float32Regs,
    int_nvvm_tld4_b_2d_v4f32_f32>;
defm TLD4_A_2D_F32_F32 : TLD4_2D<
    "tld4.a.2d.v4.f32.f32", Float32Regs, Float32Regs,
    int_nvvm_tld4_a_2d_v4f32_f32>;

defm TLD4_R_2D_S32_F32 : TLD4_2D<
    "tld4.r.2d.v4.s32.f32", Int32Regs, Float32Regs,
    int_nvvm_tld4_r_2d_v4s32_f32>;
defm TLD4_G_2D_S32_F32 : TLD4_2D<
    "tld4.g.2d.v4.s32.f32", Int32Regs, Float32Regs,
    int_nvvm_tld4_g_2d_v4s32_f32>;
defm TLD4_B_2D_S32_F32 : TLD4_2D<
    "tld4.b.2d.v4.s32.f32", Int32Regs, Float32Regs,
    int_nvvm_tld4_b_2d_v4s32_f32>;
defm TLD4_A_2D_S32_F32 : TLD4_2D<
    "tld4.a.2d.v4.s32.f32", Int32Regs, Float32Regs,
    int_nvvm_tld4_a_2d_v4s32_f32>;

defm TLD4_R_2D_U32_F32 : TLD4_2D<
    "tld4.r.2d.v4.u32.f32", Int32Regs, Float32Regs,
    int_nvvm_tld4_r_2d_v4u32_f32>;
defm TLD4_G_2D_U32_F32 : TLD4_2D<
    "tld4.g.2d.v4.u32.f32", Int32Regs, Float32Regs,
    int_nvvm_tld4_g_2d_v4u32_f32>;
defm TLD4_B_2D_U32_F32 : TLD4_2D<
    "tld4.b.2d.v4.u32.f32", Int32Regs, Float32Regs,
    int_nvvm_tld4_b_2d_v4u32_f32>;
defm TLD4_A_2D_U32_F32 : TLD4_2D<
    "tld4.a.2d.v4.u32.f32", Int32Regs, Float32Regs,
    int_nvvm_tld4_a_2d_v4u32_f32>;

}


// texmode_unified
let IsTex = true, IsTexModeUnified = true in {
// Texture fetch instructions using handles

// Unified-mode 1D fetch: four results ($r, $g, $b, $a); inputs are the
// `tex` dag followed by $x. The asm address uses $t only (no $s operand).
class TEX_UNIFIED_1D_base<string inst, NVPTXRegClass outtype,
                          NVPTXRegClass intype, dag tex,
                          list<dag> pattern = []>
  : NVPTXInst<
      (outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
      !con(tex, (ins intype:$x)),
      !strconcat(inst, " \t\\{$r, $g, $b, $a\\}, [$t, \\{$x\\}];"),
      pattern>;

3788multiclass TEX_UNIFIED_1D<string inst, NVPTXRegClass outtype, 3789 NVPTXRegClass intype, Intrinsic intr> { 3790 def _R : TEX_UNIFIED_1D_base< 3791 inst, outtype, intype, (ins Int64Regs:$t), 3792 [(set outtype:$r, outtype:$g, outtype:$b, outtype:$a, 3793 (intr i64:$t, intype:$x))]>; 3794 def _I : TEX_UNIFIED_1D_base<inst, outtype, intype, (ins i64imm:$t)>; 3795} 3796 3797defm TEX_UNIFIED_1D_F32_S32 3798 : TEX_UNIFIED_1D<"tex.1d.v4.f32.s32", Float32Regs, Int32Regs, 3799 int_nvvm_tex_unified_1d_v4f32_s32>; 3800defm TEX_UNIFIED_1D_F32_F32 3801 : TEX_UNIFIED_1D<"tex.1d.v4.f32.f32", Float32Regs, Float32Regs, 3802 int_nvvm_tex_unified_1d_v4f32_f32>; 3803defm TEX_UNIFIED_1D_S32_S32 3804 : TEX_UNIFIED_1D<"tex.1d.v4.s32.s32", Int32Regs, Int32Regs, 3805 int_nvvm_tex_unified_1d_v4s32_s32>; 3806defm TEX_UNIFIED_1D_S32_F32 3807 : TEX_UNIFIED_1D<"tex.1d.v4.s32.f32", Int32Regs, Float32Regs, 3808 int_nvvm_tex_unified_1d_v4s32_f32>; 3809defm TEX_UNIFIED_1D_U32_S32 3810 : TEX_UNIFIED_1D<"tex.1d.v4.u32.s32", Int32Regs, Int32Regs, 3811 int_nvvm_tex_unified_1d_v4u32_s32>; 3812defm TEX_UNIFIED_1D_U32_F32 3813 : TEX_UNIFIED_1D<"tex.1d.v4.u32.f32", Int32Regs, Float32Regs, 3814 int_nvvm_tex_unified_1d_v4u32_f32>; 3815 3816class TEX_UNIFIED_1D_LEVEL_base<string inst, NVPTXRegClass outtype, 3817 NVPTXRegClass intype, dag tex, 3818 list<dag> pattern = []> 3819 : NVPTXInst<(outs outtype:$r, outtype:$g, 3820 outtype:$b, outtype:$a), 3821 !con(tex, (ins intype:$x, intype:$lod)), 3822 inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$x\\}], $lod;", 3823 pattern>; 3824 3825multiclass TEX_UNIFIED_1D_LEVEL<string inst, NVPTXRegClass outtype, 3826 NVPTXRegClass intype, Intrinsic intr> { 3827 def _R : TEX_UNIFIED_1D_LEVEL_base< 3828 inst, outtype, intype, (ins Int64Regs:$t), 3829 [(set outtype:$r, outtype:$g, outtype:$b, outtype:$a, 3830 (intr i64:$t, intype:$x, intype:$lod))]>; 3831 def _I : TEX_UNIFIED_1D_LEVEL_base<inst, outtype, intype, (ins i64imm:$t)>; 3832} 3833 3834defm TEX_UNIFIED_1D_F32_F32_LEVEL 
3835 : TEX_UNIFIED_1D_LEVEL<"tex.level.1d.v4.f32.f32", Float32Regs, Float32Regs, 3836 int_nvvm_tex_unified_1d_level_v4f32_f32>; 3837defm TEX_UNIFIED_1D_S32_F32_LEVEL 3838 : TEX_UNIFIED_1D_LEVEL<"tex.level.1d.v4.s32.f32", Int32Regs, Float32Regs, 3839 int_nvvm_tex_unified_1d_level_v4s32_f32>; 3840defm TEX_UNIFIED_1D_U32_F32_LEVEL 3841 : TEX_UNIFIED_1D_LEVEL<"tex.level.1d.v4.u32.f32", Int32Regs, Float32Regs, 3842 int_nvvm_tex_unified_1d_level_v4u32_f32>; 3843 3844class TEX_UNIFIED_1D_GRAD_base<string inst, NVPTXRegClass outtype, 3845 NVPTXRegClass intype, dag tex, 3846 list<dag> pattern = []> 3847 : NVPTXInst<(outs outtype:$r, outtype:$g, 3848 outtype:$b, outtype:$a), 3849 !con(tex, (ins intype:$x, intype:$gradx, intype:$grady)), 3850 inst # " \t\\{$r, $g, $b, $a\\}," 3851 " [$t, \\{$x\\}], \\{$gradx\\}, \\{$grady\\};", 3852 pattern>; 3853 3854multiclass TEX_UNIFIED_1D_GRAD<string inst, NVPTXRegClass outtype, 3855 NVPTXRegClass intype, Intrinsic intr> { 3856 def _R : TEX_UNIFIED_1D_GRAD_base< 3857 inst, outtype, intype, (ins Int64Regs:$t), 3858 [(set outtype:$r, outtype:$g, outtype:$b, outtype:$a, 3859 (intr i64:$t, intype:$x, intype:$gradx, intype:$grady))]>; 3860 def _I : TEX_UNIFIED_1D_GRAD_base<inst, outtype, intype, (ins i64imm:$t)>; 3861} 3862 3863defm TEX_UNIFIED_1D_F32_F32_GRAD 3864 : TEX_UNIFIED_1D_GRAD<"tex.grad.1d.v4.f32.f32", Float32Regs, Float32Regs, 3865 int_nvvm_tex_unified_1d_grad_v4f32_f32>; 3866defm TEX_UNIFIED_1D_S32_F32_GRAD 3867 : TEX_UNIFIED_1D_GRAD<"tex.grad.1d.v4.s32.f32", Int32Regs, Float32Regs, 3868 int_nvvm_tex_unified_1d_grad_v4s32_f32>; 3869defm TEX_UNIFIED_1D_U32_F32_GRAD 3870 : TEX_UNIFIED_1D_GRAD<"tex.grad.1d.v4.u32.f32", Int32Regs, Float32Regs, 3871 int_nvvm_tex_unified_1d_grad_v4u32_f32>; 3872 3873class TEX_UNIFIED_1D_ARRAY_base<string inst, NVPTXRegClass outtype, 3874 NVPTXRegClass intype, dag tex, 3875 list<dag> pattern = []> 3876 : NVPTXInst<(outs outtype:$r, outtype:$g, 3877 outtype:$b, outtype:$a), 3878 !con(tex, (ins 
Int32Regs:$l, intype:$x)), 3879 inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$l, $x\\}];", 3880 pattern>; 3881 3882multiclass TEX_UNIFIED_1D_ARRAY<string inst, NVPTXRegClass outtype, 3883 NVPTXRegClass intype, Intrinsic intr> { 3884 def _R : TEX_UNIFIED_1D_ARRAY_base< 3885 inst, outtype, intype, (ins Int64Regs:$t), 3886 [(set outtype:$r, outtype:$g, outtype:$b, outtype:$a, 3887 (intr i64:$t, i32:$l, intype:$x))]>; 3888 def _I : TEX_UNIFIED_1D_ARRAY_base<inst, outtype, intype, (ins i64imm:$t)>; 3889} 3890 3891defm TEX_UNIFIED_1D_ARRAY_F32_S32 3892 : TEX_UNIFIED_1D_ARRAY<"tex.a1d.v4.f32.s32", Float32Regs, Int32Regs, 3893 int_nvvm_tex_unified_1d_array_v4f32_s32>; 3894defm TEX_UNIFIED_1D_ARRAY_F32_F32 3895 : TEX_UNIFIED_1D_ARRAY<"tex.a1d.v4.f32.f32", Float32Regs, Float32Regs, 3896 int_nvvm_tex_unified_1d_array_v4f32_f32>; 3897defm TEX_UNIFIED_1D_ARRAY_S32_S32 3898 : TEX_UNIFIED_1D_ARRAY<"tex.a1d.v4.s32.s32", Int32Regs, Int32Regs, 3899 int_nvvm_tex_unified_1d_array_v4s32_s32>; 3900defm TEX_UNIFIED_1D_ARRAY_S32_F32 3901 : TEX_UNIFIED_1D_ARRAY<"tex.a1d.v4.s32.f32", Int32Regs, Float32Regs, 3902 int_nvvm_tex_unified_1d_array_v4s32_f32>; 3903defm TEX_UNIFIED_1D_ARRAY_U32_S32 3904 : TEX_UNIFIED_1D_ARRAY<"tex.a1d.v4.u32.s32", Int32Regs, Int32Regs, 3905 int_nvvm_tex_unified_1d_array_v4u32_s32>; 3906defm TEX_UNIFIED_1D_ARRAY_U32_F32 3907 : TEX_UNIFIED_1D_ARRAY<"tex.a1d.v4.u32.f32", Int32Regs, Float32Regs, 3908 int_nvvm_tex_unified_1d_array_v4u32_f32>; 3909 3910class TEX_UNIFIED_1D_ARRAY_LEVEL_base<string inst, NVPTXRegClass outtype, 3911 NVPTXRegClass intype, dag tex, 3912 list<dag> pattern = []> 3913 : NVPTXInst<(outs outtype:$r, outtype:$g, 3914 outtype:$b, outtype:$a), 3915 !con(tex, (ins Int32Regs:$l, intype:$x, intype:$lod)), 3916 inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$l, $x\\}], $lod;", 3917 pattern>; 3918 3919multiclass TEX_UNIFIED_1D_ARRAY_LEVEL<string inst, NVPTXRegClass outtype, 3920 NVPTXRegClass intype, Intrinsic intr> { 3921 def _R : TEX_UNIFIED_1D_ARRAY_LEVEL_base< 
3922 inst, outtype, intype, (ins Int64Regs:$t), 3923 [(set outtype:$r, outtype:$g, outtype:$b, outtype:$a, 3924 (intr i64:$t, i32:$l, intype:$x, intype:$lod))]>; 3925 def _I : TEX_UNIFIED_1D_ARRAY_LEVEL_base<inst, outtype, intype, 3926 (ins i64imm:$t)>; 3927} 3928 3929defm TEX_UNIFIED_1D_ARRAY_F32_F32_LEVEL 3930 : TEX_UNIFIED_1D_ARRAY_LEVEL<"tex.level.a1d.v4.f32.f32", 3931 Float32Regs, Float32Regs, 3932 int_nvvm_tex_unified_1d_array_level_v4f32_f32>; 3933defm TEX_UNIFIED_1D_ARRAY_S32_F32_LEVEL 3934 : TEX_UNIFIED_1D_ARRAY_LEVEL<"tex.level.a1d.v4.s32.f32", 3935 Int32Regs, Float32Regs, 3936 int_nvvm_tex_unified_1d_array_level_v4s32_f32>; 3937defm TEX_UNIFIED_1D_ARRAY_U32_F32_LEVEL 3938 : TEX_UNIFIED_1D_ARRAY_LEVEL<"tex.level.a1d.v4.u32.f32", 3939 Int32Regs, Float32Regs, 3940 int_nvvm_tex_unified_1d_array_level_v4u32_f32>; 3941 3942class TEX_UNIFIED_1D_ARRAY_GRAD_base<string inst, NVPTXRegClass outtype, 3943 NVPTXRegClass intype, dag tex, 3944 list<dag> pattern = []> 3945 : NVPTXInst<(outs outtype:$r, outtype:$g, 3946 outtype:$b, outtype:$a), 3947 !con(tex, (ins Int32Regs:$l, intype:$x, 3948 intype:$gradx, intype:$grady)), 3949 inst # " \t\\{$r, $g, $b, $a\\}," 3950 " [$t, \\{$l, $x\\}], \\{$gradx\\}, \\{$grady\\};", 3951 pattern>; 3952 3953multiclass TEX_UNIFIED_1D_ARRAY_GRAD<string inst, NVPTXRegClass outtype, 3954 NVPTXRegClass intype, Intrinsic intr> { 3955 def _R : TEX_UNIFIED_1D_ARRAY_GRAD_base< 3956 inst, outtype, intype, (ins Int64Regs:$t), 3957 [(set outtype:$r, outtype:$g, outtype:$b, outtype:$a, 3958 (intr i64:$t, i32:$l, intype:$x, intype:$gradx, intype:$grady))]>; 3959 def _I : TEX_UNIFIED_1D_ARRAY_GRAD_base<inst, outtype, intype, 3960 (ins i64imm:$t)>; 3961} 3962 3963defm TEX_UNIFIED_1D_ARRAY_F32_F32_GRAD 3964 : TEX_UNIFIED_1D_ARRAY_GRAD<"tex.grad.a1d.v4.f32.f32", 3965 Float32Regs, Float32Regs, 3966 int_nvvm_tex_unified_1d_array_grad_v4f32_f32>; 3967defm TEX_UNIFIED_1D_ARRAY_S32_F32_GRAD 3968 : TEX_UNIFIED_1D_ARRAY_GRAD<"tex.grad.a1d.v4.s32.f32", 3969 
Int32Regs, Float32Regs, 3970 int_nvvm_tex_unified_1d_array_grad_v4s32_f32>; 3971defm TEX_UNIFIED_1D_ARRAY_U32_F32_GRAD 3972 : TEX_UNIFIED_1D_ARRAY_GRAD<"tex.grad.a1d.v4.u32.f32", 3973 Int32Regs, Float32Regs, 3974 int_nvvm_tex_unified_1d_array_grad_v4u32_f32>; 3975 3976class TEX_UNIFIED_2D_base<string inst, NVPTXRegClass outtype, 3977 NVPTXRegClass intype, dag tex, list<dag> pattern = []> 3978 : NVPTXInst<(outs outtype:$r, outtype:$g, 3979 outtype:$b, outtype:$a), 3980 !con(tex, (ins intype:$x, intype:$y)), 3981 inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$x, $y\\}];", 3982 pattern>; 3983 3984multiclass TEX_UNIFIED_2D<string inst, NVPTXRegClass outtype, 3985 NVPTXRegClass intype, Intrinsic intr> { 3986 def _R : TEX_UNIFIED_2D_base< 3987 inst, outtype, intype, (ins Int64Regs:$t), 3988 [(set outtype:$r, outtype:$g, outtype:$b, outtype:$a, 3989 (intr i64:$t, intype:$x, intype:$y))]>; 3990 def _I : TEX_UNIFIED_2D_base<inst, outtype, intype, (ins i64imm:$t)>; 3991} 3992 3993defm TEX_UNIFIED_2D_F32_S32 3994 : TEX_UNIFIED_2D<"tex.2d.v4.f32.s32", Float32Regs, Int32Regs, 3995 int_nvvm_tex_unified_2d_v4f32_s32>; 3996defm TEX_UNIFIED_2D_F32_F32 3997 : TEX_UNIFIED_2D<"tex.2d.v4.f32.f32", Float32Regs, Float32Regs, 3998 int_nvvm_tex_unified_2d_v4f32_f32>; 3999defm TEX_UNIFIED_2D_S32_S32 4000 : TEX_UNIFIED_2D<"tex.2d.v4.s32.s32", Int32Regs, Int32Regs, 4001 int_nvvm_tex_unified_2d_v4s32_s32>; 4002defm TEX_UNIFIED_2D_S32_F32 4003 : TEX_UNIFIED_2D<"tex.2d.v4.s32.f32", Int32Regs, Float32Regs, 4004 int_nvvm_tex_unified_2d_v4s32_f32>; 4005defm TEX_UNIFIED_2D_U32_S32 4006 : TEX_UNIFIED_2D<"tex.2d.v4.u32.s32", Int32Regs, Int32Regs, 4007 int_nvvm_tex_unified_2d_v4u32_s32>; 4008defm TEX_UNIFIED_2D_U32_F32 4009 : TEX_UNIFIED_2D<"tex.2d.v4.u32.f32", Int32Regs, Float32Regs, 4010 int_nvvm_tex_unified_2d_v4u32_f32>; 4011 4012class TEX_UNIFIED_2D_LEVEL_base<string inst, NVPTXRegClass outtype, 4013 NVPTXRegClass intype, dag tex, 4014 list<dag> pattern = []> 4015 : NVPTXInst<(outs outtype:$r, 
outtype:$g, 4016 outtype:$b, outtype:$a), 4017 !con(tex, (ins intype:$x, intype:$y, intype:$lod)), 4018 inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$x, $y\\}], $lod;", 4019 pattern>; 4020 4021multiclass TEX_UNIFIED_2D_LEVEL<string inst, NVPTXRegClass outtype, 4022 NVPTXRegClass intype, Intrinsic intr> { 4023 def _R : TEX_UNIFIED_2D_LEVEL_base< 4024 inst, outtype, intype, (ins Int64Regs:$t), 4025 [(set outtype:$r, outtype:$g, outtype:$b, outtype:$a, 4026 (intr i64:$t, intype:$x, intype:$y, intype:$lod))]>; 4027 def _I : TEX_UNIFIED_2D_LEVEL_base<inst, outtype, intype, (ins i64imm:$t)>; 4028} 4029 4030defm TEX_UNIFIED_2D_F32_F32_LEVEL 4031 : TEX_UNIFIED_2D_LEVEL<"tex.level.2d.v4.f32.f32", Float32Regs, Float32Regs, 4032 int_nvvm_tex_unified_2d_level_v4f32_f32>; 4033defm TEX_UNIFIED_2D_S32_F32_LEVEL 4034 : TEX_UNIFIED_2D_LEVEL<"tex.level.2d.v4.s32.f32", Int32Regs, Float32Regs, 4035 int_nvvm_tex_unified_2d_level_v4s32_f32>; 4036defm TEX_UNIFIED_2D_U32_F32_LEVEL 4037 : TEX_UNIFIED_2D_LEVEL<"tex.level.2d.v4.u32.f32", Int32Regs, Float32Regs, 4038 int_nvvm_tex_unified_2d_level_v4u32_f32>; 4039 4040class TEX_UNIFIED_2D_GRAD_base<string inst, NVPTXRegClass outtype, 4041 NVPTXRegClass intype, dag tex, 4042 list<dag> pattern = []> 4043 : NVPTXInst<(outs outtype:$r, outtype:$g, 4044 outtype:$b, outtype:$a), 4045 !con(tex, (ins intype:$x, intype:$y, 4046 intype:$gradx0, intype:$gradx1, 4047 intype:$grady0, intype:$grady1)), 4048 inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$x, $y\\}]," 4049 " \\{$gradx0, $gradx1\\}, \\{$grady0, $grady1\\};", 4050 pattern>; 4051multiclass TEX_UNIFIED_2D_GRAD<string inst, NVPTXRegClass outtype, 4052 NVPTXRegClass intype, Intrinsic intr> { 4053 def _R : TEX_UNIFIED_2D_GRAD_base< 4054 inst, outtype, intype, (ins Int64Regs:$t), 4055 [(set outtype:$r, outtype:$g, outtype:$b, outtype:$a, 4056 (intr i64:$t, intype:$x, intype:$y, 4057 intype:$gradx0, intype:$gradx1, 4058 intype:$grady0, intype:$grady1))]>; 4059 def _I : TEX_UNIFIED_2D_GRAD_base<inst, outtype, 
intype, (ins i64imm:$t)>; 4060} 4061 4062defm TEX_UNIFIED_2D_F32_F32_GRAD 4063 : TEX_UNIFIED_2D_GRAD<"tex.grad.2d.v4.f32.f32", Float32Regs, Float32Regs, 4064 int_nvvm_tex_unified_2d_grad_v4f32_f32>; 4065defm TEX_UNIFIED_2D_S32_F32_GRAD 4066 : TEX_UNIFIED_2D_GRAD<"tex.grad.2d.v4.s32.f32", Int32Regs, Float32Regs, 4067 int_nvvm_tex_unified_2d_grad_v4s32_f32>; 4068defm TEX_UNIFIED_2D_U32_F32_GRAD 4069 : TEX_UNIFIED_2D_GRAD<"tex.grad.2d.v4.u32.f32", Int32Regs, Float32Regs, 4070 int_nvvm_tex_unified_2d_grad_v4u32_f32>; 4071 4072class TEX_UNIFIED_2D_ARRAY_base<string inst, NVPTXRegClass outtype, 4073 NVPTXRegClass intype, dag tex, 4074 list<dag> pattern = []> 4075 : NVPTXInst<(outs outtype:$r, outtype:$g, 4076 outtype:$b, outtype:$a), 4077 !con(tex, (ins Int32Regs:$l, intype:$x, intype:$y)), 4078 inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$l, $x, $y, $y\\}];", 4079 pattern>; 4080multiclass TEX_UNIFIED_2D_ARRAY<string inst, NVPTXRegClass outtype, 4081 NVPTXRegClass intype, Intrinsic intr> { 4082 def _R : TEX_UNIFIED_2D_ARRAY_base< 4083 inst, outtype, intype, (ins Int64Regs:$t), 4084 [(set outtype:$r, outtype:$g, outtype:$b, outtype:$a, 4085 (intr i64:$t, i32:$l, intype:$x, intype:$y))]>; 4086 def _I : TEX_UNIFIED_2D_ARRAY_base<inst, outtype, intype, (ins i64imm:$t)>; 4087} 4088 4089defm TEX_UNIFIED_2D_ARRAY_F32_S32 4090 : TEX_UNIFIED_2D_ARRAY<"tex.a2d.v4.f32.s32", Float32Regs, Int32Regs, 4091 int_nvvm_tex_unified_2d_array_v4f32_s32>; 4092defm TEX_UNIFIED_2D_ARRAY_F32_F32 4093 : TEX_UNIFIED_2D_ARRAY<"tex.a2d.v4.f32.f32", Float32Regs, Float32Regs, 4094 int_nvvm_tex_unified_2d_array_v4f32_f32>; 4095defm TEX_UNIFIED_2D_ARRAY_S32_S32 4096 : TEX_UNIFIED_2D_ARRAY<"tex.a2d.v4.s32.s32", Int32Regs, Int32Regs, 4097 int_nvvm_tex_unified_2d_array_v4s32_s32>; 4098defm TEX_UNIFIED_2D_ARRAY_S32_F32 4099 : TEX_UNIFIED_2D_ARRAY<"tex.a2d.v4.s32.f32", Int32Regs, Float32Regs, 4100 int_nvvm_tex_unified_2d_array_v4s32_f32>; 4101defm TEX_UNIFIED_2D_ARRAY_U32_S32 4102 : 
TEX_UNIFIED_2D_ARRAY<"tex.a2d.v4.u32.s32", Int32Regs, Int32Regs, 4103 int_nvvm_tex_unified_2d_array_v4u32_s32>; 4104defm TEX_UNIFIED_2D_ARRAY_U32_F32 4105 : TEX_UNIFIED_2D_ARRAY<"tex.a2d.v4.u32.f32", Int32Regs, Float32Regs, 4106 int_nvvm_tex_unified_2d_array_v4u32_f32>; 4107 4108class TEX_UNIFIED_2D_ARRAY_LEVEL_base<string inst, NVPTXRegClass outtype, 4109 NVPTXRegClass intype, dag tex, 4110 list<dag> pattern = []> 4111 : NVPTXInst<(outs outtype:$r, outtype:$g, 4112 outtype:$b, outtype:$a), 4113 !con(tex, (ins Int32Regs:$l, intype:$x, intype:$y, 4114 intype:$lod)), 4115 inst # " \t\\{$r, $g, $b, $a\\}," 4116 " [$t, \\{$l, $x, $y, $y\\}], $lod;", 4117 pattern>; 4118multiclass TEX_UNIFIED_2D_ARRAY_LEVEL<string inst, NVPTXRegClass outtype, 4119 NVPTXRegClass intype, Intrinsic intr> { 4120 def _R : TEX_UNIFIED_2D_ARRAY_LEVEL_base< 4121 inst, outtype, intype, (ins Int64Regs:$t), 4122 [(set outtype:$r, outtype:$g, outtype:$b, outtype:$a, 4123 (intr i64:$t, i32:$l, intype:$x, intype:$y, intype:$lod))]>; 4124 def _I : TEX_UNIFIED_2D_ARRAY_LEVEL_base<inst, outtype, intype, 4125 (ins i64imm:$t)>; 4126} 4127 4128defm TEX_UNIFIED_2D_ARRAY_F32_F32_LEVEL 4129 : TEX_UNIFIED_2D_ARRAY_LEVEL<"tex.level.a2d.v4.f32.f32", 4130 Float32Regs, Float32Regs, 4131 int_nvvm_tex_unified_2d_array_level_v4f32_f32>; 4132defm TEX_UNIFIED_2D_ARRAY_S32_F32_LEVEL 4133 : TEX_UNIFIED_2D_ARRAY_LEVEL<"tex.level.a2d.v4.s32.f32", 4134 Int32Regs, Float32Regs, 4135 int_nvvm_tex_unified_2d_array_level_v4s32_f32>; 4136defm TEX_UNIFIED_2D_ARRAY_U32_F32_LEVEL 4137 : TEX_UNIFIED_2D_ARRAY_LEVEL<"tex.level.a2d.v4.u32.f32", 4138 Int32Regs, Float32Regs, 4139 int_nvvm_tex_unified_2d_array_level_v4u32_f32>; 4140 4141class TEX_UNIFIED_2D_ARRAY_GRAD_base<string inst, NVPTXRegClass outtype, 4142 NVPTXRegClass intype, dag tex, 4143 list<dag> pattern = []> 4144 : NVPTXInst<(outs outtype:$r, outtype:$g, 4145 outtype:$b, outtype:$a), 4146 !con(tex, (ins Int32Regs:$l, intype:$x, intype:$y, 4147 intype:$gradx0, intype:$gradx1, 
4148 intype:$grady0, intype:$grady1)), 4149 inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$l, $x, $y, $y\\}]," 4150 " \\{$gradx0, $gradx1\\}, \\{$grady0, $grady1\\};", 4151 pattern>; 4152multiclass TEX_UNIFIED_2D_ARRAY_GRAD<string inst, NVPTXRegClass outtype, 4153 NVPTXRegClass intype, Intrinsic intr> { 4154 def _R : TEX_UNIFIED_2D_ARRAY_GRAD_base< 4155 inst, outtype, intype, (ins Int64Regs:$t), 4156 [(set outtype:$r, outtype:$g, outtype:$b, outtype:$a, 4157 (intr i64:$t, i32:$l, intype:$x, intype:$y, 4158 intype:$gradx0, intype:$gradx1, 4159 intype:$grady0, intype:$grady1))]>; 4160 def _I : TEX_UNIFIED_2D_ARRAY_GRAD_base<inst, outtype, intype, 4161 (ins i64imm:$t)>; 4162} 4163 4164defm TEX_UNIFIED_2D_ARRAY_F32_F32_GRAD 4165 : TEX_UNIFIED_2D_ARRAY_GRAD<"tex.grad.a2d.v4.f32.f32", 4166 Float32Regs, Float32Regs, 4167 int_nvvm_tex_unified_2d_array_grad_v4f32_f32>; 4168defm TEX_UNIFIED_2D_ARRAY_S32_F32_GRAD 4169 : TEX_UNIFIED_2D_ARRAY_GRAD<"tex.grad.a2d.v4.s32.f32", 4170 Int32Regs, Float32Regs, 4171 int_nvvm_tex_unified_2d_array_grad_v4s32_f32>; 4172defm TEX_UNIFIED_2D_ARRAY_U32_F32_GRAD 4173 : TEX_UNIFIED_2D_ARRAY_GRAD<"tex.grad.a2d.v4.u32.f32", 4174 Int32Regs, Float32Regs, 4175 int_nvvm_tex_unified_2d_array_grad_v4u32_f32>; 4176 4177class TEX_UNIFIED_3D_base<string inst, NVPTXRegClass outtype, 4178 NVPTXRegClass intype, dag tex, list<dag> pattern = []> 4179 : NVPTXInst<(outs outtype:$r, outtype:$g, 4180 outtype:$b, outtype:$a), 4181 !con(tex, (ins intype:$x, intype:$y, intype:$z)), 4182 inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$x, $y, $z, $z\\}];", 4183 pattern>; 4184multiclass TEX_UNIFIED_3D<string inst, NVPTXRegClass outtype, 4185 NVPTXRegClass intype, Intrinsic intr> { 4186 def _R : TEX_UNIFIED_3D_base< 4187 inst, outtype, intype, (ins Int64Regs:$t), 4188 [(set outtype:$r, outtype:$g, outtype:$b, outtype:$a, 4189 (intr i64:$t, intype:$x, intype:$y, intype:$z))]>; 4190 def _I : TEX_UNIFIED_3D_base<inst, outtype, intype, (ins i64imm:$t)>; 4191} 4192 4193defm 
TEX_UNIFIED_3D_F32_S32 4194 : TEX_UNIFIED_3D<"tex.3d.v4.f32.s32", Float32Regs, Int32Regs, 4195 int_nvvm_tex_unified_3d_v4f32_s32>; 4196defm TEX_UNIFIED_3D_F32_F32 4197 : TEX_UNIFIED_3D<"tex.3d.v4.f32.f32", Float32Regs, Float32Regs, 4198 int_nvvm_tex_unified_3d_v4f32_f32>; 4199defm TEX_UNIFIED_3D_S32_S32 4200 : TEX_UNIFIED_3D<"tex.3d.v4.s32.s32", Int32Regs, Int32Regs, 4201 int_nvvm_tex_unified_3d_v4s32_s32>; 4202defm TEX_UNIFIED_3D_S32_F32 4203 : TEX_UNIFIED_3D<"tex.3d.v4.s32.f32", Int32Regs, Float32Regs, 4204 int_nvvm_tex_unified_3d_v4s32_f32>; 4205defm TEX_UNIFIED_3D_U32_S32 4206 : TEX_UNIFIED_3D<"tex.3d.v4.u32.s32", Int32Regs, Int32Regs, 4207 int_nvvm_tex_unified_3d_v4u32_s32>; 4208defm TEX_UNIFIED_3D_U32_F32 4209 : TEX_UNIFIED_3D<"tex.3d.v4.u32.f32", Int32Regs, Float32Regs, 4210 int_nvvm_tex_unified_3d_v4u32_f32>; 4211 4212class TEX_UNIFIED_3D_LEVEL_base<string inst, NVPTXRegClass outtype, 4213 NVPTXRegClass intype, dag tex, 4214 list<dag> pattern = []> 4215 : NVPTXInst<(outs outtype:$r, outtype:$g, 4216 outtype:$b, outtype:$a), 4217 !con(tex, (ins intype:$x, intype:$y, intype:$z, intype:$lod)), 4218 inst # " \t\\{$r, $g, $b, $a\\}," 4219 " [$t, \\{$x, $y, $z, $z\\}], $lod;", 4220 pattern>; 4221multiclass TEX_UNIFIED_3D_LEVEL<string inst, NVPTXRegClass outtype, 4222 NVPTXRegClass intype, Intrinsic intr> { 4223 def _R : TEX_UNIFIED_3D_LEVEL_base< 4224 inst, outtype, intype, (ins Int64Regs:$t), 4225 [(set outtype:$r, outtype:$g, outtype:$b, outtype:$a, 4226 (intr i64:$t, intype:$x, intype:$y, intype:$z, intype:$lod))]>; 4227 def _I : TEX_UNIFIED_3D_LEVEL_base<inst, outtype, intype, (ins i64imm:$t)>; 4228} 4229 4230defm TEX_UNIFIED_3D_F32_F32_LEVEL 4231 : TEX_UNIFIED_3D_LEVEL<"tex.level.3d.v4.f32.f32", Float32Regs, Float32Regs, 4232 int_nvvm_tex_unified_3d_level_v4f32_f32>; 4233defm TEX_UNIFIED_3D_S32_F32_LEVEL 4234 : TEX_UNIFIED_3D_LEVEL<"tex.level.3d.v4.s32.f32", Int32Regs, Float32Regs, 4235 int_nvvm_tex_unified_3d_level_v4s32_f32>; 4236defm 
TEX_UNIFIED_3D_U32_F32_LEVEL 4237 : TEX_UNIFIED_3D_LEVEL<"tex.level.3d.v4.u32.f32", Int32Regs, Float32Regs, 4238 int_nvvm_tex_unified_3d_level_v4u32_f32>; 4239 4240class TEX_UNIFIED_3D_GRAD_base<string inst, NVPTXRegClass outtype, 4241 NVPTXRegClass intype, dag tex, 4242 list<dag> pattern = []> 4243 : NVPTXInst<(outs outtype:$r, outtype:$g, 4244 outtype:$b, outtype:$a), 4245 !con(tex, (ins intype:$x, intype:$y, intype:$z, 4246 intype:$gradx0, intype:$gradx1, 4247 intype:$gradx2, intype:$grady0, 4248 intype:$grady1, intype:$grady2)), 4249 inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$x, $y, $z, $z\\}]," 4250 " \\{$gradx0, $gradx1, $gradx2, $gradx2\\}," 4251 " \\{$grady0, $grady1, $grady2, $grady2\\};", 4252 pattern>; 4253multiclass TEX_UNIFIED_3D_GRAD<string inst, NVPTXRegClass outtype, 4254 NVPTXRegClass intype, Intrinsic intr> { 4255 def _R : TEX_UNIFIED_3D_GRAD_base< 4256 inst, outtype, intype, (ins Int64Regs:$t), 4257 [(set outtype:$r, outtype:$g, outtype:$b, outtype:$a, 4258 (intr i64:$t, intype:$x, intype:$y, intype:$z, 4259 intype:$gradx0, intype:$gradx1, intype:$gradx2, 4260 intype:$grady0, intype:$grady1, intype:$grady2))]>; 4261 def _I : TEX_UNIFIED_3D_GRAD_base<inst, outtype, intype, (ins i64imm:$t)>; 4262} 4263 4264defm TEX_UNIFIED_3D_F32_F32_GRAD 4265 : TEX_UNIFIED_3D_GRAD<"tex.grad.3d.v4.f32.f32", Float32Regs, Float32Regs, 4266 int_nvvm_tex_unified_3d_grad_v4f32_f32>; 4267defm TEX_UNIFIED_3D_S32_F32_GRAD 4268 : TEX_UNIFIED_3D_GRAD<"tex.grad.3d.v4.s32.f32", Int32Regs, Float32Regs, 4269 int_nvvm_tex_unified_3d_grad_v4s32_f32>; 4270defm TEX_UNIFIED_3D_U32_F32_GRAD 4271 : TEX_UNIFIED_3D_GRAD<"tex.grad.3d.v4.u32.f32", Int32Regs, Float32Regs, 4272 int_nvvm_tex_unified_3d_grad_v4u32_f32>; 4273 4274class TEX_UNIFIED_CUBE_base<string inst, NVPTXRegClass outtype, 4275 NVPTXRegClass intype, dag tex, 4276 list<dag> pattern = []> 4277 : NVPTXInst<(outs outtype:$r, outtype:$g, 4278 outtype:$b, outtype:$a), 4279 !con(tex, (ins intype:$x, intype:$y, intype:$z)), 4280 inst 
# " \t\\{$r, $g, $b, $a\\}, [$t, \\{$x, $y, $z, $z\\}];", 4281 pattern>; 4282multiclass TEX_UNIFIED_CUBE<string inst, NVPTXRegClass outtype, 4283 NVPTXRegClass intype, Intrinsic intr> { 4284 def _R : TEX_UNIFIED_CUBE_base< 4285 inst, outtype, intype, (ins Int64Regs:$t), 4286 [(set outtype:$r, outtype:$g, outtype:$b, outtype:$a, 4287 (intr i64:$t, intype:$x, intype:$y, intype:$z))]>; 4288 def _I : TEX_UNIFIED_CUBE_base<inst, outtype, intype, (ins i64imm:$t)>; 4289} 4290 4291defm TEX_UNIFIED_CUBE_F32_F32 4292 : TEX_UNIFIED_CUBE<"tex.cube.v4.f32.f32", Float32Regs, Float32Regs, 4293 int_nvvm_tex_unified_cube_v4f32_f32>; 4294defm TEX_UNIFIED_CUBE_S32_F32 4295 : TEX_UNIFIED_CUBE<"tex.cube.v4.s32.f32", Int32Regs, Float32Regs, 4296 int_nvvm_tex_unified_cube_v4s32_f32>; 4297defm TEX_UNIFIED_CUBE_U32_F32 4298 : TEX_UNIFIED_CUBE<"tex.cube.v4.u32.f32", Int32Regs, Float32Regs, 4299 int_nvvm_tex_unified_cube_v4u32_f32>; 4300 4301class TEX_UNIFIED_CUBE_LEVEL_base<string inst, NVPTXRegClass outtype, 4302 NVPTXRegClass intype, dag tex, 4303 list<dag> pattern = []> 4304 : NVPTXInst<(outs outtype:$r, outtype:$g, 4305 outtype:$b, outtype:$a), 4306 !con(tex, (ins intype:$x, intype:$y, intype:$z, intype:$lod)), 4307 inst # " \t\\{$r, $g, $b, $a\\}," 4308 " [$t, \\{$x, $y, $z, $z\\}], $lod;", 4309 pattern>; 4310multiclass TEX_UNIFIED_CUBE_LEVEL<string inst, NVPTXRegClass outtype, 4311 NVPTXRegClass intype, Intrinsic intr> { 4312 def _R : TEX_UNIFIED_CUBE_LEVEL_base< 4313 inst, outtype, intype, (ins Int64Regs:$t), 4314 [(set outtype:$r, outtype:$g, outtype:$b, outtype:$a, 4315 (intr i64:$t, intype:$x, intype:$y, intype:$z, intype:$lod))]>; 4316 def _I : TEX_UNIFIED_CUBE_LEVEL_base<inst, outtype, intype, 4317 (ins i64imm:$t)>; 4318} 4319 4320defm TEX_UNIFIED_CUBE_F32_F32_LEVEL 4321 : TEX_UNIFIED_CUBE_LEVEL<"tex.level.cube.v4.f32.f32", 4322 Float32Regs, Float32Regs, 4323 int_nvvm_tex_unified_cube_level_v4f32_f32>; 4324defm TEX_UNIFIED_CUBE_S32_F32_LEVEL 4325 : 
TEX_UNIFIED_CUBE_LEVEL<"tex.level.cube.v4.s32.f32", 4326 Int32Regs, Float32Regs, 4327 int_nvvm_tex_unified_cube_level_v4s32_f32>; 4328defm TEX_UNIFIED_CUBE_U32_F32_LEVEL 4329 : TEX_UNIFIED_CUBE_LEVEL<"tex.level.cube.v4.u32.f32", 4330 Int32Regs, Float32Regs, 4331 int_nvvm_tex_unified_cube_level_v4u32_f32>; 4332 4333class TEX_UNIFIED_CUBE_ARRAY_base<string inst, NVPTXRegClass outtype, 4334 NVPTXRegClass intype, dag tex, 4335 list<dag> pattern = []> 4336 : NVPTXInst<(outs outtype:$r, outtype:$g, 4337 outtype:$b, outtype:$a), 4338 !con(tex, (ins Int32Regs:$l, intype:$x, intype:$y, intype:$z)), 4339 inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$l, $x, $y, $z\\}];", 4340 pattern>; 4341multiclass TEX_UNIFIED_CUBE_ARRAY<string inst, NVPTXRegClass outtype, 4342 NVPTXRegClass intype, Intrinsic intr> { 4343 def _R : TEX_UNIFIED_CUBE_ARRAY_base< 4344 inst, outtype, intype, (ins Int64Regs:$t), 4345 [(set outtype:$r, outtype:$g, outtype:$b, outtype:$a, 4346 (intr i64:$t, i32:$l, intype:$x, intype:$y, intype:$z))]>; 4347 def _I : TEX_UNIFIED_CUBE_ARRAY_base<inst, outtype, intype, 4348 (ins i64imm:$t)>; 4349} 4350 4351defm TEX_UNIFIED_CUBE_ARRAY_F32_F32 4352 : TEX_UNIFIED_CUBE_ARRAY<"tex.acube.v4.f32.f32", Float32Regs, Float32Regs, 4353 int_nvvm_tex_unified_cube_array_v4f32_f32>; 4354defm TEX_UNIFIED_CUBE_ARRAY_S32_F32 4355 : TEX_UNIFIED_CUBE_ARRAY<"tex.acube.v4.s32.f32", Int32Regs, Float32Regs, 4356 int_nvvm_tex_unified_cube_array_v4s32_f32>; 4357defm TEX_UNIFIED_CUBE_ARRAY_U32_F32 4358 : TEX_UNIFIED_CUBE_ARRAY<"tex.acube.v4.u32.f32", Int32Regs, Float32Regs, 4359 int_nvvm_tex_unified_cube_array_v4u32_f32>; 4360 4361class TEX_UNIFIED_CUBE_ARRAY_LEVEL_base<string inst, NVPTXRegClass outtype, 4362 NVPTXRegClass intype, dag tex, 4363 list<dag> pattern = []> 4364 : NVPTXInst<(outs outtype:$r, outtype:$g, 4365 outtype:$b, outtype:$a), 4366 !con(tex, (ins Int32Regs:$l, intype:$x, intype:$y, intype:$z, 4367 intype:$lod)), 4368 inst # " \t\\{$r, $g, $b, $a\\}," 4369 " [$t, \\{$l, $x, $y, 
$z\\}], $lod;", 4370 pattern>; 4371multiclass TEX_UNIFIED_CUBE_ARRAY_LEVEL<string inst, NVPTXRegClass outtype, 4372 NVPTXRegClass intype, Intrinsic intr> { 4373 def _R : TEX_UNIFIED_CUBE_ARRAY_LEVEL_base< 4374 inst, outtype, intype, (ins Int64Regs:$t), 4375 [(set outtype:$r, outtype:$g, outtype:$b, outtype:$a, 4376 (intr i64:$t, i32:$l, intype:$x, intype:$y, intype:$z, intype:$lod))]>; 4377 def _I : TEX_UNIFIED_CUBE_ARRAY_LEVEL_base<inst, outtype, intype, 4378 (ins i64imm:$t)>; 4379} 4380 4381defm TEX_UNIFIED_CUBE_ARRAY_F32_F32_LEVEL 4382 : TEX_UNIFIED_CUBE_ARRAY_LEVEL<"tex.level.acube.v4.f32.f32", 4383 Float32Regs, Float32Regs, 4384 int_nvvm_tex_unified_cube_array_level_v4f32_f32>; 4385defm TEX_UNIFIED_CUBE_ARRAY_S32_F32_LEVEL 4386 : TEX_UNIFIED_CUBE_ARRAY_LEVEL<"tex.level.acube.v4.s32.f32", 4387 Int32Regs, Float32Regs, 4388 int_nvvm_tex_unified_cube_array_level_v4s32_f32>; 4389defm TEX_UNIFIED_CUBE_ARRAY_U32_F32_LEVEL 4390 : TEX_UNIFIED_CUBE_ARRAY_LEVEL<"tex.level.acube.v4.u32.f32", 4391 Int32Regs, Float32Regs, 4392 int_nvvm_tex_unified_cube_array_level_v4u32_f32>; 4393 4394class TEX_UNIFIED_CUBE_GRAD_base<string inst, NVPTXRegClass outtype, 4395 NVPTXRegClass intype, dag tex, 4396 list<dag> pattern = []> 4397 : NVPTXInst<(outs outtype:$r, outtype:$g, 4398 outtype:$b, outtype:$a), 4399 !con(tex, (ins intype:$x, intype:$y, intype:$z, 4400 intype:$gradx0, intype:$gradx1, 4401 intype:$gradx2, intype:$grady0, 4402 intype:$grady1, intype:$grady2)), 4403 inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$x, $y, $z, $z\\}]," 4404 " \\{$gradx0, $gradx1, $gradx2, $gradx2\\}," 4405 " \\{$grady0, $grady1, $grady2, $grady2\\};", 4406 pattern>; 4407 4408multiclass TEX_UNIFIED_CUBE_GRAD<string inst, NVPTXRegClass outtype, 4409 NVPTXRegClass intype, Intrinsic intr> { 4410 def _R : TEX_UNIFIED_CUBE_GRAD_base< 4411 inst, outtype, intype, (ins Int64Regs:$t), 4412 [(set outtype:$r, outtype:$g, outtype:$b, outtype:$a, 4413 (intr i64:$t, intype:$x, intype:$y, intype:$z, 4414 intype:$gradx0, 
intype:$gradx1, intype:$gradx2,
                         intype:$grady0, intype:$grady1, intype:$grady2))]>;
  def _I : TEX_UNIFIED_CUBE_GRAD_base<inst, outtype, intype, (ins i64imm:$t)>;
}

defm TEX_UNIFIED_CUBE_F32_F32_GRAD
  : TEX_UNIFIED_CUBE_GRAD<"tex.grad.cube.v4.f32.f32", Float32Regs, Float32Regs,
                          int_nvvm_tex_unified_cube_grad_v4f32_f32>;
defm TEX_UNIFIED_CUBE_S32_F32_GRAD
  : TEX_UNIFIED_CUBE_GRAD<"tex.grad.cube.v4.s32.f32", Int32Regs, Float32Regs,
                          int_nvvm_tex_unified_cube_grad_v4s32_f32>;
defm TEX_UNIFIED_CUBE_U32_F32_GRAD
  : TEX_UNIFIED_CUBE_GRAD<"tex.grad.cube.v4.u32.f32", Int32Regs, Float32Regs,
                          int_nvvm_tex_unified_cube_grad_v4u32_f32>;

// Gradient fetch with an additional leading i32 register input $l.
//   outs : four `outtype` results ($r, $g, $b, $a).
//   ins  : `tex` (must supply $t), $l, three coordinates and two
//          3-component gradients, all of class `intype` except $l.
// The two gradient vectors are printed with four elements by repeating the
// last component ($gradx2, $grady2); the address vector is {$l, $x, $y, $z}.
// NOTE(review): $l is presumably the array layer index - confirm against the
// PTX ISA.
class TEX_UNIFIED_CUBE_ARRAY_GRAD_base<string inst, NVPTXRegClass outtype,
                                       NVPTXRegClass intype, dag tex,
                                       list<dag> pattern = []>
    : NVPTXInst<(outs outtype:$r, outtype:$g,
                      outtype:$b, outtype:$a),
                !con(tex, (ins Int32Regs:$l, intype:$x, intype:$y, intype:$z,
                               intype:$gradx0, intype:$gradx1,
                               intype:$gradx2, intype:$grady0,
                               intype:$grady1, intype:$grady2)),
                inst # " \t\\{$r, $g, $b, $a\\}, [$t, \\{$l, $x, $y, $z\\}],"
                       " \\{$gradx0, $gradx1, $gradx2, $gradx2\\},"
                       " \\{$grady0, $grady1, $grady2, $grady2\\};",
                pattern>;
// Produces two records per defm:
//   _R : $t held in a 64-bit register; selected from intrinsic `intr`.
//   _I : $t given as a 64-bit immediate; no selection pattern.
multiclass TEX_UNIFIED_CUBE_ARRAY_GRAD<string inst, NVPTXRegClass outtype,
                                       NVPTXRegClass intype, Intrinsic intr> {
  def _R : TEX_UNIFIED_CUBE_ARRAY_GRAD_base<
             inst, outtype, intype, (ins Int64Regs:$t),
             [(set outtype:$r, outtype:$g, outtype:$b, outtype:$a,
                   (intr i64:$t, i32:$l, intype:$x, intype:$y, intype:$z,
                         intype:$gradx0, intype:$gradx1,
                         intype:$gradx2, intype:$grady0,
                         intype:$grady1, intype:$grady2))]>;
  def _I : TEX_UNIFIED_CUBE_ARRAY_GRAD_base<inst, outtype, intype,
                                            (ins i64imm:$t)>;
}

defm TEX_UNIFIED_CUBE_ARRAY_F32_F32_GRAD
  : TEX_UNIFIED_CUBE_ARRAY_GRAD<"tex.grad.acube.v4.f32.f32",
                                Float32Regs, Float32Regs,
int_nvvm_tex_unified_cube_array_grad_v4f32_f32>;
defm TEX_UNIFIED_CUBE_ARRAY_S32_F32_GRAD
  : TEX_UNIFIED_CUBE_ARRAY_GRAD<"tex.grad.acube.v4.s32.f32",
                                Int32Regs, Float32Regs,
                                int_nvvm_tex_unified_cube_array_grad_v4s32_f32>;
defm TEX_UNIFIED_CUBE_ARRAY_U32_F32_GRAD
  : TEX_UNIFIED_CUBE_ARRAY_GRAD<"tex.grad.acube.v4.u32.f32",
                                Int32Regs, Float32Regs,
                                int_nvvm_tex_unified_cube_array_grad_v4u32_f32>;

// Common instruction shape for the TLD4_UNIFIED_2D variants.
//   outs : four `outtype` results ($v0..$v3).
//   ins  : `tex` (must supply the $t operand named in the asm string)
//          followed by two `intype` coordinates ($x, $y).
//   pattern defaults to [] (no selection pattern).
class TLD4_UNIFIED_2D_base<string inst, NVPTXRegClass outtype,
                           NVPTXRegClass intype, dag tex,
                           list<dag> pattern = []>
    : NVPTXInst<(outs outtype:$v0, outtype:$v1,
                      outtype:$v2, outtype:$v3),
                !con(tex, (ins intype:$x, intype:$y)),
                inst # " \t\\{$v0, $v1, $v2, $v3\\}, [$t, \\{$x, $y\\}];",
                pattern>;
// Produces two records per defm:
//   _R : $t held in a 64-bit register; selected from intrinsic `intr`.
//   _I : $t given as a 64-bit immediate; no selection pattern.
multiclass TLD4_UNIFIED_2D<string inst, NVPTXRegClass outtype,
                           NVPTXRegClass intype, Intrinsic intr> {
  def _R : TLD4_UNIFIED_2D_base<
             inst, outtype, intype, (ins Int64Regs:$t),
             [(set outtype:$v0, outtype:$v1, outtype:$v2, outtype:$v3,
                   (intr i64:$t, intype:$x, intype:$y))]>;
  def _I : TLD4_UNIFIED_2D_base<inst, outtype, intype, (ins i64imm:$t)>;
}

// One defm per tld4 component selector (r, g, b, a) and result type.
defm TLD4_UNIFIED_R_2D_F32_F32
  : TLD4_UNIFIED_2D<"tld4.r.2d.v4.f32.f32", Float32Regs, Float32Regs,
                    int_nvvm_tld4_unified_r_2d_v4f32_f32>;
defm TLD4_UNIFIED_G_2D_F32_F32
  : TLD4_UNIFIED_2D<"tld4.g.2d.v4.f32.f32", Float32Regs, Float32Regs,
                    int_nvvm_tld4_unified_g_2d_v4f32_f32>;
defm TLD4_UNIFIED_B_2D_F32_F32
  : TLD4_UNIFIED_2D<"tld4.b.2d.v4.f32.f32", Float32Regs, Float32Regs,
                    int_nvvm_tld4_unified_b_2d_v4f32_f32>;
defm TLD4_UNIFIED_A_2D_F32_F32
  : TLD4_UNIFIED_2D<"tld4.a.2d.v4.f32.f32", Float32Regs, Float32Regs,
                    int_nvvm_tld4_unified_a_2d_v4f32_f32>;

defm TLD4_UNIFIED_R_2D_S32_F32
  : TLD4_UNIFIED_2D<"tld4.r.2d.v4.s32.f32", Int32Regs, Float32Regs,
                    int_nvvm_tld4_unified_r_2d_v4s32_f32>;
defm TLD4_UNIFIED_G_2D_S32_F32
  :
TLD4_UNIFIED_2D<"tld4.g.2d.v4.s32.f32", Int32Regs, Float32Regs,
                    int_nvvm_tld4_unified_g_2d_v4s32_f32>;
defm TLD4_UNIFIED_B_2D_S32_F32
  : TLD4_UNIFIED_2D<"tld4.b.2d.v4.s32.f32", Int32Regs, Float32Regs,
                    int_nvvm_tld4_unified_b_2d_v4s32_f32>;
defm TLD4_UNIFIED_A_2D_S32_F32
  : TLD4_UNIFIED_2D<"tld4.a.2d.v4.s32.f32", Int32Regs, Float32Regs,
                    int_nvvm_tld4_unified_a_2d_v4s32_f32>;

defm TLD4_UNIFIED_R_2D_U32_F32
  : TLD4_UNIFIED_2D<"tld4.r.2d.v4.u32.f32", Int32Regs, Float32Regs,
                    int_nvvm_tld4_unified_r_2d_v4u32_f32>;
defm TLD4_UNIFIED_G_2D_U32_F32
  : TLD4_UNIFIED_2D<"tld4.g.2d.v4.u32.f32", Int32Regs, Float32Regs,
                    int_nvvm_tld4_unified_g_2d_v4u32_f32>;
defm TLD4_UNIFIED_B_2D_U32_F32
  : TLD4_UNIFIED_2D<"tld4.b.2d.v4.u32.f32", Int32Regs, Float32Regs,
                    int_nvvm_tld4_unified_b_2d_v4u32_f32>;
defm TLD4_UNIFIED_A_2D_U32_F32
  : TLD4_UNIFIED_2D<"tld4.a.2d.v4.u32.f32", Int32Regs, Float32Regs,
                    int_nvvm_tld4_unified_a_2d_v4u32_f32>;

} // closes a `let ... in {` region opened earlier in the file



//=== Surface load instructions

// Single-result surface loads: every record in this region has IsSuld set.
let IsSuld = true in {

// Common instruction shape for single-result 1D surface loads.
//   outs : one `outtype` result ($r).
//   ins  : `surf` (must supply the $s operand named in the asm string)
//          followed by one i32 register coordinate ($x).
//   pattern defaults to [] (no selection pattern).
class SULD_1D_base<string inst, NVPTXRegClass outtype, dag surf,
                   list<dag> pattern = []>
    : NVPTXInst<(outs outtype:$r),
                !con(surf, (ins Int32Regs:$x)),
                inst # " \\{$r\\}, [$s, \\{$x\\}];",
                pattern>;
// Produces two records per defm:
//   _R : $s held in a 64-bit register; selected from the intrinsic whose
//        record name is "int_nvvm_" followed by the lower-cased defm name.
//   _I : $s given as a 64-bit immediate; no selection pattern.
multiclass SULD_1D<string inst, NVPTXRegClass outtype> {
  defvar intr = !cast<Intrinsic>("int_nvvm_" # !tolower(NAME));

  def _R : SULD_1D_base<inst, outtype, (ins Int64Regs:$s),
                        [(set outtype:$r, (intr i64:$s, i32:$x))]>;
  def _I : SULD_1D_base<inst, outtype, (ins i64imm:$s)>;
}

// Note: the b8 variants use Int16Regs for the result.
defm SULD_1D_I8_CLAMP : SULD_1D<"suld.b.1d.b8.clamp", Int16Regs>;
defm SULD_1D_I16_CLAMP : SULD_1D<"suld.b.1d.b16.clamp", Int16Regs>;
defm SULD_1D_I32_CLAMP : SULD_1D<"suld.b.1d.b32.clamp", Int32Regs>;
defm SULD_1D_I64_CLAMP : SULD_1D<"suld.b.1d.b64.clamp", Int64Regs>;

defm SULD_1D_I8_TRAP :
SULD_1D<"suld.b.1d.b8.trap", Int16Regs>;
defm SULD_1D_I16_TRAP : SULD_1D<"suld.b.1d.b16.trap", Int16Regs>;
defm SULD_1D_I32_TRAP : SULD_1D<"suld.b.1d.b32.trap", Int32Regs>;
defm SULD_1D_I64_TRAP : SULD_1D<"suld.b.1d.b64.trap", Int64Regs>;

defm SULD_1D_I8_ZERO : SULD_1D<"suld.b.1d.b8.zero", Int16Regs>;
defm SULD_1D_I16_ZERO : SULD_1D<"suld.b.1d.b16.zero", Int16Regs>;
defm SULD_1D_I32_ZERO : SULD_1D<"suld.b.1d.b32.zero", Int32Regs>;
defm SULD_1D_I64_ZERO : SULD_1D<"suld.b.1d.b64.zero", Int64Regs>;

// Common instruction shape for single-result 1D-array surface loads.
//   outs : one `outtype` result ($r).
//   ins  : `surf` (must supply the $s operand named in the asm string)
//          followed by two i32 registers ($l, $x).
//   pattern defaults to [] (no selection pattern).
// NOTE(review): $l is presumably the array layer index - confirm against the
// PTX ISA.
class SULD_1D_ARRAY_base<string inst, NVPTXRegClass outtype, dag surf,
                         list<dag> pattern = []>
    : NVPTXInst<(outs outtype:$r),
                !con(surf, (ins Int32Regs:$l, Int32Regs:$x)),
                inst # " \\{$r\\}, [$s, \\{$l, $x\\}];",
                pattern>;
// Produces two records per defm:
//   _R : $s held in a 64-bit register; selected from the intrinsic whose
//        record name is "int_nvvm_" followed by the lower-cased defm name.
//   _I : $s given as a 64-bit immediate; no selection pattern.
multiclass SULD_1D_ARRAY<string inst, NVPTXRegClass outtype> {
  defvar intr = !cast<Intrinsic>("int_nvvm_" # !tolower(NAME));

  def _R : SULD_1D_ARRAY_base<inst, outtype, (ins Int64Regs:$s),
                              [(set outtype:$r,
                                    (intr i64:$s, i32:$l, i32:$x))]>;
  def _I : SULD_1D_ARRAY_base<inst, outtype, (ins i64imm:$s)>;
}

defm SULD_1D_ARRAY_I8_CLAMP
  : SULD_1D_ARRAY<"suld.b.a1d.b8.clamp", Int16Regs>;
defm SULD_1D_ARRAY_I16_CLAMP
  : SULD_1D_ARRAY<"suld.b.a1d.b16.clamp", Int16Regs>;
defm SULD_1D_ARRAY_I32_CLAMP
  : SULD_1D_ARRAY<"suld.b.a1d.b32.clamp", Int32Regs>;
defm SULD_1D_ARRAY_I64_CLAMP
  : SULD_1D_ARRAY<"suld.b.a1d.b64.clamp", Int64Regs>;

defm SULD_1D_ARRAY_I8_TRAP
  : SULD_1D_ARRAY<"suld.b.a1d.b8.trap", Int16Regs>;
defm SULD_1D_ARRAY_I16_TRAP
  : SULD_1D_ARRAY<"suld.b.a1d.b16.trap", Int16Regs>;
defm SULD_1D_ARRAY_I32_TRAP
  : SULD_1D_ARRAY<"suld.b.a1d.b32.trap", Int32Regs>;
defm SULD_1D_ARRAY_I64_TRAP
  : SULD_1D_ARRAY<"suld.b.a1d.b64.trap", Int64Regs>;

defm SULD_1D_ARRAY_I8_ZERO
  : SULD_1D_ARRAY<"suld.b.a1d.b8.zero", Int16Regs>;
defm SULD_1D_ARRAY_I16_ZERO
  :
SULD_1D_ARRAY<"suld.b.a1d.b16.zero", Int16Regs>;
defm SULD_1D_ARRAY_I32_ZERO
  : SULD_1D_ARRAY<"suld.b.a1d.b32.zero", Int32Regs>;
defm SULD_1D_ARRAY_I64_ZERO
  : SULD_1D_ARRAY<"suld.b.a1d.b64.zero", Int64Regs>;

// Common instruction shape for single-result 2D surface loads.
//   outs : one `outtype` result ($r).
//   ins  : `surf` (must supply the $s operand named in the asm string)
//          followed by two i32 register coordinates ($x, $y).
//   pattern defaults to [] (no selection pattern).
class SULD_2D_base<string inst, NVPTXRegClass outtype, dag surf,
                   list<dag> pattern = []>
    : NVPTXInst<(outs outtype:$r),
                !con(surf, (ins Int32Regs:$x, Int32Regs:$y)),
                inst # " \\{$r\\}, [$s, \\{$x, $y\\}];",
                pattern>;
// Produces two records per defm:
//   _R : $s held in a 64-bit register; selected from the intrinsic whose
//        record name is "int_nvvm_" followed by the lower-cased defm name.
//   _I : $s given as a 64-bit immediate; no selection pattern.
multiclass SULD_2D<string inst, NVPTXRegClass outtype> {
  defvar intr = !cast<Intrinsic>("int_nvvm_" # !tolower(NAME));

  def _R : SULD_2D_base<inst, outtype, (ins Int64Regs:$s),
                        [(set outtype:$r, (intr i64:$s, i32:$x, i32:$y))]>;
  def _I : SULD_2D_base<inst, outtype, (ins i64imm:$s)>;
}

defm SULD_2D_I8_CLAMP : SULD_2D<"suld.b.2d.b8.clamp", Int16Regs>;
defm SULD_2D_I16_CLAMP : SULD_2D<"suld.b.2d.b16.clamp", Int16Regs>;
defm SULD_2D_I32_CLAMP : SULD_2D<"suld.b.2d.b32.clamp", Int32Regs>;
defm SULD_2D_I64_CLAMP : SULD_2D<"suld.b.2d.b64.clamp", Int64Regs>;

defm SULD_2D_I8_TRAP : SULD_2D<"suld.b.2d.b8.trap", Int16Regs>;
defm SULD_2D_I16_TRAP : SULD_2D<"suld.b.2d.b16.trap", Int16Regs>;
defm SULD_2D_I32_TRAP : SULD_2D<"suld.b.2d.b32.trap", Int32Regs>;
defm SULD_2D_I64_TRAP : SULD_2D<"suld.b.2d.b64.trap", Int64Regs>;

defm SULD_2D_I8_ZERO : SULD_2D<"suld.b.2d.b8.zero", Int16Regs>;
defm SULD_2D_I16_ZERO : SULD_2D<"suld.b.2d.b16.zero", Int16Regs>;
defm SULD_2D_I32_ZERO : SULD_2D<"suld.b.2d.b32.zero", Int32Regs>;
defm SULD_2D_I64_ZERO : SULD_2D<"suld.b.2d.b64.zero", Int64Regs>;

// Common instruction shape for single-result 2D-array surface loads.
//   ins  : `surf` (must supply $s) followed by i32 registers $l, $x, $y.
// The address vector is printed with four elements; $y is repeated to fill
// the last slot.
// NOTE(review): presumably the fourth element is padding required by the PTX
// operand form - confirm against the PTX ISA.
class SULD_2D_ARRAY_base<string inst, NVPTXRegClass outtype, dag surf,
                         list<dag> pattern = []>
    : NVPTXInst<(outs outtype:$r),
                !con(surf, (ins Int32Regs:$l, Int32Regs:$x, Int32Regs:$y)),
                inst # " \\{$r\\}, [$s, \\{$l, $x, $y, $y\\}];",
                pattern>;
multiclass SULD_2D_ARRAY<string inst, NVPTXRegClass outtype> {
4639 defvar intr = !cast<Intrinsic>("int_nvvm_" # !tolower(NAME)); 4640 4641 def _R : SULD_2D_ARRAY_base<inst, outtype, (ins Int64Regs:$s), 4642 [(set outtype:$r, 4643 (intr i64:$s, i32:$l, i32:$x, i32:$y))]>; 4644 def _I : SULD_2D_ARRAY_base<inst, outtype, (ins i64imm:$s)>; 4645} 4646 4647defm SULD_2D_ARRAY_I8_CLAMP : SULD_2D_ARRAY<"suld.b.a2d.b8.clamp", Int16Regs>; 4648defm SULD_2D_ARRAY_I16_CLAMP : SULD_2D_ARRAY<"suld.b.a2d.b16.clamp", Int16Regs>; 4649defm SULD_2D_ARRAY_I32_CLAMP : SULD_2D_ARRAY<"suld.b.a2d.b32.clamp", Int32Regs>; 4650defm SULD_2D_ARRAY_I64_CLAMP : SULD_2D_ARRAY<"suld.b.a2d.b64.clamp", Int64Regs>; 4651 4652defm SULD_2D_ARRAY_I8_TRAP : SULD_2D_ARRAY<"suld.b.a2d.b8.trap", Int16Regs>; 4653defm SULD_2D_ARRAY_I16_TRAP : SULD_2D_ARRAY<"suld.b.a2d.b16.trap", Int16Regs>; 4654defm SULD_2D_ARRAY_I32_TRAP : SULD_2D_ARRAY<"suld.b.a2d.b32.trap", Int32Regs>; 4655defm SULD_2D_ARRAY_I64_TRAP : SULD_2D_ARRAY<"suld.b.a2d.b64.trap", Int64Regs>; 4656 4657defm SULD_2D_ARRAY_I8_ZERO : SULD_2D_ARRAY<"suld.b.a2d.b8.zero", Int16Regs>; 4658defm SULD_2D_ARRAY_I16_ZERO : SULD_2D_ARRAY<"suld.b.a2d.b16.zero", Int16Regs>; 4659defm SULD_2D_ARRAY_I32_ZERO : SULD_2D_ARRAY<"suld.b.a2d.b32.zero", Int32Regs>; 4660defm SULD_2D_ARRAY_I64_ZERO : SULD_2D_ARRAY<"suld.b.a2d.b64.zero", Int64Regs>; 4661 4662class SULD_3D_base<string inst, NVPTXRegClass outtype, dag surf, 4663 list<dag> pattern = []> 4664 : NVPTXInst<(outs outtype:$r), 4665 !con(surf, (ins Int32Regs:$x, Int32Regs:$y, Int32Regs:$z)), 4666 inst # " \\{$r\\}, [$s, \\{$x, $y, $z, $z\\}];", 4667 pattern>; 4668multiclass SULD_3D<string inst, NVPTXRegClass outtype> { 4669 defvar intr = !cast<Intrinsic>("int_nvvm_" # !tolower(NAME)); 4670 4671 def _R : SULD_3D_base<inst, outtype, (ins Int64Regs:$s), 4672 [(set outtype:$r, 4673 (intr i64:$s, i32:$x, i32:$y, i32:$z))]>; 4674 def _I : SULD_3D_base<inst, outtype, (ins i64imm:$s)>; 4675} 4676 4677defm SULD_3D_I8_CLAMP : SULD_3D<"suld.b.3d.b8.clamp", Int16Regs>; 4678defm 
SULD_3D_I16_CLAMP : SULD_3D<"suld.b.3d.b16.clamp", Int16Regs>;
defm SULD_3D_I32_CLAMP : SULD_3D<"suld.b.3d.b32.clamp", Int32Regs>;
defm SULD_3D_I64_CLAMP : SULD_3D<"suld.b.3d.b64.clamp", Int64Regs>;

defm SULD_3D_I8_TRAP : SULD_3D<"suld.b.3d.b8.trap", Int16Regs>;
defm SULD_3D_I16_TRAP : SULD_3D<"suld.b.3d.b16.trap", Int16Regs>;
defm SULD_3D_I32_TRAP : SULD_3D<"suld.b.3d.b32.trap", Int32Regs>;
defm SULD_3D_I64_TRAP : SULD_3D<"suld.b.3d.b64.trap", Int64Regs>;

defm SULD_3D_I8_ZERO : SULD_3D<"suld.b.3d.b8.zero", Int16Regs>;
defm SULD_3D_I16_ZERO : SULD_3D<"suld.b.3d.b16.zero", Int16Regs>;
defm SULD_3D_I32_ZERO : SULD_3D<"suld.b.3d.b32.zero", Int32Regs>;
defm SULD_3D_I64_ZERO : SULD_3D<"suld.b.3d.b64.zero", Int64Regs>;
} // let IsSuld = true

// Two-result (.v2) surface loads: every record in this region has
// IsSuld = 2.
let IsSuld = 2 in {

// Common instruction shape for two-result 1D surface loads.
//   outs : two `outtype` results ($r, $g).
//   ins  : `surf` (must supply the $s operand named in the asm string)
//          followed by one i32 register coordinate ($x).
//   pattern defaults to [] (no selection pattern).
class SULD_1D_V2_base<string inst, NVPTXRegClass outtype, dag surf,
                      list<dag> pattern = []>
    : NVPTXInst<(outs outtype:$r, outtype:$g),
                !con(surf, (ins Int32Regs:$x)),
                inst # " \\{$r, $g\\}, [$s, \\{$x\\}];",
                pattern>;
// Produces two records per defm:
//   _R : $s held in a 64-bit register; selected from the intrinsic whose
//        record name is "int_nvvm_" followed by the lower-cased defm name.
//   _I : $s given as a 64-bit immediate; no selection pattern.
multiclass SULD_1D_V2<string inst, NVPTXRegClass outtype> {
  defvar intr = !cast<Intrinsic>("int_nvvm_" # !tolower(NAME));

  def _R : SULD_1D_V2_base<inst, outtype, (ins Int64Regs:$s),
                           [(set outtype:$r, outtype:$g,
                                 (intr i64:$s, i32:$x))]>;
  def _I : SULD_1D_V2_base<inst, outtype, (ins i64imm:$s)>;
}

defm SULD_1D_V2I8_CLAMP : SULD_1D_V2<"suld.b.1d.v2.b8.clamp", Int16Regs>;
defm SULD_1D_V2I16_CLAMP : SULD_1D_V2<"suld.b.1d.v2.b16.clamp", Int16Regs>;
defm SULD_1D_V2I32_CLAMP : SULD_1D_V2<"suld.b.1d.v2.b32.clamp", Int32Regs>;
defm SULD_1D_V2I64_CLAMP : SULD_1D_V2<"suld.b.1d.v2.b64.clamp", Int64Regs>;

defm SULD_1D_V2I8_TRAP : SULD_1D_V2<"suld.b.1d.v2.b8.trap", Int16Regs>;
defm SULD_1D_V2I16_TRAP : SULD_1D_V2<"suld.b.1d.v2.b16.trap", Int16Regs>;
defm SULD_1D_V2I32_TRAP : SULD_1D_V2<"suld.b.1d.v2.b32.trap", Int32Regs>;
defm SULD_1D_V2I64_TRAP :
SULD_1D_V2<"suld.b.1d.v2.b64.trap", Int64Regs>;

defm SULD_1D_V2I8_ZERO : SULD_1D_V2<"suld.b.1d.v2.b8.zero", Int16Regs>;
defm SULD_1D_V2I16_ZERO : SULD_1D_V2<"suld.b.1d.v2.b16.zero", Int16Regs>;
defm SULD_1D_V2I32_ZERO : SULD_1D_V2<"suld.b.1d.v2.b32.zero", Int32Regs>;
defm SULD_1D_V2I64_ZERO : SULD_1D_V2<"suld.b.1d.v2.b64.zero", Int64Regs>;

// Common instruction shape for two-result 1D-array surface loads.
//   outs : two `outtype` results ($r, $g).
//   ins  : `surf` (must supply the $s operand named in the asm string)
//          followed by two i32 registers ($l, $x).
//   pattern defaults to [] (no selection pattern).
// NOTE(review): $l is presumably the array layer index - confirm against the
// PTX ISA.
class SULD_1D_ARRAY_V2_base<string inst, NVPTXRegClass outtype, dag surf,
                            list<dag> pattern = []>
    : NVPTXInst<(outs outtype:$r, outtype:$g),
                !con(surf, (ins Int32Regs:$l, Int32Regs:$x)),
                inst # " \\{$r, $g\\}, [$s, \\{$l, $x\\}];",
                pattern>;
// Produces two records per defm:
//   _R : $s held in a 64-bit register; selected from the intrinsic whose
//        record name is "int_nvvm_" followed by the lower-cased defm name.
//   _I : $s given as a 64-bit immediate; no selection pattern.
multiclass SULD_1D_ARRAY_V2<string inst, NVPTXRegClass outtype> {
  defvar intr = !cast<Intrinsic>("int_nvvm_" # !tolower(NAME));

  def _R : SULD_1D_ARRAY_V2_base<inst, outtype, (ins Int64Regs:$s),
                                 [(set outtype:$r, outtype:$g,
                                       (intr i64:$s, i32:$l, i32:$x))]>;
  def _I : SULD_1D_ARRAY_V2_base<inst, outtype, (ins i64imm:$s)>;
}

defm SULD_1D_ARRAY_V2I8_CLAMP
  : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b8.clamp", Int16Regs>;
defm SULD_1D_ARRAY_V2I16_CLAMP
  : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b16.clamp", Int16Regs>;
defm SULD_1D_ARRAY_V2I32_CLAMP
  : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b32.clamp", Int32Regs>;
defm SULD_1D_ARRAY_V2I64_CLAMP
  : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b64.clamp", Int64Regs>;

defm SULD_1D_ARRAY_V2I8_TRAP
  : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b8.trap", Int16Regs>;
defm SULD_1D_ARRAY_V2I16_TRAP
  : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b16.trap", Int16Regs>;
defm SULD_1D_ARRAY_V2I32_TRAP
  : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b32.trap", Int32Regs>;
defm SULD_1D_ARRAY_V2I64_TRAP
  : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b64.trap", Int64Regs>;

defm SULD_1D_ARRAY_V2I8_ZERO
  : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b8.zero", Int16Regs>;
defm SULD_1D_ARRAY_V2I16_ZERO
  : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b16.zero", Int16Regs>;
defm
SULD_1D_ARRAY_V2I32_ZERO
  : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b32.zero", Int32Regs>;
defm SULD_1D_ARRAY_V2I64_ZERO
  : SULD_1D_ARRAY_V2<"suld.b.a1d.v2.b64.zero", Int64Regs>;

// Common instruction shape for two-result 2D surface loads.
//   outs : two `outtype` results ($r, $g).
//   ins  : `surf` (must supply the $s operand named in the asm string)
//          followed by two i32 register coordinates ($x, $y).
//   pattern defaults to [] (no selection pattern).
class SULD_2D_V2_base<string inst, NVPTXRegClass outtype, dag surf,
                      list<dag> pattern = []>
    : NVPTXInst<(outs outtype:$r, outtype:$g),
                !con(surf, (ins Int32Regs:$x, Int32Regs:$y)),
                inst # " \\{$r, $g\\}, [$s, \\{$x, $y\\}];",
                pattern>;
// Produces two records per defm:
//   _R : $s held in a 64-bit register; selected from the intrinsic whose
//        record name is "int_nvvm_" followed by the lower-cased defm name.
//   _I : $s given as a 64-bit immediate; no selection pattern.
multiclass SULD_2D_V2<string inst, NVPTXRegClass outtype> {
  defvar intr = !cast<Intrinsic>("int_nvvm_" # !tolower(NAME));

  def _R : SULD_2D_V2_base<inst, outtype, (ins Int64Regs:$s),
                           [(set outtype:$r, outtype:$g,
                                 (intr i64:$s, i32:$x, i32:$y))]>;
  def _I : SULD_2D_V2_base<inst, outtype, (ins i64imm:$s)>;
}

defm SULD_2D_V2I8_CLAMP
  : SULD_2D_V2<"suld.b.2d.v2.b8.clamp", Int16Regs>;
defm SULD_2D_V2I16_CLAMP
  : SULD_2D_V2<"suld.b.2d.v2.b16.clamp", Int16Regs>;
defm SULD_2D_V2I32_CLAMP
  : SULD_2D_V2<"suld.b.2d.v2.b32.clamp", Int32Regs>;
defm SULD_2D_V2I64_CLAMP
  : SULD_2D_V2<"suld.b.2d.v2.b64.clamp", Int64Regs>;

defm SULD_2D_V2I8_TRAP
  : SULD_2D_V2<"suld.b.2d.v2.b8.trap", Int16Regs>;
defm SULD_2D_V2I16_TRAP
  : SULD_2D_V2<"suld.b.2d.v2.b16.trap", Int16Regs>;
defm SULD_2D_V2I32_TRAP
  : SULD_2D_V2<"suld.b.2d.v2.b32.trap", Int32Regs>;
defm SULD_2D_V2I64_TRAP
  : SULD_2D_V2<"suld.b.2d.v2.b64.trap", Int64Regs>;

defm SULD_2D_V2I8_ZERO
  : SULD_2D_V2<"suld.b.2d.v2.b8.zero", Int16Regs>;
defm SULD_2D_V2I16_ZERO
  : SULD_2D_V2<"suld.b.2d.v2.b16.zero", Int16Regs>;
defm SULD_2D_V2I32_ZERO
  : SULD_2D_V2<"suld.b.2d.v2.b32.zero", Int32Regs>;
defm SULD_2D_V2I64_ZERO
  : SULD_2D_V2<"suld.b.2d.v2.b64.zero", Int64Regs>;

// Common instruction shape for two-result 2D-array surface loads: two
// `outtype` results ($r, $g); inputs are `surf` followed by three i32
// registers.
class SULD_2D_ARRAY_V2_base<string inst, NVPTXRegClass outtype, dag surf,
                            list<dag> pattern = []>
    : NVPTXInst<(outs outtype:$r, outtype:$g),
                !con(surf, (ins
Int32Regs:$l, Int32Regs:$x, Int32Regs:$y)),
                // The address vector is printed with four elements; $y is
                // repeated to fill the last slot.
                inst # " \\{$r, $g\\}, [$s, \\{$l, $x, $y, $y\\}];",
                pattern>;
// Produces two records per defm:
//   _R : $s held in a 64-bit register; selected from the intrinsic whose
//        record name is "int_nvvm_" followed by the lower-cased defm name.
//   _I : $s given as a 64-bit immediate; no selection pattern.
multiclass SULD_2D_ARRAY_V2<string inst, NVPTXRegClass outtype> {
  defvar intr = !cast<Intrinsic>("int_nvvm_" # !tolower(NAME));

  def _R : SULD_2D_ARRAY_V2_base<inst, outtype, (ins Int64Regs:$s),
                                 [(set outtype:$r, outtype:$g,
                                       (intr i64:$s, i32:$l, i32:$x, i32:$y))]>;
  def _I : SULD_2D_ARRAY_V2_base<inst, outtype, (ins i64imm:$s)>;
}

defm SULD_2D_ARRAY_V2I8_CLAMP
  : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b8.clamp", Int16Regs>;
defm SULD_2D_ARRAY_V2I16_CLAMP
  : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b16.clamp", Int16Regs>;
defm SULD_2D_ARRAY_V2I32_CLAMP
  : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b32.clamp", Int32Regs>;
defm SULD_2D_ARRAY_V2I64_CLAMP
  : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b64.clamp", Int64Regs>;

defm SULD_2D_ARRAY_V2I8_TRAP
  : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b8.trap", Int16Regs>;
defm SULD_2D_ARRAY_V2I16_TRAP
  : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b16.trap", Int16Regs>;
defm SULD_2D_ARRAY_V2I32_TRAP
  : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b32.trap", Int32Regs>;
defm SULD_2D_ARRAY_V2I64_TRAP
  : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b64.trap", Int64Regs>;

defm SULD_2D_ARRAY_V2I8_ZERO
  : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b8.zero", Int16Regs>;
defm SULD_2D_ARRAY_V2I16_ZERO
  : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b16.zero", Int16Regs>;
defm SULD_2D_ARRAY_V2I32_ZERO
  : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b32.zero", Int32Regs>;
defm SULD_2D_ARRAY_V2I64_ZERO
  : SULD_2D_ARRAY_V2<"suld.b.a2d.v2.b64.zero", Int64Regs>;

// Common instruction shape for two-result 3D surface loads.
//   outs : two `outtype` results ($r, $g).
//   ins  : `surf` (must supply $s) followed by i32 registers $x, $y, $z.
// The address vector is printed with four elements; $z is repeated to fill
// the last slot.
// NOTE(review): presumably the fourth element is padding required by the PTX
// operand form - confirm against the PTX ISA.
class SULD_3D_V2_base<string inst, NVPTXRegClass outtype, dag surf,
                      list<dag> pattern = []>
    : NVPTXInst<(outs outtype:$r, outtype:$g),
                !con(surf, (ins Int32Regs:$x, Int32Regs:$y, Int32Regs:$z)),
                inst # " \\{$r, $g\\}, [$s, \\{$x, $y, $z, $z\\}];",
                pattern>;
multiclass SULD_3D_V2<string inst,
NVPTXRegClass outtype> {
  // The intrinsic record is found by name: "int_nvvm_" followed by the
  // lower-cased defm name.
  defvar intr = !cast<Intrinsic>("int_nvvm_" # !tolower(NAME));

  // _R : register handle with selection pattern; _I : immediate handle,
  // no pattern.
  def _R : SULD_3D_V2_base<inst, outtype, (ins Int64Regs:$s),
                           [(set outtype:$r, outtype:$g,
                                 (intr i64:$s, i32:$x, i32:$y, i32:$z))]>;
  def _I : SULD_3D_V2_base<inst, outtype, (ins i64imm:$s)>;
}

defm SULD_3D_V2I8_CLAMP : SULD_3D_V2<"suld.b.3d.v2.b8.clamp", Int16Regs>;
defm SULD_3D_V2I16_CLAMP : SULD_3D_V2<"suld.b.3d.v2.b16.clamp", Int16Regs>;
defm SULD_3D_V2I32_CLAMP : SULD_3D_V2<"suld.b.3d.v2.b32.clamp", Int32Regs>;
defm SULD_3D_V2I64_CLAMP : SULD_3D_V2<"suld.b.3d.v2.b64.clamp", Int64Regs>;

defm SULD_3D_V2I8_TRAP : SULD_3D_V2<"suld.b.3d.v2.b8.trap", Int16Regs>;
defm SULD_3D_V2I16_TRAP : SULD_3D_V2<"suld.b.3d.v2.b16.trap", Int16Regs>;
defm SULD_3D_V2I32_TRAP : SULD_3D_V2<"suld.b.3d.v2.b32.trap", Int32Regs>;
defm SULD_3D_V2I64_TRAP : SULD_3D_V2<"suld.b.3d.v2.b64.trap", Int64Regs>;

defm SULD_3D_V2I8_ZERO : SULD_3D_V2<"suld.b.3d.v2.b8.zero", Int16Regs>;
defm SULD_3D_V2I16_ZERO : SULD_3D_V2<"suld.b.3d.v2.b16.zero", Int16Regs>;
defm SULD_3D_V2I32_ZERO : SULD_3D_V2<"suld.b.3d.v2.b32.zero", Int32Regs>;
defm SULD_3D_V2I64_ZERO : SULD_3D_V2<"suld.b.3d.v2.b64.zero", Int64Regs>;

} // let IsSuld = 2

// Four-result (.v4) surface loads: every record in this region has
// IsSuld = 3.
let IsSuld = 3 in {

// Common instruction shape for four-result 1D surface loads.
//   outs : four `outtype` results ($r, $g, $b, $a).
//   ins  : `surf` (must supply the $s operand named in the asm string)
//          followed by one i32 register coordinate ($x).
//   pattern defaults to [] (no selection pattern).
class SULD_1D_V4_base<string inst, NVPTXRegClass outtype, dag surf,
                      list<dag> pattern = []>
    : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
                !con(surf, (ins Int32Regs:$x)),
                inst # " \\{$r, $g, $b, $a\\}, [$s, \\{$x\\}];",
                pattern>;
// Produces two records per defm:
//   _R : $s held in a 64-bit register; selected from the intrinsic whose
//        record name is "int_nvvm_" followed by the lower-cased defm name.
//   _I : $s given as a 64-bit immediate; no selection pattern.
multiclass SULD_1D_V4<string inst, NVPTXRegClass outtype> {
  defvar intr = !cast<Intrinsic>("int_nvvm_" # !tolower(NAME));

  def _R : SULD_1D_V4_base<inst, outtype, (ins Int64Regs:$s),
                           [(set outtype:$r, outtype:$g, outtype:$b, outtype:$a,
                                 (intr i64:$s, i32:$x))]>;
  def _I : SULD_1D_V4_base<inst, outtype, (ins i64imm:$s)>;
}

defm SULD_1D_V4I8_CLAMP :
SULD_1D_V4<"suld.b.1d.v4.b8.clamp", Int16Regs>;
defm SULD_1D_V4I16_CLAMP : SULD_1D_V4<"suld.b.1d.v4.b16.clamp", Int16Regs>;
defm SULD_1D_V4I32_CLAMP : SULD_1D_V4<"suld.b.1d.v4.b32.clamp", Int32Regs>;

defm SULD_1D_V4I8_TRAP : SULD_1D_V4<"suld.b.1d.v4.b8.trap", Int16Regs>;
defm SULD_1D_V4I16_TRAP : SULD_1D_V4<"suld.b.1d.v4.b16.trap", Int16Regs>;
defm SULD_1D_V4I32_TRAP : SULD_1D_V4<"suld.b.1d.v4.b32.trap", Int32Regs>;

defm SULD_1D_V4I8_ZERO : SULD_1D_V4<"suld.b.1d.v4.b8.zero", Int16Regs>;
defm SULD_1D_V4I16_ZERO : SULD_1D_V4<"suld.b.1d.v4.b16.zero", Int16Regs>;
defm SULD_1D_V4I32_ZERO : SULD_1D_V4<"suld.b.1d.v4.b32.zero", Int32Regs>;

// Common instruction shape for four-result 1D-array surface loads.
//   outs : four `outtype` results ($r, $g, $b, $a).
//   ins  : `surf` (must supply the $s operand named in the asm string)
//          followed by two i32 registers ($l, $x).
//   pattern defaults to [] (no selection pattern).
// NOTE(review): $l is presumably the array layer index - confirm against the
// PTX ISA.
class SULD_1D_ARRAY_V4_base<string inst, NVPTXRegClass outtype, dag surf,
                            list<dag> pattern = []>
    : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
                !con(surf, (ins Int32Regs:$l, Int32Regs:$x)),
                inst # " \\{$r, $g, $b, $a\\}, [$s, \\{$l, $x\\}];",
                pattern>;
// Produces two records per defm:
//   _R : $s held in a 64-bit register; selected from the intrinsic whose
//        record name is "int_nvvm_" followed by the lower-cased defm name.
//   _I : $s given as a 64-bit immediate; no selection pattern.
multiclass SULD_1D_ARRAY_V4<string inst, NVPTXRegClass outtype> {
  defvar intr = !cast<Intrinsic>("int_nvvm_" # !tolower(NAME));

  def _R : SULD_1D_ARRAY_V4_base<inst, outtype, (ins Int64Regs:$s),
                                 [(set outtype:$r, outtype:$g, outtype:$b,
                                       outtype:$a,
                                       (intr i64:$s, i32:$l, i32:$x))]>;
  def _I : SULD_1D_ARRAY_V4_base<inst, outtype, (ins i64imm:$s)>;
}

defm SULD_1D_ARRAY_V4I8_CLAMP
  : SULD_1D_ARRAY_V4<"suld.b.a1d.v4.b8.clamp", Int16Regs>;
defm SULD_1D_ARRAY_V4I16_CLAMP
  : SULD_1D_ARRAY_V4<"suld.b.a1d.v4.b16.clamp", Int16Regs>;
defm SULD_1D_ARRAY_V4I32_CLAMP
  : SULD_1D_ARRAY_V4<"suld.b.a1d.v4.b32.clamp", Int32Regs>;

defm SULD_1D_ARRAY_V4I8_TRAP
  : SULD_1D_ARRAY_V4<"suld.b.a1d.v4.b8.trap", Int16Regs>;
defm SULD_1D_ARRAY_V4I16_TRAP
  : SULD_1D_ARRAY_V4<"suld.b.a1d.v4.b16.trap", Int16Regs>;
defm SULD_1D_ARRAY_V4I32_TRAP
  : SULD_1D_ARRAY_V4<"suld.b.a1d.v4.b32.trap", Int32Regs>;

defm
SULD_1D_ARRAY_V4I8_ZERO
  : SULD_1D_ARRAY_V4<"suld.b.a1d.v4.b8.zero", Int16Regs>;
defm SULD_1D_ARRAY_V4I16_ZERO
  : SULD_1D_ARRAY_V4<"suld.b.a1d.v4.b16.zero", Int16Regs>;
defm SULD_1D_ARRAY_V4I32_ZERO
  : SULD_1D_ARRAY_V4<"suld.b.a1d.v4.b32.zero", Int32Regs>;

// Common instruction shape for four-result 2D surface loads.
//   outs : four `outtype` results ($r, $g, $b, $a).
//   ins  : `surf` (must supply the $s operand named in the asm string)
//          followed by two i32 register coordinates ($x, $y).
//   pattern defaults to [] (no selection pattern).
class SULD_2D_V4_base<string inst, NVPTXRegClass outtype, dag surf,
                      list<dag> pattern = []>
    : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
                !con(surf, (ins Int32Regs:$x, Int32Regs:$y)),
                inst # " \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y\\}];",
                pattern>;
// Produces two records per defm:
//   _R : $s held in a 64-bit register; selected from the intrinsic whose
//        record name is "int_nvvm_" followed by the lower-cased defm name.
//   _I : $s given as a 64-bit immediate; no selection pattern.
multiclass SULD_2D_V4<string inst, NVPTXRegClass outtype> {
  defvar intr = !cast<Intrinsic>("int_nvvm_" # !tolower(NAME));

  def _R : SULD_2D_V4_base<inst, outtype, (ins Int64Regs:$s),
                           [(set outtype:$r, outtype:$g, outtype:$b, outtype:$a,
                                 (intr i64:$s, i32:$x, i32:$y))]>;
  def _I : SULD_2D_V4_base<inst, outtype, (ins i64imm:$s)>;
}

defm SULD_2D_V4I8_CLAMP : SULD_2D_V4<"suld.b.2d.v4.b8.clamp", Int16Regs>;
defm SULD_2D_V4I16_CLAMP : SULD_2D_V4<"suld.b.2d.v4.b16.clamp", Int16Regs>;
defm SULD_2D_V4I32_CLAMP : SULD_2D_V4<"suld.b.2d.v4.b32.clamp", Int32Regs>;

defm SULD_2D_V4I8_TRAP : SULD_2D_V4<"suld.b.2d.v4.b8.trap", Int16Regs>;
defm SULD_2D_V4I16_TRAP : SULD_2D_V4<"suld.b.2d.v4.b16.trap", Int16Regs>;
defm SULD_2D_V4I32_TRAP : SULD_2D_V4<"suld.b.2d.v4.b32.trap", Int32Regs>;

defm SULD_2D_V4I8_ZERO : SULD_2D_V4<"suld.b.2d.v4.b8.zero", Int16Regs>;
defm SULD_2D_V4I16_ZERO : SULD_2D_V4<"suld.b.2d.v4.b16.zero", Int16Regs>;
defm SULD_2D_V4I32_ZERO : SULD_2D_V4<"suld.b.2d.v4.b32.zero", Int32Regs>;

// Common instruction shape for four-result 2D-array surface loads.
//   outs : four `outtype` results ($r, $g, $b, $a).
//   ins  : `surf` (must supply $s) followed by i32 registers $l, $x, $y.
// The address vector is printed with four elements; $y is repeated to fill
// the last slot.
// NOTE(review): presumably the fourth element is padding required by the PTX
// operand form - confirm against the PTX ISA.
class SULD_2D_ARRAY_V4_base<string inst, NVPTXRegClass outtype, dag surf,
                            list<dag> pattern = []>
    : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
                !con(surf, (ins Int32Regs:$l, Int32Regs:$x, Int32Regs:$y)),
                inst # " \\{$r, $g, $b, $a\\}, [$s, \\{$l, $x, $y, $y\\}];",
                pattern>;
// Produces two records per defm:
//   _R : $s held in a 64-bit register; selected from the intrinsic whose
//        record name is "int_nvvm_" followed by the lower-cased defm name.
//   _I : $s given as a 64-bit immediate; no selection pattern.
multiclass SULD_2D_ARRAY_V4<string inst, NVPTXRegClass outtype> {
  defvar intr = !cast<Intrinsic>("int_nvvm_" # !tolower(NAME));

  def _R : SULD_2D_ARRAY_V4_base<inst, outtype, (ins Int64Regs:$s),
                                 [(set outtype:$r, outtype:$g, outtype:$b,
                                       outtype:$a,
                                       (intr i64:$s, i32:$l, i32:$x, i32:$y))]>;
  def _I : SULD_2D_ARRAY_V4_base<inst, outtype, (ins i64imm:$s)>;
}

defm SULD_2D_ARRAY_V4I8_CLAMP
  : SULD_2D_ARRAY_V4<"suld.b.a2d.v4.b8.clamp", Int16Regs>;
defm SULD_2D_ARRAY_V4I16_CLAMP
  : SULD_2D_ARRAY_V4<"suld.b.a2d.v4.b16.clamp", Int16Regs>;
defm SULD_2D_ARRAY_V4I32_CLAMP
  : SULD_2D_ARRAY_V4<"suld.b.a2d.v4.b32.clamp", Int32Regs>;

defm SULD_2D_ARRAY_V4I8_TRAP
  : SULD_2D_ARRAY_V4<"suld.b.a2d.v4.b8.trap", Int16Regs>;
defm SULD_2D_ARRAY_V4I16_TRAP
  : SULD_2D_ARRAY_V4<"suld.b.a2d.v4.b16.trap", Int16Regs>;
defm SULD_2D_ARRAY_V4I32_TRAP
  : SULD_2D_ARRAY_V4<"suld.b.a2d.v4.b32.trap", Int32Regs>;

defm SULD_2D_ARRAY_V4I8_ZERO
  : SULD_2D_ARRAY_V4<"suld.b.a2d.v4.b8.zero", Int16Regs>;
defm SULD_2D_ARRAY_V4I16_ZERO
  : SULD_2D_ARRAY_V4<"suld.b.a2d.v4.b16.zero", Int16Regs>;
defm SULD_2D_ARRAY_V4I32_ZERO
  : SULD_2D_ARRAY_V4<"suld.b.a2d.v4.b32.zero", Int32Regs>;

// Common instruction shape for four-result 3D surface loads.
//   outs : four `outtype` results ($r, $g, $b, $a).
//   ins  : `surf` (must supply $s) followed by i32 registers $x, $y, $z.
// The address vector is printed with four elements; $z is repeated to fill
// the last slot.
// NOTE(review): presumably the fourth element is padding required by the PTX
// operand form - confirm against the PTX ISA.
class SULD_3D_V4_base<string inst, NVPTXRegClass outtype, dag surf,
                      list<dag> pattern = []>
    : NVPTXInst<(outs outtype:$r, outtype:$g, outtype:$b, outtype:$a),
                !con(surf, (ins Int32Regs:$x, Int32Regs:$y, Int32Regs:$z)),
                inst # " \\{$r, $g, $b, $a\\}, [$s, \\{$x, $y, $z, $z\\}];",
                pattern>;
// _R : register handle with selection pattern on the name-derived intrinsic;
// _I : immediate handle, no pattern.
multiclass SULD_3D_V4<string inst, NVPTXRegClass outtype> {
  defvar intr = !cast<Intrinsic>("int_nvvm_" # !tolower(NAME));

  def _R : SULD_3D_V4_base<inst, outtype, (ins Int64Regs:$s),
                           [(set outtype:$r, outtype:$g, outtype:$b, outtype:$a,
                                 (intr i64:$s, i32:$x, i32:$y, i32:$z))]>;
  def _I : SULD_3D_V4_base<inst, outtype, (ins i64imm:$s)>;
}

defm
SULD_3D_V4I8_CLAMP : SULD_3D_V4<"suld.b.3d.v4.b8.clamp", Int16Regs>;
defm SULD_3D_V4I16_CLAMP : SULD_3D_V4<"suld.b.3d.v4.b16.clamp", Int16Regs>;
defm SULD_3D_V4I32_CLAMP : SULD_3D_V4<"suld.b.3d.v4.b32.clamp", Int32Regs>;

defm SULD_3D_V4I8_TRAP : SULD_3D_V4<"suld.b.3d.v4.b8.trap", Int16Regs>;
defm SULD_3D_V4I16_TRAP : SULD_3D_V4<"suld.b.3d.v4.b16.trap", Int16Regs>;
defm SULD_3D_V4I32_TRAP : SULD_3D_V4<"suld.b.3d.v4.b32.trap", Int32Regs>;

defm SULD_3D_V4I8_ZERO : SULD_3D_V4<"suld.b.3d.v4.b8.zero", Int16Regs>;
defm SULD_3D_V4I16_ZERO : SULD_3D_V4<"suld.b.3d.v4.b16.zero", Int16Regs>;
defm SULD_3D_V4I32_ZERO : SULD_3D_V4<"suld.b.3d.v4.b32.zero", Int32Regs>;

} // let IsSuld = 3

//-----------------------------------
// Texture Query Intrinsics
//-----------------------------------

// Each query is defined twice: `_R` takes the handle $a in a 64-bit register,
// `_I` takes it as a 64-bit immediate. All produce an i32 result $d and carry
// an empty pattern list.
let IsSurfTexQuery = true in {
def TXQ_CHANNEL_ORDER_R
  : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
              "txq.channel_order.b32 \t$d, [$a];",
              []>;
def TXQ_CHANNEL_ORDER_I
  : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
              "txq.channel_order.b32 \t$d, [$a];",
              []>;
def TXQ_CHANNEL_DATA_TYPE_R
  : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
              "txq.channel_data_type.b32 \t$d, [$a];",
              []>;
def TXQ_CHANNEL_DATA_TYPE_I
  : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
              "txq.channel_data_type.b32 \t$d, [$a];",
              []>;
def TXQ_WIDTH_R
  : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
              "txq.width.b32 \t$d, [$a];",
              []>;
def TXQ_WIDTH_I
  : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
              "txq.width.b32 \t$d, [$a];",
              []>;
def TXQ_HEIGHT_R
  : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
              "txq.height.b32 \t$d, [$a];",
              []>;
def TXQ_HEIGHT_I
  : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
              "txq.height.b32 \t$d, [$a];",
              []>;
def TXQ_DEPTH_R
  : NVPTXInst<(outs Int32Regs:$d), (ins
Int64Regs:$a),
              "txq.depth.b32 \t$d, [$a];",
              []>;
def TXQ_DEPTH_I
  : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
              "txq.depth.b32 \t$d, [$a];",
              []>;
def TXQ_ARRAY_SIZE_R
  : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
              "txq.array_size.b32 \t$d, [$a];",
              []>;
def TXQ_ARRAY_SIZE_I
  : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
              "txq.array_size.b32 \t$d, [$a];",
              []>;
def TXQ_NUM_SAMPLES_R
  : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
              "txq.num_samples.b32 \t$d, [$a];",
              []>;
def TXQ_NUM_SAMPLES_I
  : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
              "txq.num_samples.b32 \t$d, [$a];",
              []>;
def TXQ_NUM_MIPMAP_LEVELS_R
  : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
              "txq.num_mipmap_levels.b32 \t$d, [$a];",
              []>;
def TXQ_NUM_MIPMAP_LEVELS_I
  : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
              "txq.num_mipmap_levels.b32 \t$d, [$a];",
              []>;
}

// Select each txq intrinsic to the register-handle (_R) form of the
// corresponding instruction.
def : Pat<(int_nvvm_txq_channel_order i64:$a),
          (TXQ_CHANNEL_ORDER_R $a)>;
def : Pat<(int_nvvm_txq_channel_data_type i64:$a),
          (TXQ_CHANNEL_DATA_TYPE_R $a)>;
def : Pat<(int_nvvm_txq_width i64:$a),
          (TXQ_WIDTH_R $a)>;
def : Pat<(int_nvvm_txq_height i64:$a),
          (TXQ_HEIGHT_R $a)>;
def : Pat<(int_nvvm_txq_depth i64:$a),
          (TXQ_DEPTH_R $a)>;
def : Pat<(int_nvvm_txq_array_size i64:$a),
          (TXQ_ARRAY_SIZE_R $a)>;
def : Pat<(int_nvvm_txq_num_samples i64:$a),
          (TXQ_NUM_SAMPLES_R $a)>;
def : Pat<(int_nvvm_txq_num_mipmap_levels i64:$a),
          (TXQ_NUM_MIPMAP_LEVELS_R $a)>;


//-----------------------------------
// Surface Query Intrinsics
//-----------------------------------

// Each query is defined twice: `_R` takes the handle $a in a 64-bit register,
// `_I` takes it as a 64-bit immediate. All produce an i32 result $d and carry
// an empty pattern list.
let IsSurfTexQuery = true in {
def SUQ_CHANNEL_ORDER_R
  : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
              "suq.channel_order.b32 \t$d, [$a];",
              []>;
def SUQ_CHANNEL_ORDER_I
  : NVPTXInst<(outs Int32Regs:$d),
(ins i64imm:$a),
              "suq.channel_order.b32 \t$d, [$a];",
              []>;
def SUQ_CHANNEL_DATA_TYPE_R
  : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
              "suq.channel_data_type.b32 \t$d, [$a];",
              []>;
def SUQ_CHANNEL_DATA_TYPE_I
  : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
              "suq.channel_data_type.b32 \t$d, [$a];",
              []>;
def SUQ_WIDTH_R
  : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
              "suq.width.b32 \t$d, [$a];",
              []>;
def SUQ_WIDTH_I
  : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
              "suq.width.b32 \t$d, [$a];",
              []>;
def SUQ_HEIGHT_R
  : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
              "suq.height.b32 \t$d, [$a];",
              []>;
def SUQ_HEIGHT_I
  : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
              "suq.height.b32 \t$d, [$a];",
              []>;
def SUQ_DEPTH_R
  : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
              "suq.depth.b32 \t$d, [$a];",
              []>;
def SUQ_DEPTH_I
  : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
              "suq.depth.b32 \t$d, [$a];",
              []>;
def SUQ_ARRAY_SIZE_R
  : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
              "suq.array_size.b32 \t$d, [$a];",
              []>;
def SUQ_ARRAY_SIZE_I
  : NVPTXInst<(outs Int32Regs:$d), (ins i64imm:$a),
              "suq.array_size.b32 \t$d, [$a];",
              []>;
}

// Select each suq intrinsic to the register-handle (_R) form of the
// corresponding instruction.
def : Pat<(int_nvvm_suq_channel_order i64:$a),
          (SUQ_CHANNEL_ORDER_R $a)>;
def : Pat<(int_nvvm_suq_channel_data_type i64:$a),
          (SUQ_CHANNEL_DATA_TYPE_R $a)>;
def : Pat<(int_nvvm_suq_width i64:$a),
          (SUQ_WIDTH_R $a)>;
def : Pat<(int_nvvm_suq_height i64:$a),
          (SUQ_HEIGHT_R $a)>;
def : Pat<(int_nvvm_suq_depth i64:$a),
          (SUQ_DEPTH_R $a)>;
def : Pat<(int_nvvm_suq_array_size i64:$a),
          (SUQ_ARRAY_SIZE_R $a)>;


//===- Handle Query -------------------------------------------------------===//

// TODO: These intrinsics are not yet finalized, pending PTX ISA design work
def
ISTYPEP_SAMPLER 5204 : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a), 5205 "istypep.samplerref \t$d, $a;", 5206 [(set i1:$d, (int_nvvm_istypep_sampler i64:$a))]>; 5207def ISTYPEP_SURFACE 5208 : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a), 5209 "istypep.surfref \t$d, $a;", 5210 [(set i1:$d, (int_nvvm_istypep_surface i64:$a))]>; 5211def ISTYPEP_TEXTURE 5212 : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a), 5213 "istypep.texref \t$d, $a;", 5214 [(set i1:$d, (int_nvvm_istypep_texture i64:$a))]>; 5215 5216//===- Surface Stores -----------------------------------------------------===// 5217 5218let IsSust = true in { 5219 5220class SUST_1D_base<string inst, NVPTXRegClass intype, dag surf> 5221 : NVPTXInst<(outs), 5222 !con(surf, (ins Int32Regs:$x, intype:$r)), 5223 inst # " \t[$s, \\{$x\\}], \\{$r\\};", 5224 []>; 5225multiclass SUST_1D<string inst, NVPTXRegClass intype> { 5226 def _R : SUST_1D_base<inst, intype, (ins Int64Regs:$s)>; 5227 def _I : SUST_1D_base<inst, intype, (ins i64imm:$s)>; 5228} 5229 5230defm SUST_B_1D_B8_CLAMP : SUST_1D<"sust.b.1d.b8.clamp", Int16Regs>; 5231defm SUST_B_1D_B16_CLAMP : SUST_1D<"sust.b.1d.b16.clamp", Int16Regs>; 5232defm SUST_B_1D_B32_CLAMP : SUST_1D<"sust.b.1d.b32.clamp", Int32Regs>; 5233defm SUST_B_1D_B64_CLAMP : SUST_1D<"sust.b.1d.b64.clamp", Int64Regs>; 5234 5235defm SUST_B_1D_B8_TRAP : SUST_1D<"sust.b.1d.b8.trap", Int16Regs>; 5236defm SUST_B_1D_B16_TRAP : SUST_1D<"sust.b.1d.b16.trap", Int16Regs>; 5237defm SUST_B_1D_B32_TRAP : SUST_1D<"sust.b.1d.b32.trap", Int32Regs>; 5238defm SUST_B_1D_B64_TRAP : SUST_1D<"sust.b.1d.b64.trap", Int64Regs>; 5239 5240defm SUST_B_1D_B8_ZERO : SUST_1D<"sust.b.1d.b8.zero", Int16Regs>; 5241defm SUST_B_1D_B16_ZERO : SUST_1D<"sust.b.1d.b16.zero", Int16Regs>; 5242defm SUST_B_1D_B32_ZERO : SUST_1D<"sust.b.1d.b32.zero", Int32Regs>; 5243defm SUST_B_1D_B64_ZERO : SUST_1D<"sust.b.1d.b64.zero", Int64Regs>; 5244 5245defm SUST_P_1D_B8_TRAP : SUST_1D<"sust.p.1d.b8.trap", Int16Regs>; 5246defm SUST_P_1D_B16_TRAP 
: SUST_1D<"sust.p.1d.b16.trap", Int16Regs>; 5247defm SUST_P_1D_B32_TRAP : SUST_1D<"sust.p.1d.b32.trap", Int32Regs>; 5248 5249class SUST_1D_V2_base<string inst, NVPTXRegClass intype, dag surf> 5250 : NVPTXInst<(outs), 5251 !con(surf, (ins Int32Regs:$x, intype:$r, intype:$g)), 5252 inst # " \t[$s, \\{$x\\}], \\{$r, $g\\};", 5253 []>; 5254multiclass SUST_1D_V2<string inst, NVPTXRegClass intype> { 5255 def _R : SUST_1D_V2_base<inst, intype, (ins Int64Regs:$s)>; 5256 def _I : SUST_1D_V2_base<inst, intype, (ins i64imm:$s)>; 5257} 5258 5259defm SUST_B_1D_V2B8_CLAMP : SUST_1D_V2<"sust.b.1d.v2.b8.clamp", Int16Regs>; 5260defm SUST_B_1D_V2B16_CLAMP : SUST_1D_V2<"sust.b.1d.v2.b16.clamp", Int16Regs>; 5261defm SUST_B_1D_V2B32_CLAMP : SUST_1D_V2<"sust.b.1d.v2.b32.clamp", Int32Regs>; 5262defm SUST_B_1D_V2B64_CLAMP : SUST_1D_V2<"sust.b.1d.v2.b64.clamp", Int64Regs>; 5263 5264defm SUST_B_1D_V2B8_TRAP : SUST_1D_V2<"sust.b.1d.v2.b8.trap", Int16Regs>; 5265defm SUST_B_1D_V2B16_TRAP : SUST_1D_V2<"sust.b.1d.v2.b16.trap", Int16Regs>; 5266defm SUST_B_1D_V2B32_TRAP : SUST_1D_V2<"sust.b.1d.v2.b32.trap", Int32Regs>; 5267defm SUST_B_1D_V2B64_TRAP : SUST_1D_V2<"sust.b.1d.v2.b64.trap", Int64Regs>; 5268 5269defm SUST_B_1D_V2B8_ZERO : SUST_1D_V2<"sust.b.1d.v2.b8.zero", Int16Regs>; 5270defm SUST_B_1D_V2B16_ZERO : SUST_1D_V2<"sust.b.1d.v2.b16.zero", Int16Regs>; 5271defm SUST_B_1D_V2B32_ZERO : SUST_1D_V2<"sust.b.1d.v2.b32.zero", Int32Regs>; 5272defm SUST_B_1D_V2B64_ZERO : SUST_1D_V2<"sust.b.1d.v2.b64.zero", Int64Regs>; 5273 5274defm SUST_P_1D_V2B8_TRAP : SUST_1D_V2<"sust.p.1d.v2.b8.trap", Int16Regs>; 5275defm SUST_P_1D_V2B16_TRAP : SUST_1D_V2<"sust.p.1d.v2.b16.trap", Int16Regs>; 5276defm SUST_P_1D_V2B32_TRAP : SUST_1D_V2<"sust.p.1d.v2.b32.trap", Int32Regs>; 5277 5278class SUST_1D_V4_base<string inst, NVPTXRegClass intype, dag surf> 5279 : NVPTXInst<(outs), 5280 !con(surf, (ins Int32Regs:$x, intype:$r, intype:$g, 5281 intype:$b, intype:$a)), 5282 inst # " \t[$s, \\{$x\\}], \\{$r, $g, $b, $a\\};", 5283 
[]>; 5284multiclass SUST_1D_V4<string inst, NVPTXRegClass intype> { 5285 def _R : SUST_1D_V4_base<inst, intype, (ins Int64Regs:$s)>; 5286 def _I : SUST_1D_V4_base<inst, intype, (ins i64imm:$s)>; 5287} 5288 5289defm SUST_B_1D_V4B8_CLAMP : SUST_1D_V4<"sust.b.1d.v4.b8.clamp", Int16Regs>; 5290defm SUST_B_1D_V4B16_CLAMP : SUST_1D_V4<"sust.b.1d.v4.b16.clamp", Int16Regs>; 5291defm SUST_B_1D_V4B32_CLAMP : SUST_1D_V4<"sust.b.1d.v4.b32.clamp", Int32Regs>; 5292 5293defm SUST_B_1D_V4B8_TRAP : SUST_1D_V4<"sust.b.1d.v4.b8.trap", Int16Regs>; 5294defm SUST_B_1D_V4B16_TRAP : SUST_1D_V4<"sust.b.1d.v4.b16.trap", Int16Regs>; 5295defm SUST_B_1D_V4B32_TRAP : SUST_1D_V4<"sust.b.1d.v4.b32.trap", Int32Regs>; 5296 5297defm SUST_B_1D_V4B8_ZERO : SUST_1D_V4<"sust.b.1d.v4.b8.zero", Int16Regs>; 5298defm SUST_B_1D_V4B16_ZERO : SUST_1D_V4<"sust.b.1d.v4.b16.zero", Int16Regs>; 5299defm SUST_B_1D_V4B32_ZERO : SUST_1D_V4<"sust.b.1d.v4.b32.zero", Int32Regs>; 5300 5301defm SUST_P_1D_V4B8_TRAP : SUST_1D_V4<"sust.p.1d.v4.b8.trap", Int16Regs>; 5302defm SUST_P_1D_V4B16_TRAP : SUST_1D_V4<"sust.p.1d.v4.b16.trap", Int16Regs>; 5303defm SUST_P_1D_V4B32_TRAP : SUST_1D_V4<"sust.p.1d.v4.b32.trap", Int32Regs>; 5304 5305class SUST_1D_ARRAY_base<string inst, NVPTXRegClass intype, dag surf> 5306 : NVPTXInst<(outs), 5307 !con(surf, (ins Int32Regs:$idx, Int32Regs:$x, intype:$r)), 5308 inst # " \t[$s, \\{$idx, $x\\}], \\{$r\\};", 5309 []>; 5310multiclass SUST_1D_ARRAY<string inst, NVPTXRegClass intype> { 5311 def _R : SUST_1D_ARRAY_base<inst, intype, (ins Int64Regs:$s)>; 5312 def _I : SUST_1D_ARRAY_base<inst, intype, (ins i64imm:$s)>; 5313} 5314 5315defm SUST_B_1D_ARRAY_B8_CLAMP 5316 : SUST_1D_ARRAY<"sust.b.a1d.b8.clamp", Int16Regs>; 5317defm SUST_B_1D_ARRAY_B16_CLAMP 5318 : SUST_1D_ARRAY<"sust.b.a1d.b16.clamp", Int16Regs>; 5319defm SUST_B_1D_ARRAY_B32_CLAMP 5320 : SUST_1D_ARRAY<"sust.b.a1d.b32.clamp", Int32Regs>; 5321defm SUST_B_1D_ARRAY_B64_CLAMP 5322 : SUST_1D_ARRAY<"sust.b.a1d.b64.clamp", Int64Regs>; 5323 5324defm 
SUST_B_1D_ARRAY_B8_TRAP 5325 : SUST_1D_ARRAY<"sust.b.a1d.b8.trap", Int16Regs>; 5326defm SUST_B_1D_ARRAY_B16_TRAP 5327 : SUST_1D_ARRAY<"sust.b.a1d.b16.trap", Int16Regs>; 5328defm SUST_B_1D_ARRAY_B32_TRAP 5329 : SUST_1D_ARRAY<"sust.b.a1d.b32.trap", Int32Regs>; 5330defm SUST_B_1D_ARRAY_B64_TRAP 5331 : SUST_1D_ARRAY<"sust.b.a1d.b64.trap", Int64Regs>; 5332 5333defm SUST_B_1D_ARRAY_B8_ZERO 5334 : SUST_1D_ARRAY<"sust.b.a1d.b8.zero", Int16Regs>; 5335defm SUST_B_1D_ARRAY_B16_ZERO 5336 : SUST_1D_ARRAY<"sust.b.a1d.b16.zero", Int16Regs>; 5337defm SUST_B_1D_ARRAY_B32_ZERO 5338 : SUST_1D_ARRAY<"sust.b.a1d.b32.zero", Int32Regs>; 5339defm SUST_B_1D_ARRAY_B64_ZERO 5340 : SUST_1D_ARRAY<"sust.b.a1d.b64.zero", Int64Regs>; 5341 5342defm SUST_P_1D_ARRAY_B8_TRAP 5343 : SUST_1D_ARRAY<"sust.p.a1d.b8.trap", Int16Regs>; 5344defm SUST_P_1D_ARRAY_B16_TRAP 5345 : SUST_1D_ARRAY<"sust.p.a1d.b16.trap", Int16Regs>; 5346defm SUST_P_1D_ARRAY_B32_TRAP 5347 : SUST_1D_ARRAY<"sust.p.a1d.b32.trap", Int32Regs>; 5348 5349class SUST_1D_ARRAY_V2_base<string inst, NVPTXRegClass intype, dag surf> 5350 : NVPTXInst<(outs), 5351 !con(surf, (ins Int32Regs:$idx, Int32Regs:$x, 5352 intype:$r, intype:$g)), 5353 inst # " \t[$s, \\{$idx, $x\\}], \\{$r, $g\\};", 5354 []>; 5355multiclass SUST_1D_ARRAY_V2<string inst, NVPTXRegClass intype> { 5356 def _R : SUST_1D_ARRAY_V2_base<inst, intype, (ins Int64Regs:$s)>; 5357 def _I : SUST_1D_ARRAY_V2_base<inst, intype, (ins i64imm:$s)>; 5358} 5359 5360defm SUST_B_1D_ARRAY_V2B8_CLAMP 5361 : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b8.clamp", Int16Regs>; 5362defm SUST_B_1D_ARRAY_V2B16_CLAMP 5363 : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b16.clamp", Int16Regs>; 5364defm SUST_B_1D_ARRAY_V2B32_CLAMP 5365 : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b32.clamp", Int32Regs>; 5366defm SUST_B_1D_ARRAY_V2B64_CLAMP 5367 : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b64.clamp", Int64Regs>; 5368 5369defm SUST_B_1D_ARRAY_V2B8_TRAP 5370 : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b8.trap", Int16Regs>; 5371defm SUST_B_1D_ARRAY_V2B16_TRAP 5372 : 
SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b16.trap", Int16Regs>; 5373defm SUST_B_1D_ARRAY_V2B32_TRAP 5374 : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b32.trap", Int32Regs>; 5375defm SUST_B_1D_ARRAY_V2B64_TRAP 5376 : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b64.trap", Int64Regs>; 5377 5378defm SUST_B_1D_ARRAY_V2B8_ZERO 5379 : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b8.zero", Int16Regs>; 5380defm SUST_B_1D_ARRAY_V2B16_ZERO 5381 : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b16.zero", Int16Regs>; 5382defm SUST_B_1D_ARRAY_V2B32_ZERO 5383 : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b32.zero", Int32Regs>; 5384defm SUST_B_1D_ARRAY_V2B64_ZERO 5385 : SUST_1D_ARRAY_V2<"sust.b.a1d.v2.b64.zero", Int64Regs>; 5386 5387defm SUST_P_1D_ARRAY_V2B8_TRAP 5388 : SUST_1D_ARRAY_V2<"sust.p.a1d.v2.b8.trap", Int16Regs>; 5389defm SUST_P_1D_ARRAY_V2B16_TRAP 5390 : SUST_1D_ARRAY_V2<"sust.p.a1d.v2.b16.trap", Int16Regs>; 5391defm SUST_P_1D_ARRAY_V2B32_TRAP 5392 : SUST_1D_ARRAY_V2<"sust.p.a1d.v2.b32.trap", Int32Regs>; 5393 5394class SUST_1D_ARRAY_V4_base<string inst, NVPTXRegClass intype, dag surf> 5395 : NVPTXInst<(outs), 5396 !con(surf, (ins Int32Regs:$idx, Int32Regs:$x, 5397 intype:$r, intype:$g, intype:$b, intype:$a)), 5398 inst # " \t[$s, \\{$idx, $x\\}], \\{$r, $g, $b, $a\\};", 5399 []>; 5400multiclass SUST_1D_ARRAY_V4<string inst, NVPTXRegClass intype> { 5401 def _R : SUST_1D_ARRAY_V4_base<inst, intype, (ins Int64Regs:$s)>; 5402 def _I : SUST_1D_ARRAY_V4_base<inst, intype, (ins i64imm:$s)>; 5403} 5404 5405defm SUST_B_1D_ARRAY_V4B8_CLAMP 5406 : SUST_1D_ARRAY_V4<"sust.b.a1d.v4.b8.clamp", Int16Regs>; 5407defm SUST_B_1D_ARRAY_V4B16_CLAMP 5408 : SUST_1D_ARRAY_V4<"sust.b.a1d.v4.b16.clamp", Int16Regs>; 5409defm SUST_B_1D_ARRAY_V4B32_CLAMP 5410 : SUST_1D_ARRAY_V4<"sust.b.a1d.v4.b32.clamp", Int32Regs>; 5411 5412defm SUST_B_1D_ARRAY_V4B8_TRAP 5413 : SUST_1D_ARRAY_V4<"sust.b.a1d.v4.b8.trap", Int16Regs>; 5414defm SUST_B_1D_ARRAY_V4B16_TRAP 5415 : SUST_1D_ARRAY_V4<"sust.b.a1d.v4.b16.trap", Int16Regs>; 5416defm SUST_B_1D_ARRAY_V4B32_TRAP 5417 : 
SUST_1D_ARRAY_V4<"sust.b.a1d.v4.b32.trap", Int32Regs>; 5418 5419defm SUST_B_1D_ARRAY_V4B8_ZERO 5420 : SUST_1D_ARRAY_V4<"sust.b.a1d.v4.b8.zero", Int16Regs>; 5421defm SUST_B_1D_ARRAY_V4B16_ZERO 5422 : SUST_1D_ARRAY_V4<"sust.b.a1d.v4.b16.zero", Int16Regs>; 5423defm SUST_B_1D_ARRAY_V4B32_ZERO 5424 : SUST_1D_ARRAY_V4<"sust.b.a1d.v4.b32.zero", Int32Regs>; 5425 5426defm SUST_P_1D_ARRAY_V4B8_TRAP 5427 : SUST_1D_ARRAY_V4<"sust.p.a1d.v4.b8.trap", Int16Regs>; 5428defm SUST_P_1D_ARRAY_V4B16_TRAP 5429 : SUST_1D_ARRAY_V4<"sust.p.a1d.v4.b16.trap", Int16Regs>; 5430defm SUST_P_1D_ARRAY_V4B32_TRAP 5431 : SUST_1D_ARRAY_V4<"sust.p.a1d.v4.b32.trap", Int32Regs>; 5432 5433class SUST_2D_base<string inst, NVPTXRegClass intype, dag surf> 5434 : NVPTXInst<(outs), 5435 !con(surf, (ins Int32Regs:$x, Int32Regs:$y, intype:$r)), 5436 inst # " \t[$s, \\{$x, $y\\}], \\{$r\\};", 5437 []>; 5438multiclass SUST_2D<string inst, NVPTXRegClass intype> { 5439 def _R : SUST_2D_base<inst, intype, (ins Int64Regs:$s)>; 5440 def _I : SUST_2D_base<inst, intype, (ins i64imm:$s)>; 5441} 5442 5443defm SUST_B_2D_B8_CLAMP : SUST_2D<"sust.b.2d.b8.clamp", Int16Regs>; 5444defm SUST_B_2D_B16_CLAMP : SUST_2D<"sust.b.2d.b16.clamp", Int16Regs>; 5445defm SUST_B_2D_B32_CLAMP : SUST_2D<"sust.b.2d.b32.clamp", Int32Regs>; 5446defm SUST_B_2D_B64_CLAMP : SUST_2D<"sust.b.2d.b64.clamp", Int64Regs>; 5447 5448defm SUST_B_2D_B8_TRAP : SUST_2D<"sust.b.2d.b8.trap", Int16Regs>; 5449defm SUST_B_2D_B16_TRAP : SUST_2D<"sust.b.2d.b16.trap", Int16Regs>; 5450defm SUST_B_2D_B32_TRAP : SUST_2D<"sust.b.2d.b32.trap", Int32Regs>; 5451defm SUST_B_2D_B64_TRAP : SUST_2D<"sust.b.2d.b64.trap", Int64Regs>; 5452 5453defm SUST_B_2D_B8_ZERO : SUST_2D<"sust.b.2d.b8.zero", Int16Regs>; 5454defm SUST_B_2D_B16_ZERO : SUST_2D<"sust.b.2d.b16.zero", Int16Regs>; 5455defm SUST_B_2D_B32_ZERO : SUST_2D<"sust.b.2d.b32.zero", Int32Regs>; 5456defm SUST_B_2D_B64_ZERO : SUST_2D<"sust.b.2d.b64.zero", Int64Regs>; 5457 5458defm SUST_P_2D_B8_TRAP : SUST_2D<"sust.p.2d.b8.trap", 
Int16Regs>; 5459defm SUST_P_2D_B16_TRAP : SUST_2D<"sust.p.2d.b16.trap", Int16Regs>; 5460defm SUST_P_2D_B32_TRAP : SUST_2D<"sust.p.2d.b32.trap", Int32Regs>; 5461 5462class SUST_2D_V2_base<string inst, NVPTXRegClass intype, dag surf> 5463 : NVPTXInst<(outs), 5464 !con(surf, (ins Int32Regs:$x, Int32Regs:$y, 5465 intype:$r, intype:$g)), 5466 inst # " \t[$s, \\{$x, $y\\}], \\{$r, $g\\};", 5467 []>; 5468multiclass SUST_2D_V2<string inst, NVPTXRegClass intype> { 5469 def _R : SUST_2D_V2_base<inst, intype, (ins Int64Regs:$s)>; 5470 def _I : SUST_2D_V2_base<inst, intype, (ins i64imm:$s)>; 5471} 5472 5473defm SUST_B_2D_V2B8_CLAMP : SUST_2D_V2<"sust.b.2d.v2.b8.clamp", Int16Regs>; 5474defm SUST_B_2D_V2B16_CLAMP : SUST_2D_V2<"sust.b.2d.v2.b16.clamp", Int16Regs>; 5475defm SUST_B_2D_V2B32_CLAMP : SUST_2D_V2<"sust.b.2d.v2.b32.clamp", Int32Regs>; 5476defm SUST_B_2D_V2B64_CLAMP : SUST_2D_V2<"sust.b.2d.v2.b64.clamp", Int64Regs>; 5477 5478defm SUST_B_2D_V2B8_TRAP : SUST_2D_V2<"sust.b.2d.v2.b8.trap", Int16Regs>; 5479defm SUST_B_2D_V2B16_TRAP : SUST_2D_V2<"sust.b.2d.v2.b16.trap", Int16Regs>; 5480defm SUST_B_2D_V2B32_TRAP : SUST_2D_V2<"sust.b.2d.v2.b32.trap", Int32Regs>; 5481defm SUST_B_2D_V2B64_TRAP : SUST_2D_V2<"sust.b.2d.v2.b64.trap", Int64Regs>; 5482 5483defm SUST_B_2D_V2B8_ZERO : SUST_2D_V2<"sust.b.2d.v2.b8.zero", Int16Regs>; 5484defm SUST_B_2D_V2B16_ZERO : SUST_2D_V2<"sust.b.2d.v2.b16.zero", Int16Regs>; 5485defm SUST_B_2D_V2B32_ZERO : SUST_2D_V2<"sust.b.2d.v2.b32.zero", Int32Regs>; 5486defm SUST_B_2D_V2B64_ZERO : SUST_2D_V2<"sust.b.2d.v2.b64.zero", Int64Regs>; 5487 5488defm SUST_P_2D_V2B8_TRAP : SUST_2D_V2<"sust.p.2d.v2.b8.trap", Int16Regs>; 5489defm SUST_P_2D_V2B16_TRAP : SUST_2D_V2<"sust.p.2d.v2.b16.trap", Int16Regs>; 5490defm SUST_P_2D_V2B32_TRAP : SUST_2D_V2<"sust.p.2d.v2.b32.trap", Int32Regs>; 5491 5492class SUST_2D_V4_base<string inst, NVPTXRegClass intype, dag surf> 5493 : NVPTXInst<(outs), 5494 !con(surf, (ins Int32Regs:$x, Int32Regs:$y, 5495 intype:$r, intype:$g, 
intype:$b, intype:$a)), 5496 inst # " \t[$s, \\{$x, $y\\}], \\{$r, $g, $b, $a\\};", 5497 []>; 5498multiclass SUST_2D_V4<string inst, NVPTXRegClass intype> { 5499 def _R : SUST_2D_V4_base<inst, intype, (ins Int64Regs:$s)>; 5500 def _I : SUST_2D_V4_base<inst, intype, (ins i64imm:$s)>; 5501} 5502 5503defm SUST_B_2D_V4B8_CLAMP : SUST_2D_V4<"sust.b.2d.v4.b8.clamp", Int16Regs>; 5504defm SUST_B_2D_V4B16_CLAMP : SUST_2D_V4<"sust.b.2d.v4.b16.clamp", Int16Regs>; 5505defm SUST_B_2D_V4B32_CLAMP : SUST_2D_V4<"sust.b.2d.v4.b32.clamp", Int32Regs>; 5506 5507defm SUST_B_2D_V4B8_TRAP : SUST_2D_V4<"sust.b.2d.v4.b8.trap", Int16Regs>; 5508defm SUST_B_2D_V4B16_TRAP : SUST_2D_V4<"sust.b.2d.v4.b16.trap", Int16Regs>; 5509defm SUST_B_2D_V4B32_TRAP : SUST_2D_V4<"sust.b.2d.v4.b32.trap", Int32Regs>; 5510 5511defm SUST_B_2D_V4B8_ZERO : SUST_2D_V4<"sust.b.2d.v4.b8.zero", Int16Regs>; 5512defm SUST_B_2D_V4B16_ZERO : SUST_2D_V4<"sust.b.2d.v4.b16.zero", Int16Regs>; 5513defm SUST_B_2D_V4B32_ZERO : SUST_2D_V4<"sust.b.2d.v4.b32.zero", Int32Regs>; 5514 5515defm SUST_P_2D_V4B8_TRAP : SUST_2D_V4<"sust.p.2d.v4.b8.trap", Int16Regs>; 5516defm SUST_P_2D_V4B16_TRAP : SUST_2D_V4<"sust.p.2d.v4.b16.trap", Int16Regs>; 5517defm SUST_P_2D_V4B32_TRAP : SUST_2D_V4<"sust.p.2d.v4.b32.trap", Int32Regs>; 5518 5519class SUST_2D_ARRAY_base<string inst, NVPTXRegClass intype, dag surf> 5520 : NVPTXInst<(outs), 5521 !con(surf, (ins Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, 5522 intype:$r)), 5523 inst # " \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r\\};", 5524 []>; 5525multiclass SUST_2D_ARRAY<string inst, NVPTXRegClass intype> { 5526 def _R : SUST_2D_ARRAY_base<inst, intype, (ins Int64Regs:$s)>; 5527 def _I : SUST_2D_ARRAY_base<inst, intype, (ins i64imm:$s)>; 5528} 5529 5530defm SUST_B_2D_ARRAY_B8_CLAMP 5531 : SUST_2D_ARRAY<"sust.b.a2d.b8.clamp", Int16Regs>; 5532defm SUST_B_2D_ARRAY_B16_CLAMP 5533 : SUST_2D_ARRAY<"sust.b.a2d.b16.clamp", Int16Regs>; 5534defm SUST_B_2D_ARRAY_B32_CLAMP 5535 : SUST_2D_ARRAY<"sust.b.a2d.b32.clamp", 
Int32Regs>; 5536defm SUST_B_2D_ARRAY_B64_CLAMP 5537 : SUST_2D_ARRAY<"sust.b.a2d.b64.clamp", Int64Regs>; 5538 5539defm SUST_B_2D_ARRAY_B8_TRAP 5540 : SUST_2D_ARRAY<"sust.b.a2d.b8.trap", Int16Regs>; 5541defm SUST_B_2D_ARRAY_B16_TRAP 5542 : SUST_2D_ARRAY<"sust.b.a2d.b16.trap", Int16Regs>; 5543defm SUST_B_2D_ARRAY_B32_TRAP 5544 : SUST_2D_ARRAY<"sust.b.a2d.b32.trap", Int32Regs>; 5545defm SUST_B_2D_ARRAY_B64_TRAP 5546 : SUST_2D_ARRAY<"sust.b.a2d.b64.trap", Int64Regs>; 5547 5548defm SUST_B_2D_ARRAY_B8_ZERO 5549 : SUST_2D_ARRAY<"sust.b.a2d.b8.zero", Int16Regs>; 5550defm SUST_B_2D_ARRAY_B16_ZERO 5551 : SUST_2D_ARRAY<"sust.b.a2d.b16.zero", Int16Regs>; 5552defm SUST_B_2D_ARRAY_B32_ZERO 5553 : SUST_2D_ARRAY<"sust.b.a2d.b32.zero", Int32Regs>; 5554defm SUST_B_2D_ARRAY_B64_ZERO 5555 : SUST_2D_ARRAY<"sust.b.a2d.b64.zero", Int64Regs>; 5556 5557defm SUST_P_2D_ARRAY_B8_TRAP 5558 : SUST_2D_ARRAY<"sust.p.a2d.b8.trap", Int16Regs>; 5559defm SUST_P_2D_ARRAY_B16_TRAP 5560 : SUST_2D_ARRAY<"sust.p.a2d.b16.trap", Int16Regs>; 5561defm SUST_P_2D_ARRAY_B32_TRAP 5562 : SUST_2D_ARRAY<"sust.p.a2d.b32.trap", Int32Regs>; 5563 5564class SUST_2D_ARRAY_V2_base<string inst, NVPTXRegClass intype, dag surf> 5565 : NVPTXInst<(outs), 5566 !con(surf, (ins Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y, 5567 intype:$r, intype:$g)), 5568 inst # " \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r, $g\\};", 5569 []>; 5570multiclass SUST_2D_ARRAY_V2<string inst, NVPTXRegClass intype> { 5571 def _R : SUST_2D_ARRAY_V2_base<inst, intype, (ins Int64Regs:$s)>; 5572 def _I : SUST_2D_ARRAY_V2_base<inst, intype, (ins i64imm:$s)>; 5573} 5574 5575defm SUST_B_2D_ARRAY_V2B8_CLAMP 5576 : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b8.clamp", Int16Regs>; 5577defm SUST_B_2D_ARRAY_V2B16_CLAMP 5578 : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b16.clamp", Int16Regs>; 5579defm SUST_B_2D_ARRAY_V2B32_CLAMP 5580 : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b32.clamp", Int32Regs>; 5581defm SUST_B_2D_ARRAY_V2B64_CLAMP 5582 : SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b64.clamp", Int64Regs>; 5583 
// 2D array, two data elements: remaining .trap / .zero stores and the
// sust.p forms, all instantiating the SUST_2D_ARRAY_V2 multiclass.
defm SUST_B_2D_ARRAY_V2B8_TRAP :
    SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b8.trap", Int16Regs>;
defm SUST_B_2D_ARRAY_V2B16_TRAP :
    SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b16.trap", Int16Regs>;
defm SUST_B_2D_ARRAY_V2B32_TRAP :
    SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b32.trap", Int32Regs>;
defm SUST_B_2D_ARRAY_V2B64_TRAP :
    SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b64.trap", Int64Regs>;

defm SUST_B_2D_ARRAY_V2B8_ZERO :
    SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b8.zero", Int16Regs>;
defm SUST_B_2D_ARRAY_V2B16_ZERO :
    SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b16.zero", Int16Regs>;
defm SUST_B_2D_ARRAY_V2B32_ZERO :
    SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b32.zero", Int32Regs>;
defm SUST_B_2D_ARRAY_V2B64_ZERO :
    SUST_2D_ARRAY_V2<"sust.b.a2d.v2.b64.zero", Int64Regs>;

defm SUST_P_2D_ARRAY_V2B8_TRAP :
    SUST_2D_ARRAY_V2<"sust.p.a2d.v2.b8.trap", Int16Regs>;
defm SUST_P_2D_ARRAY_V2B16_TRAP :
    SUST_2D_ARRAY_V2<"sust.p.a2d.v2.b16.trap", Int16Regs>;
defm SUST_P_2D_ARRAY_V2B32_TRAP :
    SUST_2D_ARRAY_V2<"sust.p.a2d.v2.b32.trap", Int32Regs>;

// --- 2D array, four data elements -----------------------------------------
// `surf` supplies the surface-handle operand $s and is prepended to the
// coordinate/data operands with !con. The coordinate braces are printed with
// four entries; $y appears twice.
// NOTE(review): presumably the last coordinate slot is a filler required by
// the four-element vector form -- confirm against the PTX ISA.
class SUST_2D_ARRAY_V4_base<string inst, NVPTXRegClass intype, dag surf>
  : NVPTXInst<(outs),
              !con(surf, (ins Int32Regs:$idx, Int32Regs:$x, Int32Regs:$y,
                              intype:$r, intype:$g, intype:$b, intype:$a)),
              !strconcat(
                  inst,
                  " \t[$s, \\{$idx, $x, $y, $y\\}], \\{$r, $g, $b, $a\\};"),
              []>;

// _R takes the handle in Int64Regs, _I takes it as i64imm.
multiclass SUST_2D_ARRAY_V4<string inst, NVPTXRegClass intype> {
  def _R : SUST_2D_ARRAY_V4_base<inst, intype, (ins Int64Regs:$s)>;
  def _I : SUST_2D_ARRAY_V4_base<inst, intype, (ins i64imm:$s)>;
}

defm SUST_B_2D_ARRAY_V4B8_CLAMP :
    SUST_2D_ARRAY_V4<"sust.b.a2d.v4.b8.clamp", Int16Regs>;
defm SUST_B_2D_ARRAY_V4B16_CLAMP :
    SUST_2D_ARRAY_V4<"sust.b.a2d.v4.b16.clamp", Int16Regs>;
defm SUST_B_2D_ARRAY_V4B32_CLAMP :
    SUST_2D_ARRAY_V4<"sust.b.a2d.v4.b32.clamp", Int32Regs>;

defm SUST_B_2D_ARRAY_V4B8_TRAP :
    SUST_2D_ARRAY_V4<"sust.b.a2d.v4.b8.trap", Int16Regs>;
defm SUST_B_2D_ARRAY_V4B16_TRAP :
    SUST_2D_ARRAY_V4<"sust.b.a2d.v4.b16.trap", Int16Regs>;
defm SUST_B_2D_ARRAY_V4B32_TRAP :
    SUST_2D_ARRAY_V4<"sust.b.a2d.v4.b32.trap", Int32Regs>;

defm SUST_B_2D_ARRAY_V4B8_ZERO :
    SUST_2D_ARRAY_V4<"sust.b.a2d.v4.b8.zero", Int16Regs>;
defm SUST_B_2D_ARRAY_V4B16_ZERO :
    SUST_2D_ARRAY_V4<"sust.b.a2d.v4.b16.zero", Int16Regs>;
defm SUST_B_2D_ARRAY_V4B32_ZERO :
    SUST_2D_ARRAY_V4<"sust.b.a2d.v4.b32.zero", Int32Regs>;

defm SUST_P_2D_ARRAY_V4B8_TRAP :
    SUST_2D_ARRAY_V4<"sust.p.a2d.v4.b8.trap", Int16Regs>;
defm SUST_P_2D_ARRAY_V4B16_TRAP :
    SUST_2D_ARRAY_V4<"sust.p.a2d.v4.b16.trap", Int16Regs>;
defm SUST_P_2D_ARRAY_V4B32_TRAP :
    SUST_2D_ARRAY_V4<"sust.p.a2d.v4.b32.trap", Int32Regs>;

// --- 3D, one data element -------------------------------------------------
// Three coordinate operands are declared but four are printed: $z is
// repeated in the last slot of the coordinate braces.
class SUST_3D_base<string inst, NVPTXRegClass intype, dag surf>
  : NVPTXInst<(outs),
              !con(surf, (ins Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
                              intype:$r)),
              !strconcat(inst, " \t[$s, \\{$x, $y, $z, $z\\}], \\{$r\\};"),
              []>;

multiclass SUST_3D<string inst, NVPTXRegClass intype> {
  def _R : SUST_3D_base<inst, intype, (ins Int64Regs:$s)>;
  def _I : SUST_3D_base<inst, intype, (ins i64imm:$s)>;
}

defm SUST_B_3D_B8_CLAMP : SUST_3D<"sust.b.3d.b8.clamp", Int16Regs>;
defm SUST_B_3D_B16_CLAMP : SUST_3D<"sust.b.3d.b16.clamp", Int16Regs>;
defm SUST_B_3D_B32_CLAMP : SUST_3D<"sust.b.3d.b32.clamp", Int32Regs>;
defm SUST_B_3D_B64_CLAMP : SUST_3D<"sust.b.3d.b64.clamp", Int64Regs>;

defm SUST_B_3D_B8_TRAP : SUST_3D<"sust.b.3d.b8.trap", Int16Regs>;
defm SUST_B_3D_B16_TRAP : SUST_3D<"sust.b.3d.b16.trap", Int16Regs>;
defm SUST_B_3D_B32_TRAP : SUST_3D<"sust.b.3d.b32.trap", Int32Regs>;
defm SUST_B_3D_B64_TRAP : SUST_3D<"sust.b.3d.b64.trap", Int64Regs>;

defm SUST_B_3D_B8_ZERO : SUST_3D<"sust.b.3d.b8.zero", Int16Regs>;
defm SUST_B_3D_B16_ZERO : SUST_3D<"sust.b.3d.b16.zero", Int16Regs>;
defm SUST_B_3D_B32_ZERO : SUST_3D<"sust.b.3d.b32.zero", Int32Regs>;
defm SUST_B_3D_B64_ZERO : SUST_3D<"sust.b.3d.b64.zero", Int64Regs>;

defm SUST_P_3D_B8_TRAP : SUST_3D<"sust.p.3d.b8.trap", Int16Regs>;
defm SUST_P_3D_B16_TRAP : SUST_3D<"sust.p.3d.b16.trap", Int16Regs>;
defm SUST_P_3D_B32_TRAP : SUST_3D<"sust.p.3d.b32.trap", Int32Regs>;

// --- 3D, two data elements ------------------------------------------------
class SUST_3D_V2_base<string inst, NVPTXRegClass intype, dag surf>
  : NVPTXInst<(outs),
              !con(surf, (ins Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
                              intype:$r, intype:$g)),
              !strconcat(inst,
                         " \t[$s, \\{$x, $y, $z, $z\\}], \\{$r, $g\\};"),
              []>;

multiclass SUST_3D_V2<string inst, NVPTXRegClass intype> {
  def _R : SUST_3D_V2_base<inst, intype, (ins Int64Regs:$s)>;
  def _I : SUST_3D_V2_base<inst, intype, (ins i64imm:$s)>;
}

defm SUST_B_3D_V2B8_CLAMP : SUST_3D_V2<"sust.b.3d.v2.b8.clamp", Int16Regs>;
defm SUST_B_3D_V2B16_CLAMP : SUST_3D_V2<"sust.b.3d.v2.b16.clamp", Int16Regs>;
defm SUST_B_3D_V2B32_CLAMP : SUST_3D_V2<"sust.b.3d.v2.b32.clamp", Int32Regs>;
defm SUST_B_3D_V2B64_CLAMP : SUST_3D_V2<"sust.b.3d.v2.b64.clamp", Int64Regs>;

defm SUST_B_3D_V2B8_TRAP : SUST_3D_V2<"sust.b.3d.v2.b8.trap", Int16Regs>;
defm SUST_B_3D_V2B16_TRAP : SUST_3D_V2<"sust.b.3d.v2.b16.trap", Int16Regs>;
defm SUST_B_3D_V2B32_TRAP : SUST_3D_V2<"sust.b.3d.v2.b32.trap", Int32Regs>;
defm SUST_B_3D_V2B64_TRAP : SUST_3D_V2<"sust.b.3d.v2.b64.trap", Int64Regs>;

defm SUST_B_3D_V2B8_ZERO : SUST_3D_V2<"sust.b.3d.v2.b8.zero", Int16Regs>;
defm SUST_B_3D_V2B16_ZERO : SUST_3D_V2<"sust.b.3d.v2.b16.zero", Int16Regs>;
defm SUST_B_3D_V2B32_ZERO : SUST_3D_V2<"sust.b.3d.v2.b32.zero", Int32Regs>;
defm SUST_B_3D_V2B64_ZERO : SUST_3D_V2<"sust.b.3d.v2.b64.zero", Int64Regs>;

defm SUST_P_3D_V2B8_TRAP : SUST_3D_V2<"sust.p.3d.v2.b8.trap", Int16Regs>;
defm SUST_P_3D_V2B16_TRAP : SUST_3D_V2<"sust.p.3d.v2.b16.trap", Int16Regs>;
defm SUST_P_3D_V2B32_TRAP : SUST_3D_V2<"sust.p.3d.v2.b32.trap", Int32Regs>;

// --- 3D, four data elements -----------------------------------------------
class SUST_3D_V4_base<string inst, NVPTXRegClass intype, dag surf>
  : NVPTXInst<(outs),
              !con(surf, (ins Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
                              intype:$r, intype:$g, intype:$b, intype:$a)),
              !strconcat(
                  inst,
                  " \t[$s, \\{$x, $y, $z, $z\\}], \\{$r, $g, $b, $a\\};"),
              []>;

multiclass SUST_3D_V4<string inst, NVPTXRegClass intype> {
  def _R : SUST_3D_V4_base<inst, intype, (ins Int64Regs:$s)>;
  def _I : SUST_3D_V4_base<inst, intype, (ins i64imm:$s)>;
}

defm SUST_B_3D_V4B8_CLAMP : SUST_3D_V4<"sust.b.3d.v4.b8.clamp", Int16Regs>;
defm SUST_B_3D_V4B16_CLAMP : SUST_3D_V4<"sust.b.3d.v4.b16.clamp", Int16Regs>;
defm SUST_B_3D_V4B32_CLAMP : SUST_3D_V4<"sust.b.3d.v4.b32.clamp", Int32Regs>;

defm SUST_B_3D_V4B8_TRAP : SUST_3D_V4<"sust.b.3d.v4.b8.trap", Int16Regs>;
defm SUST_B_3D_V4B16_TRAP : SUST_3D_V4<"sust.b.3d.v4.b16.trap", Int16Regs>;
defm SUST_B_3D_V4B32_TRAP : SUST_3D_V4<"sust.b.3d.v4.b32.trap", Int32Regs>;

defm SUST_B_3D_V4B8_ZERO : SUST_3D_V4<"sust.b.3d.v4.b8.zero", Int16Regs>;
defm SUST_B_3D_V4B16_ZERO : SUST_3D_V4<"sust.b.3d.v4.b16.zero", Int16Regs>;
defm SUST_B_3D_V4B32_ZERO : SUST_3D_V4<"sust.b.3d.v4.b32.zero", Int32Regs>;

defm SUST_P_3D_V4B8_TRAP : SUST_3D_V4<"sust.p.3d.v4.b8.trap", Int16Regs>;
defm SUST_P_3D_V4B16_TRAP : SUST_3D_V4<"sust.p.3d.v4.b16.trap", Int16Regs>;
defm SUST_P_3D_V4B32_TRAP : SUST_3D_V4<"sust.p.3d.v4.b32.trap", Int32Regs>;

}

// Surface store selection patterns.
// They are written as standalone Pat records rather than inside the
// instruction definitions because embedding them there made TableGen report
// type errors; the root cause is not recorded here.
//
// Every pattern below selects the _R (Int64Regs handle) record. For the array
// forms the pattern names the layer operand $l; it sits in the position the
// instruction classes call $idx.

// .clamp variant
def : Pat<(int_nvvm_sust_b_1d_i8_clamp
             Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
          (SUST_B_1D_B8_CLAMP_R
             Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;

def : Pat<(int_nvvm_sust_b_1d_i16_clamp
             Int64Regs:$s, Int32Regs:$x, Int16Regs:$r),
          (SUST_B_1D_B16_CLAMP_R
             Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>;

def : Pat<(int_nvvm_sust_b_1d_i32_clamp
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$r),
          (SUST_B_1D_B32_CLAMP_R
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$r)>;

def : Pat<(int_nvvm_sust_b_1d_i64_clamp
             Int64Regs:$s, Int32Regs:$x, Int64Regs:$r),
          (SUST_B_1D_B64_CLAMP_R
             Int64Regs:$s, Int32Regs:$x, Int64Regs:$r)>;

def : Pat<(int_nvvm_sust_b_1d_v2i8_clamp
             Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
          (SUST_B_1D_V2B8_CLAMP_R
             Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g)>;

def : Pat<(int_nvvm_sust_b_1d_v2i16_clamp
             Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g),
          (SUST_B_1D_V2B16_CLAMP_R
             Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g)>;

def : Pat<(int_nvvm_sust_b_1d_v2i32_clamp
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g),
          (SUST_B_1D_V2B32_CLAMP_R
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g)>;

def : Pat<(int_nvvm_sust_b_1d_v2i64_clamp
             Int64Regs:$s, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g),
          (SUST_B_1D_V2B64_CLAMP_R
             Int64Regs:$s, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g)>;

def : Pat<(int_nvvm_sust_b_1d_v4i8_clamp
             Int64Regs:$s, Int32Regs:$x,
             Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
          (SUST_B_1D_V4B8_CLAMP_R
             Int64Regs:$s, Int32Regs:$x,
             Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<(int_nvvm_sust_b_1d_v4i16_clamp
             Int64Regs:$s, Int32Regs:$x,
             Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
          (SUST_B_1D_V4B16_CLAMP_R
             Int64Regs:$s, Int32Regs:$x,
             Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<(int_nvvm_sust_b_1d_v4i32_clamp
             Int64Regs:$s, Int32Regs:$x,
             Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
          (SUST_B_1D_V4B32_CLAMP_R
             Int64Regs:$s, Int32Regs:$x,
             Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;



def : Pat<(int_nvvm_sust_b_1d_array_i8_clamp
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r),
          (SUST_B_1D_ARRAY_B8_CLAMP_R
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r)>;

def : Pat<(int_nvvm_sust_b_1d_array_i16_clamp
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r),
          (SUST_B_1D_ARRAY_B16_CLAMP_R
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r)>;

def : Pat<(int_nvvm_sust_b_1d_array_i32_clamp
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r),
          (SUST_B_1D_ARRAY_B32_CLAMP_R
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r)>;

def : Pat<(int_nvvm_sust_b_1d_array_i64_clamp
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int64Regs:$r),
          (SUST_B_1D_ARRAY_B64_CLAMP_R
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int64Regs:$r)>;

def : Pat<(int_nvvm_sust_b_1d_array_v2i8_clamp
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
             Int16Regs:$r, Int16Regs:$g),
          (SUST_B_1D_ARRAY_V2B8_CLAMP_R
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
             Int16Regs:$r, Int16Regs:$g)>;

def : Pat<(int_nvvm_sust_b_1d_array_v2i16_clamp
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
             Int16Regs:$r, Int16Regs:$g),
          (SUST_B_1D_ARRAY_V2B16_CLAMP_R
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
             Int16Regs:$r, Int16Regs:$g)>;

def : Pat<(int_nvvm_sust_b_1d_array_v2i32_clamp
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
             Int32Regs:$r, Int32Regs:$g),
          (SUST_B_1D_ARRAY_V2B32_CLAMP_R
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
             Int32Regs:$r, Int32Regs:$g)>;

def : Pat<(int_nvvm_sust_b_1d_array_v2i64_clamp
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
             Int64Regs:$r, Int64Regs:$g),
          (SUST_B_1D_ARRAY_V2B64_CLAMP_R
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
             Int64Regs:$r, Int64Regs:$g)>;

def : Pat<(int_nvvm_sust_b_1d_array_v4i8_clamp
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
             Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
          (SUST_B_1D_ARRAY_V4B8_CLAMP_R
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
             Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<(int_nvvm_sust_b_1d_array_v4i16_clamp
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
             Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
          (SUST_B_1D_ARRAY_V4B16_CLAMP_R
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
             Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<(int_nvvm_sust_b_1d_array_v4i32_clamp
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
             Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
          (SUST_B_1D_ARRAY_V4B32_CLAMP_R
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
             Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;



def : Pat<(int_nvvm_sust_b_2d_i8_clamp
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
          (SUST_B_2D_B8_CLAMP_R
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r)>;

def : Pat<(int_nvvm_sust_b_2d_i16_clamp
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
          (SUST_B_2D_B16_CLAMP_R
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r)>;

def : Pat<(int_nvvm_sust_b_2d_i32_clamp
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r),
          (SUST_B_2D_B32_CLAMP_R
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r)>;

def : Pat<(int_nvvm_sust_b_2d_i64_clamp
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r),
          (SUST_B_2D_B64_CLAMP_R
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r)>;

def : Pat<(int_nvvm_sust_b_2d_v2i8_clamp
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
             Int16Regs:$r, Int16Regs:$g),
          (SUST_B_2D_V2B8_CLAMP_R
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
             Int16Regs:$r, Int16Regs:$g)>;

def : Pat<(int_nvvm_sust_b_2d_v2i16_clamp
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
             Int16Regs:$r, Int16Regs:$g),
          (SUST_B_2D_V2B16_CLAMP_R
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
             Int16Regs:$r, Int16Regs:$g)>;

def : Pat<(int_nvvm_sust_b_2d_v2i32_clamp
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
             Int32Regs:$r, Int32Regs:$g),
          (SUST_B_2D_V2B32_CLAMP_R
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
             Int32Regs:$r, Int32Regs:$g)>;

def : Pat<(int_nvvm_sust_b_2d_v2i64_clamp
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
             Int64Regs:$r, Int64Regs:$g),
          (SUST_B_2D_V2B64_CLAMP_R
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
             Int64Regs:$r, Int64Regs:$g)>;

def : Pat<(int_nvvm_sust_b_2d_v4i8_clamp
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
             Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
          (SUST_B_2D_V4B8_CLAMP_R
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
             Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<(int_nvvm_sust_b_2d_v4i16_clamp
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
             Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
          (SUST_B_2D_V4B16_CLAMP_R
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
             Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<(int_nvvm_sust_b_2d_v4i32_clamp
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
             Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
          (SUST_B_2D_V4B32_CLAMP_R
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
             Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;



def : Pat<(int_nvvm_sust_b_2d_array_i8_clamp
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
             Int16Regs:$r),
          (SUST_B_2D_ARRAY_B8_CLAMP_R
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
             Int16Regs:$r)>;

def : Pat<(int_nvvm_sust_b_2d_array_i16_clamp
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
             Int16Regs:$r),
          (SUST_B_2D_ARRAY_B16_CLAMP_R
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
             Int16Regs:$r)>;

def : Pat<(int_nvvm_sust_b_2d_array_i32_clamp
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
             Int32Regs:$r),
          (SUST_B_2D_ARRAY_B32_CLAMP_R
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
             Int32Regs:$r)>;

def : Pat<(int_nvvm_sust_b_2d_array_i64_clamp
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
             Int64Regs:$r),
          (SUST_B_2D_ARRAY_B64_CLAMP_R
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
             Int64Regs:$r)>;

def : Pat<(int_nvvm_sust_b_2d_array_v2i8_clamp
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
             Int16Regs:$r, Int16Regs:$g),
          (SUST_B_2D_ARRAY_V2B8_CLAMP_R
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
             Int16Regs:$r, Int16Regs:$g)>;

def : Pat<(int_nvvm_sust_b_2d_array_v2i16_clamp
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
             Int16Regs:$r, Int16Regs:$g),
          (SUST_B_2D_ARRAY_V2B16_CLAMP_R
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
             Int16Regs:$r, Int16Regs:$g)>;

def : Pat<(int_nvvm_sust_b_2d_array_v2i32_clamp
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
             Int32Regs:$r, Int32Regs:$g),
          (SUST_B_2D_ARRAY_V2B32_CLAMP_R
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
             Int32Regs:$r, Int32Regs:$g)>;

def : Pat<(int_nvvm_sust_b_2d_array_v2i64_clamp
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
             Int64Regs:$r, Int64Regs:$g),
          (SUST_B_2D_ARRAY_V2B64_CLAMP_R
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
             Int64Regs:$r, Int64Regs:$g)>;

def : Pat<(int_nvvm_sust_b_2d_array_v4i8_clamp
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
             Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
          (SUST_B_2D_ARRAY_V4B8_CLAMP_R
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
             Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<(int_nvvm_sust_b_2d_array_v4i16_clamp
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
             Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
          (SUST_B_2D_ARRAY_V4B16_CLAMP_R
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
             Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<(int_nvvm_sust_b_2d_array_v4i32_clamp
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
             Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
          (SUST_B_2D_ARRAY_V4B32_CLAMP_R
             Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
             Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;



def : Pat<(int_nvvm_sust_b_3d_i8_clamp
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
             Int16Regs:$r),
          (SUST_B_3D_B8_CLAMP_R
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
             Int16Regs:$r)>;

def : Pat<(int_nvvm_sust_b_3d_i16_clamp
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
             Int16Regs:$r),
          (SUST_B_3D_B16_CLAMP_R
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
             Int16Regs:$r)>;

def : Pat<(int_nvvm_sust_b_3d_i32_clamp
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
             Int32Regs:$r),
          (SUST_B_3D_B32_CLAMP_R
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
             Int32Regs:$r)>;

def : Pat<(int_nvvm_sust_b_3d_i64_clamp
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
             Int64Regs:$r),
          (SUST_B_3D_B64_CLAMP_R
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
             Int64Regs:$r)>;

def : Pat<(int_nvvm_sust_b_3d_v2i8_clamp
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
             Int16Regs:$r, Int16Regs:$g),
          (SUST_B_3D_V2B8_CLAMP_R
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
             Int16Regs:$r, Int16Regs:$g)>;

def : Pat<(int_nvvm_sust_b_3d_v2i16_clamp
             Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
             Int16Regs:$r,
Int16Regs:$g), 6029 (SUST_B_3D_V2B16_CLAMP_R Int64Regs:$s, 6030 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 6031 Int16Regs:$r, Int16Regs:$g)>; 6032 6033def : Pat<(int_nvvm_sust_b_3d_v2i32_clamp 6034 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 6035 Int32Regs:$r, Int32Regs:$g), 6036 (SUST_B_3D_V2B32_CLAMP_R Int64Regs:$s, 6037 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 6038 Int32Regs:$r, Int32Regs:$g)>; 6039 6040def : Pat<(int_nvvm_sust_b_3d_v2i64_clamp 6041 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 6042 Int64Regs:$r, Int64Regs:$g), 6043 (SUST_B_3D_V2B64_CLAMP_R Int64Regs:$s, 6044 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 6045 Int64Regs:$r, Int64Regs:$g)>; 6046 6047def : Pat<(int_nvvm_sust_b_3d_v4i8_clamp 6048 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 6049 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 6050 (SUST_B_3D_V4B8_CLAMP_R Int64Regs:$s, 6051 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 6052 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; 6053 6054def : Pat<(int_nvvm_sust_b_3d_v4i16_clamp 6055 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 6056 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 6057 (SUST_B_3D_V4B16_CLAMP_R Int64Regs:$s, 6058 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 6059 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; 6060 6061def : Pat<(int_nvvm_sust_b_3d_v4i32_clamp 6062 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 6063 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), 6064 (SUST_B_3D_V4B32_CLAMP_R Int64Regs:$s, 6065 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 6066 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>; 6067 6068 6069// .trap variant 6070def : Pat<(int_nvvm_sust_b_1d_i8_trap 6071 Int64Regs:$s, Int32Regs:$x, Int16Regs:$r), 6072 (SUST_B_1D_B8_TRAP_R Int64Regs:$s, Int32Regs:$x, Int16Regs:$r)>; 6073 6074def : Pat<(int_nvvm_sust_b_1d_i16_trap 6075 Int64Regs:$s, Int32Regs:$x, Int16Regs:$r), 6076 (SUST_B_1D_B16_TRAP_R Int64Regs:$s, 
Int32Regs:$x, Int16Regs:$r)>; 6077 6078def : Pat<(int_nvvm_sust_b_1d_i32_trap 6079 Int64Regs:$s, Int32Regs:$x, Int32Regs:$r), 6080 (SUST_B_1D_B32_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$r)>; 6081 6082def : Pat<(int_nvvm_sust_b_1d_i64_trap 6083 Int64Regs:$s, Int32Regs:$x, Int64Regs:$r), 6084 (SUST_B_1D_B64_TRAP_R Int64Regs:$s, Int32Regs:$x, Int64Regs:$r)>; 6085 6086def : Pat<(int_nvvm_sust_b_1d_v2i8_trap 6087 Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g), 6088 (SUST_B_1D_V2B8_TRAP_R Int64Regs:$s, Int32Regs:$x, 6089 Int16Regs:$r, Int16Regs:$g)>; 6090 6091def : Pat<(int_nvvm_sust_b_1d_v2i16_trap 6092 Int64Regs:$s, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g), 6093 (SUST_B_1D_V2B16_TRAP_R Int64Regs:$s, Int32Regs:$x, 6094 Int16Regs:$r, Int16Regs:$g)>; 6095 6096def : Pat<(int_nvvm_sust_b_1d_v2i32_trap 6097 Int64Regs:$s, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g), 6098 (SUST_B_1D_V2B32_TRAP_R Int64Regs:$s, Int32Regs:$x, 6099 Int32Regs:$r, Int32Regs:$g)>; 6100 6101def : Pat<(int_nvvm_sust_b_1d_v2i64_trap 6102 Int64Regs:$s, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g), 6103 (SUST_B_1D_V2B64_TRAP_R Int64Regs:$s, Int32Regs:$x, 6104 Int64Regs:$r, Int64Regs:$g)>; 6105 6106def : Pat<(int_nvvm_sust_b_1d_v4i8_trap 6107 Int64Regs:$s, Int32Regs:$x, 6108 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 6109 (SUST_B_1D_V4B8_TRAP_R Int64Regs:$s, Int32Regs:$x, 6110 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; 6111 6112def : Pat<(int_nvvm_sust_b_1d_v4i16_trap 6113 Int64Regs:$s, Int32Regs:$x, 6114 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 6115 (SUST_B_1D_V4B16_TRAP_R Int64Regs:$s, Int32Regs:$x, 6116 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; 6117 6118def : Pat<(int_nvvm_sust_b_1d_v4i32_trap 6119 Int64Regs:$s, Int32Regs:$x, 6120 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), 6121 (SUST_B_1D_V4B32_TRAP_R Int64Regs:$s, Int32Regs:$x, 6122 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>; 6123 6124 6125 6126def : 
Pat<(int_nvvm_sust_b_1d_array_i8_trap 6127 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r), 6128 (SUST_B_1D_ARRAY_B8_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 6129 Int16Regs:$r)>; 6130 6131def : Pat<(int_nvvm_sust_b_1d_array_i16_trap 6132 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r), 6133 (SUST_B_1D_ARRAY_B16_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 6134 Int16Regs:$r)>; 6135 6136def : Pat<(int_nvvm_sust_b_1d_array_i32_trap 6137 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r), 6138 (SUST_B_1D_ARRAY_B32_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 6139 Int32Regs:$r)>; 6140 6141def : Pat<(int_nvvm_sust_b_1d_array_i64_trap 6142 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int64Regs:$r), 6143 (SUST_B_1D_ARRAY_B64_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 6144 Int64Regs:$r)>; 6145 6146def : Pat<(int_nvvm_sust_b_1d_array_v2i8_trap 6147 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g), 6148 (SUST_B_1D_ARRAY_V2B8_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 6149 Int16Regs:$r, Int16Regs:$g)>; 6150 6151def : Pat<(int_nvvm_sust_b_1d_array_v2i16_trap 6152 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int16Regs:$r, Int16Regs:$g), 6153 (SUST_B_1D_ARRAY_V2B16_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 6154 Int16Regs:$r, Int16Regs:$g)>; 6155 6156def : Pat<(int_nvvm_sust_b_1d_array_v2i32_trap 6157 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$r, Int32Regs:$g), 6158 (SUST_B_1D_ARRAY_V2B32_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 6159 Int32Regs:$r, Int32Regs:$g)>; 6160 6161def : Pat<(int_nvvm_sust_b_1d_array_v2i64_trap 6162 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int64Regs:$r, Int64Regs:$g), 6163 (SUST_B_1D_ARRAY_V2B64_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 6164 Int64Regs:$r, Int64Regs:$g)>; 6165 6166def : Pat<(int_nvvm_sust_b_1d_array_v4i8_trap 6167 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 6168 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 6169 
(SUST_B_1D_ARRAY_V4B8_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 6170 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; 6171 6172def : Pat<(int_nvvm_sust_b_1d_array_v4i16_trap 6173 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 6174 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 6175 (SUST_B_1D_ARRAY_V4B16_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 6176 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; 6177 6178def : Pat<(int_nvvm_sust_b_1d_array_v4i32_trap 6179 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 6180 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), 6181 (SUST_B_1D_ARRAY_V4B32_TRAP_R Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, 6182 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>; 6183 6184 6185 6186def : Pat<(int_nvvm_sust_b_2d_i8_trap 6187 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r), 6188 (SUST_B_2D_B8_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 6189 Int16Regs:$r)>; 6190 6191def : Pat<(int_nvvm_sust_b_2d_i16_trap 6192 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r), 6193 (SUST_B_2D_B16_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 6194 Int16Regs:$r)>; 6195 6196def : Pat<(int_nvvm_sust_b_2d_i32_trap 6197 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r), 6198 (SUST_B_2D_B32_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 6199 Int32Regs:$r)>; 6200 6201def : Pat<(int_nvvm_sust_b_2d_i64_trap 6202 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r), 6203 (SUST_B_2D_B64_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 6204 Int64Regs:$r)>; 6205 6206def : Pat<(int_nvvm_sust_b_2d_v2i8_trap 6207 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g), 6208 (SUST_B_2D_V2B8_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 6209 Int16Regs:$r, Int16Regs:$g)>; 6210 6211def : Pat<(int_nvvm_sust_b_2d_v2i16_trap 6212 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g), 6213 (SUST_B_2D_V2B16_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 6214 Int16Regs:$r, 
Int16Regs:$g)>; 6215 6216def : Pat<(int_nvvm_sust_b_2d_v2i32_trap 6217 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g), 6218 (SUST_B_2D_V2B32_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 6219 Int32Regs:$r, Int32Regs:$g)>; 6220 6221def : Pat<(int_nvvm_sust_b_2d_v2i64_trap 6222 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, Int64Regs:$g), 6223 (SUST_B_2D_V2B64_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 6224 Int64Regs:$r, Int64Regs:$g)>; 6225 6226def : Pat<(int_nvvm_sust_b_2d_v4i8_trap 6227 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 6228 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 6229 (SUST_B_2D_V4B8_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 6230 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; 6231 6232def : Pat<(int_nvvm_sust_b_2d_v4i16_trap 6233 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 6234 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 6235 (SUST_B_2D_V4B16_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 6236 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; 6237 6238def : Pat<(int_nvvm_sust_b_2d_v4i32_trap 6239 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 6240 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), 6241 (SUST_B_2D_V4B32_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 6242 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>; 6243 6244 6245 6246def : Pat<(int_nvvm_sust_b_2d_array_i8_trap 6247 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r), 6248 (SUST_B_2D_ARRAY_B8_TRAP_R Int64Regs:$s, 6249 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, 6250 Int16Regs:$r)>; 6251 6252def : Pat<(int_nvvm_sust_b_2d_array_i16_trap 6253 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r), 6254 (SUST_B_2D_ARRAY_B16_TRAP_R Int64Regs:$s, 6255 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, 6256 Int16Regs:$r)>; 6257 6258def : Pat<(int_nvvm_sust_b_2d_array_i32_trap 6259 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r), 6260 
(SUST_B_2D_ARRAY_B32_TRAP_R Int64Regs:$s, 6261 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, 6262 Int32Regs:$r)>; 6263 6264def : Pat<(int_nvvm_sust_b_2d_array_i64_trap 6265 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r), 6266 (SUST_B_2D_ARRAY_B64_TRAP_R Int64Regs:$s, 6267 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, 6268 Int64Regs:$r)>; 6269 6270def : Pat<(int_nvvm_sust_b_2d_array_v2i8_trap 6271 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, 6272 Int16Regs:$r, Int16Regs:$g), 6273 (SUST_B_2D_ARRAY_V2B8_TRAP_R Int64Regs:$s, Int32Regs:$l, 6274 Int32Regs:$x, Int32Regs:$y, 6275 Int16Regs:$r, Int16Regs:$g)>; 6276 6277def : Pat<(int_nvvm_sust_b_2d_array_v2i16_trap 6278 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, 6279 Int16Regs:$r, Int16Regs:$g), 6280 (SUST_B_2D_ARRAY_V2B16_TRAP_R Int64Regs:$s, Int32Regs:$l, 6281 Int32Regs:$x, Int32Regs:$y, 6282 Int16Regs:$r, Int16Regs:$g)>; 6283 6284def : Pat<(int_nvvm_sust_b_2d_array_v2i32_trap 6285 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, 6286 Int32Regs:$g), 6287 (SUST_B_2D_ARRAY_V2B32_TRAP_R Int64Regs:$s, Int32Regs:$l, 6288 Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g)>; 6289 6290def : Pat<(int_nvvm_sust_b_2d_array_v2i64_trap 6291 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, 6292 Int64Regs:$g), 6293 (SUST_B_2D_ARRAY_V2B64_TRAP_R Int64Regs:$s, Int32Regs:$l, 6294 Int32Regs:$x, Int32Regs:$y, Int64Regs:$r, Int64Regs:$g)>; 6295 6296def : Pat<(int_nvvm_sust_b_2d_array_v4i8_trap 6297 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, 6298 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 6299 (SUST_B_2D_ARRAY_V4B8_TRAP_R Int64Regs:$s, 6300 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, 6301 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; 6302 6303def : Pat<(int_nvvm_sust_b_2d_array_v4i16_trap 6304 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, 6305 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 6306 
(SUST_B_2D_ARRAY_V4B16_TRAP_R Int64Regs:$s, 6307 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, 6308 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; 6309 6310def : Pat<(int_nvvm_sust_b_2d_array_v4i32_trap 6311 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, 6312 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), 6313 (SUST_B_2D_ARRAY_V4B32_TRAP_R Int64Regs:$s, Int32Regs:$l, 6314 Int32Regs:$x, Int32Regs:$y, 6315 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>; 6316 6317 6318 6319def : Pat<(int_nvvm_sust_b_3d_i8_trap 6320 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 6321 Int16Regs:$r), 6322 (SUST_B_3D_B8_TRAP_R Int64Regs:$s, 6323 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 6324 Int16Regs:$r)>; 6325 6326def : Pat<(int_nvvm_sust_b_3d_i16_trap 6327 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 6328 Int16Regs:$r), 6329 (SUST_B_3D_B16_TRAP_R Int64Regs:$s, 6330 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 6331 Int16Regs:$r)>; 6332 6333def : Pat<(int_nvvm_sust_b_3d_i32_trap 6334 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 6335 Int32Regs:$r), 6336 (SUST_B_3D_B32_TRAP_R Int64Regs:$s, 6337 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 6338 Int32Regs:$r)>; 6339 6340def : Pat<(int_nvvm_sust_b_3d_i64_trap 6341 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 6342 Int64Regs:$r), 6343 (SUST_B_3D_B64_TRAP_R Int64Regs:$s, 6344 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 6345 Int64Regs:$r)>; 6346 6347def : Pat<(int_nvvm_sust_b_3d_v2i8_trap 6348 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 6349 Int16Regs:$r, Int16Regs:$g), 6350 (SUST_B_3D_V2B8_TRAP_R Int64Regs:$s, 6351 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 6352 Int16Regs:$r, Int16Regs:$g)>; 6353 6354def : Pat<(int_nvvm_sust_b_3d_v2i16_trap 6355 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 6356 Int16Regs:$r, Int16Regs:$g), 6357 (SUST_B_3D_V2B16_TRAP_R Int64Regs:$s, 6358 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 6359 Int16Regs:$r, Int16Regs:$g)>; 6360 
// Every pattern below is an anonymous Pat that forwards the operands of one
// int_nvvm_sust_* intrinsic, unchanged and in the same order, to the SUST_*_R
// instruction of the matching name. i8 and i16 values both use Int16Regs
// operands; i32 uses Int32Regs and i64 uses Int64Regs.

// 3D, .trap (remaining entries):
//   int_nvvm_sust_b_3d_<ty>_trap -> SUST_B_3D_<TY>_TRAP_R
def : Pat<(int_nvvm_sust_b_3d_v2i32_trap
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int32Regs:$r, Int32Regs:$g),
          (SUST_B_3D_V2B32_TRAP_R
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int32Regs:$r, Int32Regs:$g)>;

def : Pat<(int_nvvm_sust_b_3d_v2i64_trap
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int64Regs:$r, Int64Regs:$g),
          (SUST_B_3D_V2B64_TRAP_R
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int64Regs:$r, Int64Regs:$g)>;

def : Pat<(int_nvvm_sust_b_3d_v4i8_trap
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
          (SUST_B_3D_V4B8_TRAP_R
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<(int_nvvm_sust_b_3d_v4i16_trap
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
          (SUST_B_3D_V4B16_TRAP_R
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<(int_nvvm_sust_b_3d_v4i32_trap
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
          (SUST_B_3D_V4B32_TRAP_R
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;


// .zero variant

// 1D, .zero: int_nvvm_sust_b_1d_<ty>_zero -> SUST_B_1D_<TY>_ZERO_R
def : Pat<(int_nvvm_sust_b_1d_i8_zero
           Int64Regs:$s, Int32Regs:$x,
           Int16Regs:$r),
          (SUST_B_1D_B8_ZERO_R
           Int64Regs:$s, Int32Regs:$x,
           Int16Regs:$r)>;

def : Pat<(int_nvvm_sust_b_1d_i16_zero
           Int64Regs:$s, Int32Regs:$x,
           Int16Regs:$r),
          (SUST_B_1D_B16_ZERO_R
           Int64Regs:$s, Int32Regs:$x,
           Int16Regs:$r)>;

def : Pat<(int_nvvm_sust_b_1d_i32_zero
           Int64Regs:$s, Int32Regs:$x,
           Int32Regs:$r),
          (SUST_B_1D_B32_ZERO_R
           Int64Regs:$s, Int32Regs:$x,
           Int32Regs:$r)>;

def : Pat<(int_nvvm_sust_b_1d_i64_zero
           Int64Regs:$s, Int32Regs:$x,
           Int64Regs:$r),
          (SUST_B_1D_B64_ZERO_R
           Int64Regs:$s, Int32Regs:$x,
           Int64Regs:$r)>;

def : Pat<(int_nvvm_sust_b_1d_v2i8_zero
           Int64Regs:$s, Int32Regs:$x,
           Int16Regs:$r, Int16Regs:$g),
          (SUST_B_1D_V2B8_ZERO_R
           Int64Regs:$s, Int32Regs:$x,
           Int16Regs:$r, Int16Regs:$g)>;

def : Pat<(int_nvvm_sust_b_1d_v2i16_zero
           Int64Regs:$s, Int32Regs:$x,
           Int16Regs:$r, Int16Regs:$g),
          (SUST_B_1D_V2B16_ZERO_R
           Int64Regs:$s, Int32Regs:$x,
           Int16Regs:$r, Int16Regs:$g)>;

def : Pat<(int_nvvm_sust_b_1d_v2i32_zero
           Int64Regs:$s, Int32Regs:$x,
           Int32Regs:$r, Int32Regs:$g),
          (SUST_B_1D_V2B32_ZERO_R
           Int64Regs:$s, Int32Regs:$x,
           Int32Regs:$r, Int32Regs:$g)>;

def : Pat<(int_nvvm_sust_b_1d_v2i64_zero
           Int64Regs:$s, Int32Regs:$x,
           Int64Regs:$r, Int64Regs:$g),
          (SUST_B_1D_V2B64_ZERO_R
           Int64Regs:$s, Int32Regs:$x,
           Int64Regs:$r, Int64Regs:$g)>;

def : Pat<(int_nvvm_sust_b_1d_v4i8_zero
           Int64Regs:$s, Int32Regs:$x,
           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
          (SUST_B_1D_V4B8_ZERO_R
           Int64Regs:$s, Int32Regs:$x,
           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<(int_nvvm_sust_b_1d_v4i16_zero
           Int64Regs:$s, Int32Regs:$x,
           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
          (SUST_B_1D_V4B16_ZERO_R
           Int64Regs:$s, Int32Regs:$x,
           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<(int_nvvm_sust_b_1d_v4i32_zero
           Int64Regs:$s, Int32Regs:$x,
           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
          (SUST_B_1D_V4B32_ZERO_R
           Int64Regs:$s, Int32Regs:$x,
           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;



// 1D array, .zero:
//   int_nvvm_sust_b_1d_array_<ty>_zero -> SUST_B_1D_ARRAY_<TY>_ZERO_R
def : Pat<(int_nvvm_sust_b_1d_array_i8_zero
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
           Int16Regs:$r),
          (SUST_B_1D_ARRAY_B8_ZERO_R
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
           Int16Regs:$r)>;

def : Pat<(int_nvvm_sust_b_1d_array_i16_zero
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
           Int16Regs:$r),
          (SUST_B_1D_ARRAY_B16_ZERO_R
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
           Int16Regs:$r)>;

def : Pat<(int_nvvm_sust_b_1d_array_i32_zero
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
           Int32Regs:$r),
          (SUST_B_1D_ARRAY_B32_ZERO_R
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
           Int32Regs:$r)>;

def : Pat<(int_nvvm_sust_b_1d_array_i64_zero
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
           Int64Regs:$r),
          (SUST_B_1D_ARRAY_B64_ZERO_R
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
           Int64Regs:$r)>;

def : Pat<(int_nvvm_sust_b_1d_array_v2i8_zero
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
           Int16Regs:$r, Int16Regs:$g),
          (SUST_B_1D_ARRAY_V2B8_ZERO_R
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
           Int16Regs:$r, Int16Regs:$g)>;

def : Pat<(int_nvvm_sust_b_1d_array_v2i16_zero
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
           Int16Regs:$r, Int16Regs:$g),
          (SUST_B_1D_ARRAY_V2B16_ZERO_R
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
           Int16Regs:$r, Int16Regs:$g)>;

def : Pat<(int_nvvm_sust_b_1d_array_v2i32_zero
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
           Int32Regs:$r, Int32Regs:$g),
          (SUST_B_1D_ARRAY_V2B32_ZERO_R
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
           Int32Regs:$r, Int32Regs:$g)>;

def : Pat<(int_nvvm_sust_b_1d_array_v2i64_zero
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
           Int64Regs:$r, Int64Regs:$g),
          (SUST_B_1D_ARRAY_V2B64_ZERO_R
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
           Int64Regs:$r, Int64Regs:$g)>;

def : Pat<(int_nvvm_sust_b_1d_array_v4i8_zero
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
          (SUST_B_1D_ARRAY_V4B8_ZERO_R
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<(int_nvvm_sust_b_1d_array_v4i16_zero
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
          (SUST_B_1D_ARRAY_V4B16_ZERO_R
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<(int_nvvm_sust_b_1d_array_v4i32_zero
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
          (SUST_B_1D_ARRAY_V4B32_ZERO_R
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;



// 2D, .zero: int_nvvm_sust_b_2d_<ty>_zero -> SUST_B_2D_<TY>_ZERO_R
def : Pat<(int_nvvm_sust_b_2d_i8_zero
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
           Int16Regs:$r),
          (SUST_B_2D_B8_ZERO_R
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
           Int16Regs:$r)>;

def : Pat<(int_nvvm_sust_b_2d_i16_zero
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
           Int16Regs:$r),
          (SUST_B_2D_B16_ZERO_R
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
           Int16Regs:$r)>;

def : Pat<(int_nvvm_sust_b_2d_i32_zero
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
           Int32Regs:$r),
          (SUST_B_2D_B32_ZERO_R
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
           Int32Regs:$r)>;

def : Pat<(int_nvvm_sust_b_2d_i64_zero
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
           Int64Regs:$r),
          (SUST_B_2D_B64_ZERO_R
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
           Int64Regs:$r)>;

def : Pat<(int_nvvm_sust_b_2d_v2i8_zero
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
           Int16Regs:$r, Int16Regs:$g),
          (SUST_B_2D_V2B8_ZERO_R
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
           Int16Regs:$r, Int16Regs:$g)>;

def : Pat<(int_nvvm_sust_b_2d_v2i16_zero
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
           Int16Regs:$r, Int16Regs:$g),
          (SUST_B_2D_V2B16_ZERO_R
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
           Int16Regs:$r, Int16Regs:$g)>;

def : Pat<(int_nvvm_sust_b_2d_v2i32_zero
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
           Int32Regs:$r, Int32Regs:$g),
          (SUST_B_2D_V2B32_ZERO_R
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
           Int32Regs:$r, Int32Regs:$g)>;

def : Pat<(int_nvvm_sust_b_2d_v2i64_zero
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
           Int64Regs:$r, Int64Regs:$g),
          (SUST_B_2D_V2B64_ZERO_R
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
           Int64Regs:$r, Int64Regs:$g)>;

def : Pat<(int_nvvm_sust_b_2d_v4i8_zero
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
          (SUST_B_2D_V4B8_ZERO_R
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<(int_nvvm_sust_b_2d_v4i16_zero
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
          (SUST_B_2D_V4B16_ZERO_R
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<(int_nvvm_sust_b_2d_v4i32_zero
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
          (SUST_B_2D_V4B32_ZERO_R
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;



// 2D array, .zero:
//   int_nvvm_sust_b_2d_array_<ty>_zero -> SUST_B_2D_ARRAY_<TY>_ZERO_R
def : Pat<(int_nvvm_sust_b_2d_array_i8_zero
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
           Int16Regs:$r),
          (SUST_B_2D_ARRAY_B8_ZERO_R
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
           Int16Regs:$r)>;

def : Pat<(int_nvvm_sust_b_2d_array_i16_zero
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
           Int16Regs:$r),
          (SUST_B_2D_ARRAY_B16_ZERO_R
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
           Int16Regs:$r)>;

def : Pat<(int_nvvm_sust_b_2d_array_i32_zero
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
           Int32Regs:$r),
          (SUST_B_2D_ARRAY_B32_ZERO_R
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
           Int32Regs:$r)>;

def : Pat<(int_nvvm_sust_b_2d_array_i64_zero
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
           Int64Regs:$r),
          (SUST_B_2D_ARRAY_B64_ZERO_R
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
           Int64Regs:$r)>;

def : Pat<(int_nvvm_sust_b_2d_array_v2i8_zero
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
           Int16Regs:$r, Int16Regs:$g),
          (SUST_B_2D_ARRAY_V2B8_ZERO_R
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
           Int16Regs:$r, Int16Regs:$g)>;

def : Pat<(int_nvvm_sust_b_2d_array_v2i16_zero
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
           Int16Regs:$r, Int16Regs:$g),
          (SUST_B_2D_ARRAY_V2B16_ZERO_R
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
           Int16Regs:$r, Int16Regs:$g)>;

def : Pat<(int_nvvm_sust_b_2d_array_v2i32_zero
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
           Int32Regs:$r, Int32Regs:$g),
          (SUST_B_2D_ARRAY_V2B32_ZERO_R
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
           Int32Regs:$r, Int32Regs:$g)>;

def : Pat<(int_nvvm_sust_b_2d_array_v2i64_zero
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
           Int64Regs:$r, Int64Regs:$g),
          (SUST_B_2D_ARRAY_V2B64_ZERO_R
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
           Int64Regs:$r, Int64Regs:$g)>;

def : Pat<(int_nvvm_sust_b_2d_array_v4i8_zero
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
          (SUST_B_2D_ARRAY_V4B8_ZERO_R
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<(int_nvvm_sust_b_2d_array_v4i16_zero
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
          (SUST_B_2D_ARRAY_V4B16_ZERO_R
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<(int_nvvm_sust_b_2d_array_v4i32_zero
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
          (SUST_B_2D_ARRAY_V4B32_ZERO_R
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y,
           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;



// 3D, .zero: int_nvvm_sust_b_3d_<ty>_zero -> SUST_B_3D_<TY>_ZERO_R
def : Pat<(int_nvvm_sust_b_3d_i8_zero
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int16Regs:$r),
          (SUST_B_3D_B8_ZERO_R
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int16Regs:$r)>;

def : Pat<(int_nvvm_sust_b_3d_i16_zero
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int16Regs:$r),
          (SUST_B_3D_B16_ZERO_R
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int16Regs:$r)>;

def : Pat<(int_nvvm_sust_b_3d_i32_zero
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int32Regs:$r),
          (SUST_B_3D_B32_ZERO_R
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int32Regs:$r)>;

def : Pat<(int_nvvm_sust_b_3d_i64_zero
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int64Regs:$r),
          (SUST_B_3D_B64_ZERO_R
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int64Regs:$r)>;

def : Pat<(int_nvvm_sust_b_3d_v2i8_zero
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int16Regs:$r, Int16Regs:$g),
          (SUST_B_3D_V2B8_ZERO_R
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int16Regs:$r, Int16Regs:$g)>;

def : Pat<(int_nvvm_sust_b_3d_v2i16_zero
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int16Regs:$r, Int16Regs:$g),
          (SUST_B_3D_V2B16_ZERO_R
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int16Regs:$r, Int16Regs:$g)>;

def : Pat<(int_nvvm_sust_b_3d_v2i32_zero
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int32Regs:$r, Int32Regs:$g),
          (SUST_B_3D_V2B32_ZERO_R
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int32Regs:$r, Int32Regs:$g)>;

def : Pat<(int_nvvm_sust_b_3d_v2i64_zero
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int64Regs:$r, Int64Regs:$g),
          (SUST_B_3D_V2B64_ZERO_R
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int64Regs:$r, Int64Regs:$g)>;

def : Pat<(int_nvvm_sust_b_3d_v4i8_zero
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
          (SUST_B_3D_V4B8_ZERO_R
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<(int_nvvm_sust_b_3d_v4i16_zero
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
          (SUST_B_3D_V4B16_ZERO_R
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<(int_nvvm_sust_b_3d_v4i32_zero
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
          (SUST_B_3D_V4B32_ZERO_R
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z,
           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;




// int_nvvm_sust_p_* patterns, .trap only. Within this span the sust_p groups
// have i8/i16/i32 element variants and no i64 ones.

// 1D, .trap: int_nvvm_sust_p_1d_<ty>_trap -> SUST_P_1D_<TY>_TRAP_R
def : Pat<(int_nvvm_sust_p_1d_i8_trap
           Int64Regs:$s, Int32Regs:$x,
           Int16Regs:$r),
          (SUST_P_1D_B8_TRAP_R
           Int64Regs:$s, Int32Regs:$x,
           Int16Regs:$r)>;

def : Pat<(int_nvvm_sust_p_1d_i16_trap
           Int64Regs:$s, Int32Regs:$x,
           Int16Regs:$r),
          (SUST_P_1D_B16_TRAP_R
           Int64Regs:$s, Int32Regs:$x,
           Int16Regs:$r)>;

def : Pat<(int_nvvm_sust_p_1d_i32_trap
           Int64Regs:$s, Int32Regs:$x,
           Int32Regs:$r),
          (SUST_P_1D_B32_TRAP_R
           Int64Regs:$s, Int32Regs:$x,
           Int32Regs:$r)>;

def : Pat<(int_nvvm_sust_p_1d_v2i8_trap
           Int64Regs:$s, Int32Regs:$x,
           Int16Regs:$r, Int16Regs:$g),
          (SUST_P_1D_V2B8_TRAP_R
           Int64Regs:$s, Int32Regs:$x,
           Int16Regs:$r, Int16Regs:$g)>;

def : Pat<(int_nvvm_sust_p_1d_v2i16_trap
           Int64Regs:$s, Int32Regs:$x,
           Int16Regs:$r, Int16Regs:$g),
          (SUST_P_1D_V2B16_TRAP_R
           Int64Regs:$s, Int32Regs:$x,
           Int16Regs:$r, Int16Regs:$g)>;

def : Pat<(int_nvvm_sust_p_1d_v2i32_trap
           Int64Regs:$s, Int32Regs:$x,
           Int32Regs:$r, Int32Regs:$g),
          (SUST_P_1D_V2B32_TRAP_R
           Int64Regs:$s, Int32Regs:$x,
           Int32Regs:$r, Int32Regs:$g)>;

def : Pat<(int_nvvm_sust_p_1d_v4i8_trap
           Int64Regs:$s, Int32Regs:$x,
           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
          (SUST_P_1D_V4B8_TRAP_R
           Int64Regs:$s, Int32Regs:$x,
           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<(int_nvvm_sust_p_1d_v4i16_trap
           Int64Regs:$s, Int32Regs:$x,
           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
          (SUST_P_1D_V4B16_TRAP_R
           Int64Regs:$s, Int32Regs:$x,
           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<(int_nvvm_sust_p_1d_v4i32_trap
           Int64Regs:$s, Int32Regs:$x,
           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
          (SUST_P_1D_V4B32_TRAP_R
           Int64Regs:$s, Int32Regs:$x,
           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;



// 1D array, .trap:
//   int_nvvm_sust_p_1d_array_<ty>_trap -> SUST_P_1D_ARRAY_<TY>_TRAP_R
def : Pat<(int_nvvm_sust_p_1d_array_i8_trap
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
           Int16Regs:$r),
          (SUST_P_1D_ARRAY_B8_TRAP_R
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
           Int16Regs:$r)>;

def : Pat<(int_nvvm_sust_p_1d_array_i16_trap
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
           Int16Regs:$r),
          (SUST_P_1D_ARRAY_B16_TRAP_R
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
           Int16Regs:$r)>;

def : Pat<(int_nvvm_sust_p_1d_array_i32_trap
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
           Int32Regs:$r),
          (SUST_P_1D_ARRAY_B32_TRAP_R
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
           Int32Regs:$r)>;

def : Pat<(int_nvvm_sust_p_1d_array_v2i8_trap
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
           Int16Regs:$r, Int16Regs:$g),
          (SUST_P_1D_ARRAY_V2B8_TRAP_R
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
           Int16Regs:$r, Int16Regs:$g)>;

def : Pat<(int_nvvm_sust_p_1d_array_v2i16_trap
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
           Int16Regs:$r, Int16Regs:$g),
          (SUST_P_1D_ARRAY_V2B16_TRAP_R
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
           Int16Regs:$r, Int16Regs:$g)>;

def : Pat<(int_nvvm_sust_p_1d_array_v2i32_trap
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
           Int32Regs:$r, Int32Regs:$g),
          (SUST_P_1D_ARRAY_V2B32_TRAP_R
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
           Int32Regs:$r, Int32Regs:$g)>;

def : Pat<(int_nvvm_sust_p_1d_array_v4i8_trap
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
          (SUST_P_1D_ARRAY_V4B8_TRAP_R
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<(int_nvvm_sust_p_1d_array_v4i16_trap
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a),
          (SUST_P_1D_ARRAY_V4B16_TRAP_R
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
           Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>;

def : Pat<(int_nvvm_sust_p_1d_array_v4i32_trap
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a),
          (SUST_P_1D_ARRAY_V4B32_TRAP_R
           Int64Regs:$s, Int32Regs:$l, Int32Regs:$x,
           Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>;



// 2D, .trap: int_nvvm_sust_p_2d_<ty>_trap -> SUST_P_2D_<TY>_TRAP_R
def : Pat<(int_nvvm_sust_p_2d_i8_trap
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
           Int16Regs:$r),
          (SUST_P_2D_B8_TRAP_R
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y,
           Int16Regs:$r)>;

// NOTE: the next pattern runs past the end of this span; only its visible
// head is reproduced, token-for-token, and it is intentionally left open.
def : Pat<(int_nvvm_sust_p_2d_i16_trap
           Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r),
          (SUST_P_2D_B16_TRAP_R Int64Regs:$s,
Int32Regs:$x, Int32Regs:$y, 6832 Int16Regs:$r)>; 6833 6834def : Pat<(int_nvvm_sust_p_2d_i32_trap 6835 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r), 6836 (SUST_P_2D_B32_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 6837 Int32Regs:$r)>; 6838 6839def : Pat<(int_nvvm_sust_p_2d_v2i8_trap 6840 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g), 6841 (SUST_P_2D_V2B8_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 6842 Int16Regs:$r, Int16Regs:$g)>; 6843 6844def : Pat<(int_nvvm_sust_p_2d_v2i16_trap 6845 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r, Int16Regs:$g), 6846 (SUST_P_2D_V2B16_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 6847 Int16Regs:$r, Int16Regs:$g)>; 6848 6849def : Pat<(int_nvvm_sust_p_2d_v2i32_trap 6850 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g), 6851 (SUST_P_2D_V2B32_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 6852 Int32Regs:$r, Int32Regs:$g)>; 6853 6854def : Pat<(int_nvvm_sust_p_2d_v4i8_trap 6855 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 6856 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 6857 (SUST_P_2D_V4B8_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 6858 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; 6859 6860def : Pat<(int_nvvm_sust_p_2d_v4i16_trap 6861 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 6862 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 6863 (SUST_P_2D_V4B16_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 6864 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; 6865 6866def : Pat<(int_nvvm_sust_p_2d_v4i32_trap 6867 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 6868 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), 6869 (SUST_P_2D_V4B32_TRAP_R Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, 6870 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>; 6871 6872 6873 6874def : Pat<(int_nvvm_sust_p_2d_array_i8_trap 6875 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r), 6876 (SUST_P_2D_ARRAY_B8_TRAP_R 
Int64Regs:$s, 6877 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, 6878 Int16Regs:$r)>; 6879 6880def : Pat<(int_nvvm_sust_p_2d_array_i16_trap 6881 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int16Regs:$r), 6882 (SUST_P_2D_ARRAY_B16_TRAP_R Int64Regs:$s, 6883 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, 6884 Int16Regs:$r)>; 6885 6886def : Pat<(int_nvvm_sust_p_2d_array_i32_trap 6887 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r), 6888 (SUST_P_2D_ARRAY_B32_TRAP_R Int64Regs:$s, 6889 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, 6890 Int32Regs:$r)>; 6891 6892def : Pat<(int_nvvm_sust_p_2d_array_v2i8_trap 6893 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, 6894 Int16Regs:$r, Int16Regs:$g), 6895 (SUST_P_2D_ARRAY_V2B8_TRAP_R Int64Regs:$s, Int32Regs:$l, 6896 Int32Regs:$x, Int32Regs:$y, 6897 Int16Regs:$r, Int16Regs:$g)>; 6898 6899def : Pat<(int_nvvm_sust_p_2d_array_v2i16_trap 6900 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, 6901 Int16Regs:$r, Int16Regs:$g), 6902 (SUST_P_2D_ARRAY_V2B16_TRAP_R Int64Regs:$s, Int32Regs:$l, 6903 Int32Regs:$x, Int32Regs:$y, 6904 Int16Regs:$r, Int16Regs:$g)>; 6905 6906def : Pat<(int_nvvm_sust_p_2d_array_v2i32_trap 6907 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, 6908 Int32Regs:$g), 6909 (SUST_P_2D_ARRAY_V2B32_TRAP_R Int64Regs:$s, Int32Regs:$l, 6910 Int32Regs:$x, Int32Regs:$y, Int32Regs:$r, Int32Regs:$g)>; 6911 6912def : Pat<(int_nvvm_sust_p_2d_array_v4i8_trap 6913 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, 6914 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 6915 (SUST_P_2D_ARRAY_V4B8_TRAP_R Int64Regs:$s, 6916 Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, 6917 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; 6918 6919def : Pat<(int_nvvm_sust_p_2d_array_v4i16_trap 6920 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, 6921 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 6922 (SUST_P_2D_ARRAY_V4B16_TRAP_R Int64Regs:$s, 6923 Int32Regs:$l, 
Int32Regs:$x, Int32Regs:$y, 6924 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; 6925 6926def : Pat<(int_nvvm_sust_p_2d_array_v4i32_trap 6927 Int64Regs:$s, Int32Regs:$l, Int32Regs:$x, Int32Regs:$y, 6928 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), 6929 (SUST_P_2D_ARRAY_V4B32_TRAP_R Int64Regs:$s, Int32Regs:$l, 6930 Int32Regs:$x, Int32Regs:$y, 6931 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>; 6932 6933 6934 6935def : Pat<(int_nvvm_sust_p_3d_i8_trap 6936 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 6937 Int16Regs:$r), 6938 (SUST_P_3D_B8_TRAP_R Int64Regs:$s, 6939 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 6940 Int16Regs:$r)>; 6941 6942def : Pat<(int_nvvm_sust_p_3d_i16_trap 6943 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 6944 Int16Regs:$r), 6945 (SUST_P_3D_B16_TRAP_R Int64Regs:$s, 6946 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 6947 Int16Regs:$r)>; 6948 6949def : Pat<(int_nvvm_sust_p_3d_i32_trap 6950 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 6951 Int32Regs:$r), 6952 (SUST_P_3D_B32_TRAP_R Int64Regs:$s, 6953 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 6954 Int32Regs:$r)>; 6955 6956def : Pat<(int_nvvm_sust_p_3d_v2i8_trap 6957 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 6958 Int16Regs:$r, Int16Regs:$g), 6959 (SUST_P_3D_V2B8_TRAP_R Int64Regs:$s, 6960 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 6961 Int16Regs:$r, Int16Regs:$g)>; 6962 6963def : Pat<(int_nvvm_sust_p_3d_v2i16_trap 6964 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 6965 Int16Regs:$r, Int16Regs:$g), 6966 (SUST_P_3D_V2B16_TRAP_R Int64Regs:$s, 6967 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 6968 Int16Regs:$r, Int16Regs:$g)>; 6969 6970def : Pat<(int_nvvm_sust_p_3d_v2i32_trap 6971 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 6972 Int32Regs:$r, Int32Regs:$g), 6973 (SUST_P_3D_V2B32_TRAP_R Int64Regs:$s, 6974 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 6975 Int32Regs:$r, Int32Regs:$g)>; 6976 6977def : 
Pat<(int_nvvm_sust_p_3d_v4i8_trap 6978 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 6979 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 6980 (SUST_P_3D_V4B8_TRAP_R Int64Regs:$s, 6981 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 6982 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; 6983 6984def : Pat<(int_nvvm_sust_p_3d_v4i16_trap 6985 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 6986 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a), 6987 (SUST_P_3D_V4B16_TRAP_R Int64Regs:$s, 6988 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 6989 Int16Regs:$r, Int16Regs:$g, Int16Regs:$b, Int16Regs:$a)>; 6990 6991def : Pat<(int_nvvm_sust_p_3d_v4i32_trap 6992 Int64Regs:$s, Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 6993 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a), 6994 (SUST_P_3D_V4B32_TRAP_R Int64Regs:$s, 6995 Int32Regs:$x, Int32Regs:$y, Int32Regs:$z, 6996 Int32Regs:$r, Int32Regs:$g, Int32Regs:$b, Int32Regs:$a)>; 6997 6998//----------------------------------- 6999// Read Special Registers 7000//----------------------------------- 7001 7002class PTX_READ_SREG_R64<string regname, Intrinsic intop, list<Predicate> Preds=[]> 7003 : NVPTXInst<(outs Int64Regs:$d), (ins), 7004 !strconcat("mov.u64 \t$d, %", regname, ";"), 7005 [(set i64:$d, (intop))]>, 7006 Requires<Preds>; 7007 7008class PTX_READ_SREG_R32<string regname, Intrinsic intop, list<Predicate> Preds=[]> 7009 : NVPTXInst<(outs Int32Regs:$d), (ins), 7010 !strconcat("mov.u32 \t$d, %", regname, ";"), 7011 [(set i32:$d, (intop))]>, 7012 Requires<Preds>; 7013 7014multiclass PTX_READ_SREG_R32V4<string regname, list<Predicate> Preds=[]> { 7015 foreach suffix = ["x", "y", "z", "w"] in { 7016 defvar reg = regname # "." 
# suffix; 7017 defvar intr = !cast<Intrinsic>("int_nvvm_read_ptx_sreg_" # regname # "_" # suffix); 7018 def "_"#suffix : PTX_READ_SREG_R32<reg, intr, Preds>; 7019 } 7020} 7021 7022// TODO Add read vector-version of special registers 7023 7024defm INT_PTX_SREG_TID : PTX_READ_SREG_R32V4<"tid">; 7025defm INT_PTX_SREG_NTID : PTX_READ_SREG_R32V4<"ntid">; 7026defm INT_PTX_SREG_CTAID : PTX_READ_SREG_R32V4<"ctaid">; 7027defm INT_PTX_SREG_NCTAID: PTX_READ_SREG_R32V4<"nctaid">; 7028 7029defm INT_PTX_SREG_CLUSTERID : 7030 PTX_READ_SREG_R32V4<"clusterid", [hasSM<90>, hasPTX<78>]>; 7031defm INT_PTX_SREG_NCLUSTERID : 7032 PTX_READ_SREG_R32V4<"nclusterid", [hasSM<90>, hasPTX<78>]>; 7033defm INT_PTX_SREG_CLUSTER_CTAID : 7034 PTX_READ_SREG_R32V4<"cluster_ctaid", [hasSM<90>, hasPTX<78>]>; 7035defm INT_PTX_SREG_CLUSTER_NCTAID: 7036 PTX_READ_SREG_R32V4<"cluster_nctaid", [hasSM<90>, hasPTX<78>]>; 7037 7038def INT_PTX_SREG_CLUSTER_CTARANK : 7039 PTX_READ_SREG_R32<"cluster_ctarank", 7040 int_nvvm_read_ptx_sreg_cluster_ctarank, 7041 [hasSM<90>, hasPTX<78>]>; 7042def INT_PTX_SREG_CLUSTER_NCTARANK: 7043 PTX_READ_SREG_R32<"cluster_nctarank", 7044 int_nvvm_read_ptx_sreg_cluster_nctarank, 7045 [hasSM<90>, hasPTX<78>]>; 7046 7047 7048def INT_PTX_SREG_LANEID : 7049 PTX_READ_SREG_R32<"laneid", int_nvvm_read_ptx_sreg_laneid>; 7050def INT_PTX_SREG_WARPID : 7051 PTX_READ_SREG_R32<"warpid", int_nvvm_read_ptx_sreg_warpid>; 7052def INT_PTX_SREG_NWARPID : 7053 PTX_READ_SREG_R32<"nwarpid", int_nvvm_read_ptx_sreg_nwarpid>; 7054def INT_PTX_SREG_SMID : 7055 PTX_READ_SREG_R32<"smid", int_nvvm_read_ptx_sreg_smid>; 7056def INT_PTX_SREG_NSMID : 7057 PTX_READ_SREG_R32<"nsmid", int_nvvm_read_ptx_sreg_nsmid>; 7058def INT_PTX_SREG_GRIDID : 7059 PTX_READ_SREG_R32<"gridid", int_nvvm_read_ptx_sreg_gridid>; 7060 7061def INT_PTX_SREG_LANEMASK_EQ : 7062 PTX_READ_SREG_R32<"lanemask_eq", int_nvvm_read_ptx_sreg_lanemask_eq>; 7063def INT_PTX_SREG_LANEMASK_LE : 7064 PTX_READ_SREG_R32<"lanemask_le", 
int_nvvm_read_ptx_sreg_lanemask_le>; 7065def INT_PTX_SREG_LANEMASK_LT : 7066 PTX_READ_SREG_R32<"lanemask_lt", int_nvvm_read_ptx_sreg_lanemask_lt>; 7067def INT_PTX_SREG_LANEMASK_GE : 7068 PTX_READ_SREG_R32<"lanemask_ge", int_nvvm_read_ptx_sreg_lanemask_ge>; 7069def INT_PTX_SREG_LANEMASK_GT : 7070 PTX_READ_SREG_R32<"lanemask_gt", int_nvvm_read_ptx_sreg_lanemask_gt>; 7071 7072let hasSideEffects = 1 in { 7073def INT_PTX_SREG_CLOCK : 7074 PTX_READ_SREG_R32<"clock", int_nvvm_read_ptx_sreg_clock>; 7075def INT_PTX_SREG_CLOCK64 : 7076 PTX_READ_SREG_R64<"clock64", int_nvvm_read_ptx_sreg_clock64>; 7077def INT_PTX_SREG_GLOBALTIMER : 7078 PTX_READ_SREG_R64<"globaltimer", int_nvvm_read_ptx_sreg_globaltimer>; 7079} 7080 7081def: Pat <(i64 (readcyclecounter)), (INT_PTX_SREG_CLOCK64)>; 7082def: Pat <(i64 (readsteadycounter)), (INT_PTX_SREG_GLOBALTIMER)>; 7083 7084def INT_PTX_SREG_PM0 : PTX_READ_SREG_R32<"pm0", int_nvvm_read_ptx_sreg_pm0>; 7085def INT_PTX_SREG_PM1 : PTX_READ_SREG_R32<"pm1", int_nvvm_read_ptx_sreg_pm1>; 7086def INT_PTX_SREG_PM2 : PTX_READ_SREG_R32<"pm2", int_nvvm_read_ptx_sreg_pm2>; 7087def INT_PTX_SREG_PM3 : PTX_READ_SREG_R32<"pm3", int_nvvm_read_ptx_sreg_pm3>; 7088 7089// TODO: It would be nice to use PTX_READ_SREG here, but it doesn't 7090// handle the constant. 7091def INT_PTX_SREG_WARPSIZE : 7092 NVPTXInst<(outs Int32Regs:$dst), (ins), "mov.u32 \t$dst, WARP_SZ;", 7093 [(set i32:$dst, (int_nvvm_read_ptx_sreg_warpsize))]>; 7094 7095// Helper class that represents a 'fragment' of an NVPTX *MMA instruction. 7096// In addition to target-independent fields provided by WMMA_REGS, it adds 7097// the fields commonly used to implement specific PTX instruction -- register 7098// types and names, constraints, parts of assembly, etc. 7099class WMMA_REGINFO<WMMA_REGS r, string op> 7100 : WMMA_REGS<r.geom, r.frag, r.ptx_elt_type> { 7101 // NVPTX register types used to carry fragment data. 
7102 NVPTXRegClass regclass = !cond( 7103 !eq(ptx_elt_type, "f16") : Int32Regs, 7104 !eq(ptx_elt_type, "f32") : Float32Regs, 7105 !eq(ptx_elt_type, "f64") : Float64Regs, 7106 !eq(ptx_elt_type, "bf16") : Int32Regs, 7107 !eq(ptx_elt_type, "tf32") : Int32Regs, 7108 !eq(ptx_elt_type, "s32") : Int32Regs, 7109 !eq(ptx_elt_type, "b16") : Int32Regs, 7110 !eq(ptx_elt_type, "s8") : Int32Regs, 7111 !eq(ptx_elt_type, "u8") : Int32Regs, 7112 !eq(ptx_elt_type, "s4") : Int32Regs, 7113 !eq(ptx_elt_type, "u4") : Int32Regs, 7114 !eq(ptx_elt_type, "b1") : Int32Regs); 7115 7116 // Instruction input/output arguments for the fragment. 7117 list<NVPTXRegClass> ptx_regs = !listsplat(regclass, !size(regs)); 7118 7119 // List of register names for the fragment -- ["ra0", "ra1",...] 7120 list<string> reg_names = RegSeq<!size(ptx_regs), "r"#frag>.ret; 7121 7122 // Generates "{{$r0, $r1,.... $rN-1}}" for use in asm string construction. 7123 string regstring = "{{$" # !interleave(reg_names, ", $") # "}}"; 7124 7125 // Predicates for particular fragment variant. Technically those are 7126 // per-instruction predicates, but currently all fragments that can be used in 7127 // a given instruction are subject to the same constraints, so an instruction 7128 // can use predicates from any of its fragments. If/when this is no 7129 // longer the case, we can concat all per-fragment predicates to enforce that 7130 // all fragments of the instruction are viable. 
7131 list<Predicate> Predicates = !cond( 7132 // fp16 -> fp16/fp32 @ m16n16k16 7133 !and(!eq(geom, "m16n16k16"), 7134 !or(!eq(ptx_elt_type, "f16"), 7135 !eq(ptx_elt_type, "f32"))) : [hasSM<70>, hasPTX<60>], 7136 7137 !and(!eq(geom,"m8n8k4"), 7138 !eq(ptx_elt_type, "f64")) : [hasSM<80>, hasPTX<70>], 7139 7140 // fp16 -> fp16/fp32 @ m8n32k16/m32n8k16 7141 !and(!or(!eq(geom, "m8n32k16"), 7142 !eq(geom, "m32n8k16")), 7143 !or(!eq(ptx_elt_type, "f16"), 7144 !eq(ptx_elt_type, "f32"))) : [hasSM<70>, hasPTX<61>], 7145 7146 // u8/s8 -> s32 @ m16n16k16/m8n32k16/m32n8k16 7147 !and(!or(!eq(geom,"m16n16k16"), 7148 !eq(geom,"m8n32k16"), 7149 !eq(geom,"m32n8k16")), 7150 !or(!eq(ptx_elt_type, "u8"), 7151 !eq(ptx_elt_type, "s8"), 7152 !eq(ptx_elt_type, "s32"))) : [hasSM<72>, hasPTX<63>], 7153 7154 !and(!or(!eq(geom,"m16n16k16"), 7155 !eq(geom,"m8n32k16"), 7156 !eq(geom,"m32n8k16")), 7157 !eq(ptx_elt_type, "bf16")) : [hasSM<80>, hasPTX<70>], 7158 7159 !and(!eq(geom,"m16n16k8"), 7160 !eq(ptx_elt_type, "tf32")) : [hasSM<80>, hasPTX<70>], 7161 7162 !and(!eq(geom,"m16n16k8"), 7163 !eq(ptx_elt_type, "f32")) : [hasSM<80>, hasPTX<70>], 7164 7165 // b1 -> s32 @ m8n8k128(b1) 7166 !and(!ne(op,"mma"), 7167 !eq(geom,"m8n8k128")) : [hasSM<75>, hasPTX<63>], 7168 7169 // u4/s4 -> s32 @ m8n8k32 (u4/s4) 7170 !and(!ne(op,"mma"), 7171 !eq(geom,"m8n8k32")) : [hasSM<75>, hasPTX<63>], 7172 7173 !or(!eq(geom,"m16n8k8"), 7174 !eq(geom,"m8n8k16")) : [hasSM<75>, hasPTX<65>], 7175 7176 !and(!ne(ptx_elt_type,"f64"), 7177 !eq(geom, "m8n8k4")) : [hasSM<70>, hasPTX<64>], 7178 7179 // mma m8n8k32 requires higher PTX version 7180 !and(!eq(op,"mma"), 7181 !eq(geom,"m8n8k32")) : [hasSM<75>, hasPTX<65>], 7182 7183 !and(!eq(ptx_elt_type,"f64"), 7184 !eq(geom, "m8n8k4")) : [hasSM<80>, hasPTX<70>], 7185 7186 !and(!eq(op,"mma"), 7187 !or(!eq(geom, "m16n8k16"), 7188 !eq(geom, "m16n8k4"), 7189 !eq(geom, "m16n8k32"), 7190 !eq(geom, "m16n8k64"), 7191 !eq(geom, "m8n8k128"), 7192 !eq(geom, "m16n8k128"), 7193 !eq(geom, 
"m16n8k256"))) : [hasSM<80>, hasPTX<70>], 7194 7195 !and(!eq(op,"ldmatrix"), 7196 !eq(ptx_elt_type,"b16"), 7197 !eq(geom, "m8n8")) : [hasSM<75>, hasPTX<65>]); 7198 7199 // template DAGs for instruction inputs/output. 7200 dag Outs = !dag(outs, ptx_regs, reg_names); 7201 dag Ins = !dag(ins, ptx_regs, reg_names); 7202} 7203 7204// Convert dag of arguments into a dag to match given intrinsic. 7205class BuildPatternI<Intrinsic Intr, dag Ins> { 7206 // Build a dag pattern that matches the intrinsic call. 7207 dag ret = !foreach(tmp, Ins, 7208 !subst(imem, ADDRvar, 7209 !subst(MEMri64, ADDRri64, 7210 !subst(MEMri, ADDRri, 7211 !subst(ins, Intr, tmp))))); 7212} 7213 7214// Same as above, but uses PatFrag instead of an Intrinsic. 7215class BuildPatternPF<PatFrag Intr, dag Ins> { 7216 // Build a dag pattern that matches the intrinsic call. 7217 dag ret = !foreach(tmp, Ins, 7218 !subst(imem, ADDRvar, 7219 !subst(MEMri64, ADDRri64, 7220 !subst(MEMri, ADDRri, 7221 !subst(ins, Intr, tmp))))); 7222} 7223 7224// Common WMMA-related fields used for building patterns for all MMA instructions. 7225class WMMA_INSTR<string _Intr, list<dag> _Args> 7226 : NVPTXInst<(outs), (ins), "?", []> { 7227 Intrinsic Intr = !cast<Intrinsic>(_Intr); 7228 // Concatenate all arguments into a single dag. 7229 dag Args = !foldl((ins), _Args, a, b, !con(a,b)); 7230 // Pre-build the pattern to match (intrinsic arg0, arg1, ...). 7231 dag IntrinsicPattern = BuildPatternI<!cast<Intrinsic>(Intr), Args>.ret; 7232} 7233 7234// 7235// wmma.load.[a|b|c].sync.[row|col].m16n16k16[|.global|.shared].[f16|f32] 7236// 7237 7238class WMMA_LOAD<WMMA_REGINFO Frag, string Layout, string Space, bit WithStride, 7239 DAGOperand SrcOp> 7240 : WMMA_INSTR<WMMA_NAME_LDST<"load", Frag, Layout, WithStride>.record, 7241 [!con((ins SrcOp:$src), 7242 !if(WithStride, (ins Int32Regs:$ldm), (ins)))]>, 7243 Requires<Frag.Predicates> { 7244 // Load/store intrinsics are overloaded on pointer's address space. 
7245 // To match the right intrinsic, we need to build AS-constrained PatFrag. 7246 // Operands is a dag equivalent in shape to Args, but using (ops node:$name, .....). 7247 dag PFOperands = !if(WithStride, (ops node:$src, node:$ldm), (ops node:$src)); 7248 dag PFOperandsIntr = !if(WithStride, (Intr node:$src, node:$ldm), (Intr node:$src)); 7249 // Build PatFrag that only matches particular address space. 7250 PatFrag IntrFrag = PatFrag<PFOperands, 7251 PFOperandsIntr, 7252 !cond(!eq(Space, ".shared"): AS_match.shared, 7253 !eq(Space, ".global"): AS_match.global, 7254 true: AS_match.generic)>; 7255 // Build AS-constrained pattern. 7256 let IntrinsicPattern = BuildPatternPF<IntrFrag, Args>.ret; 7257 7258 let OutOperandList = Frag.Outs; 7259 let InOperandList = !con(Args, (ins MmaCode:$ptx)); 7260 let AsmString = "wmma.load." 7261 # Frag.frag 7262 # ".sync" 7263 # "${ptx:aligned}" 7264 # "." # Layout 7265 # "." # Frag.geom 7266 # Space 7267 # "." # Frag.ptx_elt_type # " \t" 7268 # Frag.regstring 7269 # ", [$src]" 7270 # !if(WithStride, ", $ldm", "") 7271 # ";"; 7272} 7273 7274// 7275// wmma.store.d.sync.[row|col].m16n16k16[|.global|.shared].[f16|f32] 7276// 7277class WMMA_STORE_D<WMMA_REGINFO Frag, string Layout, string Space, 7278 bit WithStride, DAGOperand DstOp> 7279 : WMMA_INSTR<WMMA_NAME_LDST<"store", Frag, Layout, WithStride>.record, 7280 [!con((ins DstOp:$dst), 7281 Frag.Ins, 7282 !if(WithStride, (ins Int32Regs:$ldm), (ins)))]>, 7283 Requires<Frag.Predicates> { 7284 7285 // Load/store intrinsics are overloaded on pointer's address space. 7286 // To match the right intrinsic, we need to build AS-constrained PatFrag. 7287 // Operands is a dag equivalent in shape to Args, but using (ops node:$name, .....). 7288 dag PFOperands = !con((ops node:$dst), 7289 !dag(ops, !listsplat(node, !size(Frag.regs)), Frag.reg_names), 7290 !if(WithStride, (ops node:$ldm), (ops))); 7291 // Build PatFrag that only matches particular address space. 
7292 PatFrag IntrFrag = PatFrag<PFOperands, 7293 !foreach(tmp, PFOperands, !subst(ops, Intr, tmp)), 7294 !cond(!eq(Space, ".shared"): AS_match.shared, 7295 !eq(Space, ".global"): AS_match.global, 7296 true: AS_match.generic)>; 7297 // Build AS-constrained pattern. 7298 let IntrinsicPattern = BuildPatternPF<IntrFrag, Args>.ret; 7299 7300 let InOperandList = !con(Args, (ins MmaCode:$ptx)); 7301 let OutOperandList = (outs); 7302 let AsmString = "wmma.store.d.sync" 7303 # "${ptx:aligned}" 7304 # "." # Layout 7305 # "." # Frag.geom 7306 # Space 7307 # "." # Frag.ptx_elt_type 7308 # " \t[$dst]," 7309 # Frag.regstring 7310 # !if(WithStride, ", $ldm", "") 7311 # ";"; 7312} 7313 7314// Create all load/store variants 7315defset list<WMMA_INSTR> MMA_LDSTs = { 7316 foreach layout = ["row", "col"] in { 7317 foreach stride = [false, true] in { 7318 foreach space = [".global", ".shared", ""] in { 7319 foreach addr = [imem, Int32Regs, Int64Regs, MEMri, MEMri64] in { 7320 foreach frag = NVVM_MMA_OPS.all_ld_ops in 7321 if NVVM_WMMA_LDST_SUPPORTED<frag, layout>.ret then 7322 def : WMMA_LOAD<WMMA_REGINFO<frag, "load">, layout, space, stride, addr>; 7323 foreach frag = NVVM_MMA_OPS.all_st_ops in 7324 if NVVM_WMMA_LDST_SUPPORTED<frag, layout>.ret then 7325 def : WMMA_STORE_D<WMMA_REGINFO<frag, "store">, layout, space, stride, addr>; 7326 } // addr 7327 } // space 7328 } // stride 7329 } // layout 7330} // defset 7331 7332// B1 instruction variants need extra constraints. 
// Computes the predicate list for an MMA-style instruction: the predicates of
// fragment A, extended with [hasSM<80>, hasPTX<71>] when the b1 operation is
// ".and.popc".  The inputs are also kept in the Op/Frag fields.
class MMA_OP_PREDICATES<WMMA_REGINFO FragA, string b1op> {
  string Op = b1op;
  WMMA_REGINFO Frag = FragA;
  list<Predicate> ret = !listconcat(
    FragA.Predicates,
    !if(!eq(b1op, ".and.popc"), [hasSM<80>,hasPTX<71>],[])
  );
}

// WMMA.MMA
// Operands are the registers of fragments A, B and C (in that order) plus the
// MmaCode:$ptx operand; the results are the registers of fragment D.
class WMMA_MMA<WMMA_REGINFO FragA, WMMA_REGINFO FragB,
               WMMA_REGINFO FragC, WMMA_REGINFO FragD,
               string ALayout, string BLayout, int Satfinite, string rnd, string b1op>
  : WMMA_INSTR<WMMA_NAME<ALayout, BLayout, Satfinite, rnd, b1op, FragA, FragB, FragC, FragD>.record,
                         [FragA.Ins, FragB.Ins, FragC.Ins]>,
    // Requires does not seem to have effect on Instruction w/o Patterns.
    // We set it here anyways and propagate to the Pat<> we construct below.
    Requires<MMA_OP_PREDICATES<FragA, b1op>.ret> {
  // Type suffix of the mnemonic: ".<D>.<C>" when A is f16, otherwise the
  // full ".<D>.<A>.<B>.<C>" list.
  string TypeList = !cond(
    !eq(FragA.ptx_elt_type, "f16") : "." # FragD.ptx_elt_type
                                     # "." # FragC.ptx_elt_type,
    true: "." # FragD.ptx_elt_type
          # "." # FragA.ptx_elt_type
          # "." # FragB.ptx_elt_type
          # "." # FragC.ptx_elt_type
  );

  let InOperandList = !con(Args, (ins MmaCode:$ptx));
  let OutOperandList = FragD.Outs;

  let AsmString = "wmma.mma"
                  # b1op
                  # ".sync"
                  # "${ptx:aligned}"
                  # "." # ALayout
                  # "." # BLayout
                  # "." # FragA.geom
                  // The rounding mode is only printed when one was requested.
                  # !if(!eq(rnd, ""), "", "." # rnd)
                  # TypeList
                  # !if(Satfinite, ".satfinite", "") # "\n\t\t"
                  # FragD.regstring # ",\n\t\t"
                  # FragA.regstring # ",\n\t\t"
                  # FragB.regstring # ",\n\t\t"
                  # FragC.regstring # ";";
}

// Instantiate WMMA_MMA for every combination of layouts, satfinite flag,
// rounding mode, operation and b1 operation that NVVM_WMMA_SUPPORTED accepts.
// The resulting records are collected in WMMAs.
let isConvergent = true in {
defset list<WMMA_INSTR> WMMAs  = {
  foreach layout_a = ["row", "col"] in {
    foreach layout_b = ["row", "col"] in {
      foreach satf = [0, 1] in {
        foreach rnd = ["", "rn", "rz", "rm", "rp"] in {
          foreach op = NVVM_MMA_OPS.all_wmma_ops in {
            foreach b1op = NVVM_MMA_B1OPS<op>.ret in {
              if NVVM_WMMA_SUPPORTED<op, layout_a, layout_b, satf, rnd>.ret then {
                def : WMMA_MMA<WMMA_REGINFO<op[0], "wmma.mma">,
                               WMMA_REGINFO<op[1], "wmma.mma">,
                               WMMA_REGINFO<op[2], "wmma.mma">,
                               WMMA_REGINFO<op[3], "wmma.mma">,
                               layout_a, layout_b, satf, rnd, b1op>;
              }
            } // b1op
          } // op
        } // rnd
      } // satf
    } // layout_b
  } // layout_a
} // defset
}

// MMA
// Same operand scheme as WMMA_MMA, but emits the "mma.sync.aligned" mnemonic,
// always prints all four element types and has no rounding-mode component.
class MMA<WMMA_REGINFO FragA, WMMA_REGINFO FragB,
          WMMA_REGINFO FragC, WMMA_REGINFO FragD,
          string ALayout, string BLayout, int Satfinite, string b1op>
  : WMMA_INSTR<MMA_NAME<ALayout, BLayout, Satfinite, b1op, FragA, FragB, FragC, FragD>.record,
               [FragA.Ins, FragB.Ins, FragC.Ins]>,
    // Requires does not seem to have effect on Instruction w/o Patterns.
    // We set it here anyways and propagate to the Pat<> we construct below.
    Requires<MMA_OP_PREDICATES<FragA, b1op>.ret> {
  // Type suffix of the mnemonic: ".<D>.<A>.<B>.<C>".
  string TypeList = "." # FragD.ptx_elt_type
                    # "." # FragA.ptx_elt_type
                    # "." # FragB.ptx_elt_type
                    # "." # FragC.ptx_elt_type;

  let InOperandList = !con(Args, (ins MmaCode:$ptx));
  let OutOperandList = FragD.Outs;

  let AsmString = "mma.sync.aligned."
                  # FragA.geom
                  # "." # ALayout
                  # "." # BLayout
                  # !if(Satfinite, ".satfinite", "")
                  # TypeList
                  # b1op # "\n\t\t"
                  # FragD.regstring # ",\n\t\t"
                  # FragA.regstring # ",\n\t\t"
                  # FragB.regstring # ",\n\t\t"
                  # FragC.regstring # ";";
}

// Instantiate MMA for every combination of layouts, satfinite flag, operation
// and b1 operation that NVVM_MMA_SUPPORTED accepts; collected in MMAs.
let isConvergent = true in {
defset list<WMMA_INSTR> MMAs  = {
  foreach layout_a = ["row", "col"] in {
    foreach layout_b = ["row", "col"] in {
      foreach satf = [0, 1] in {
        foreach op = NVVM_MMA_OPS.all_mma_ops in {
          foreach b1op = NVVM_MMA_B1OPS<op>.ret in {
            if NVVM_MMA_SUPPORTED<op, layout_a, layout_b, satf>.ret then {
              def : MMA<WMMA_REGINFO<op[0], "mma">,
                        WMMA_REGINFO<op[1], "mma">,
                        WMMA_REGINFO<op[2], "mma">,
                        WMMA_REGINFO<op[3], "mma">,
                        layout_a, layout_b, satf, b1op>;
            }
          } // b1op
        } // op
      } // satf
    } // layout_b
  } // layout_a
} // defset
}

//
// ldmatrix.sync.aligned.<geom>.<frag>[|.trans][|.shared].<type>
//
class LDMATRIX<WMMA_REGINFO Frag, bit Transposed, string Space,
               DAGOperand SrcOp>
  : WMMA_INSTR<LDMATRIX_NAME<Frag, Transposed>.record, [(ins SrcOp:$src)]>,
    Requires<Frag.Predicates> {
  // PatFrag that restricts the match to one address space: shared when Space
  // is ".shared", generic for anything else.
  PatFrag IntrFrag = PatFrag<(ops node:$src), (Intr node:$src),
                             !cond(!eq(Space, ".shared"): AS_match.shared,
                                   true: AS_match.generic)>;
  // Use the address-space-constrained fragment in the selection pattern.
  let IntrinsicPattern = BuildPatternPF<IntrFrag, Args>.ret;

  let InOperandList = !con(Args, (ins MmaCode:$ptx));
  let OutOperandList = Frag.Outs;
  let AsmString = "ldmatrix.sync.aligned."
                  # Frag.geom
                  # "." # Frag.frag
                  # !if(Transposed, ".trans", "")
                  # Space
                  # "." # Frag.ptx_elt_type
                  # " " # Frag.regstring # ", [$src];";
}

// Create all ldmatrix variants
defset list<WMMA_INSTR> LDMATRIXs  = {
  foreach transposed = [false, true] in {
    foreach space = [".shared", ""] in {
      foreach addr = [imem, Int32Regs, Int64Regs, MEMri, MEMri64] in {
        foreach frag = NVVM_MMA_OPS.all_ldmatrix_ops in
          if NVVM_LDMATRIX_SUPPORTED<frag>.ret then
            def : LDMATRIX<WMMA_REGINFO<frag, "ldmatrix">, transposed, space,
                            addr>;
      } // addr
    } // space
  } // transposed
} // defset

// Constructing non-flat DAGs is still a pain. I can't !subst a dag node with a
// dag, so the ptx.version must be appended *after* foreach replaces 'ins' with
// the instruction record.
//
// Selects instruction `wi` for its pre-built IntrinsicPattern, under the same
// predicates as the instruction itself.
class MMA_PAT<WMMA_INSTR wi>
      : Pat<wi.IntrinsicPattern,
            !con(!foreach(tmp, wi.Args, !subst(ins, wi, tmp)),
                 (wi ptx.version))>,
        Requires<wi.Predicates>;

// Build intrinsic->instruction patterns for all MMA instructions.
foreach mma = !listconcat(MMAs, WMMAs, MMA_LDSTs, LDMATRIXs) in
  def : MMA_PAT<mma>;

// "mapa<suffix>" in four flavours: 32- or 64-bit $a/$d, with $b given either
// as a 32-bit register or as an immediate (the "i" variants).  All of them
// are matched against the same intrinsic `Intr`.
multiclass MAPA<string suffix, Intrinsic Intr> {
  def _32: NVPTXInst<(outs Int32Regs:$d), (ins Int32Regs:$a, Int32Regs:$b),
                     !strconcat("mapa", suffix, ".u32\t$d, $a, $b;"),
                     [(set i32:$d, (Intr i32:$a, i32:$b))]>,
           Requires<[hasSM<90>, hasPTX<78>]>;
  def _32i: NVPTXInst<(outs Int32Regs:$d), (ins Int32Regs:$a, i32imm:$b),
                      !strconcat("mapa", suffix, ".u32\t$d, $a, $b;"),
                      [(set i32:$d, (Intr i32:$a, imm:$b))]>,
            Requires<[hasSM<90>, hasPTX<78>]>;
  def _64: NVPTXInst<(outs Int64Regs:$d), (ins Int64Regs:$a, Int32Regs:$b),
                     !strconcat("mapa", suffix, ".u64\t$d, $a, $b;"),
                     [(set i64:$d, (Intr i64:$a, i32:$b))]>,
           Requires<[hasSM<90>, hasPTX<78>]>;
  def _64i: NVPTXInst<(outs Int64Regs:$d), (ins Int64Regs:$a, i32imm:$b),
                      !strconcat("mapa", suffix, ".u64\t$d, $a, $b;"),
                      [(set i64:$d, (Intr i64:$a, imm:$b))]>,
            Requires<[hasSM<90>, hasPTX<78>]>;
}

defm mapa  : MAPA<"", int_nvvm_mapa>;
defm mapa_shared_cluster  : MAPA<".shared::cluster", int_nvvm_mapa_shared_cluster>;


// "getctarank<suffix>" for a 32-bit or a 64-bit $a; the result $d is a
// 32-bit register in both cases.
multiclass GETCTARANK<string suffix, Intrinsic Intr> {
  def _32: NVPTXInst<(outs Int32Regs:$d), (ins Int32Regs:$a),
                     !strconcat("getctarank", suffix, ".u32\t$d, $a;"),
                     [(set i32:$d, (Intr i32:$a))]>,
           Requires<[hasSM<90>, hasPTX<78>]>;
  def _64: NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
                     !strconcat("getctarank", suffix, ".u64\t$d, $a;"),
                     [(set i32:$d, (Intr i64:$a))]>,
           Requires<[hasSM<90>, hasPTX<78>]>;
}

defm getctarank  : GETCTARANK<"", int_nvvm_getctarank>;
defm getctarank_shared_cluster  : GETCTARANK<".shared::cluster", int_nvvm_getctarank_shared_cluster>;

// Copies %is_explicit_cluster into a predicate register.
def is_explicit_cluster:
    NVPTXInst<(outs Int1Regs:$d), (ins),
              "mov.pred\t$d, %is_explicit_cluster;",
              [(set i1:$d, (int_nvvm_is_explicit_cluster))]>,
    Requires<[hasSM<90>, hasPTX<78>]>;

// setmaxnreg inc/dec intrinsics
let isConvergent = true in {
// Action is the "inc"/"dec" component of the mnemonic; the register count is
// an immediate operand.
multiclass SET_MAXNREG<string Action, Intrinsic Intr> {
  def : NVPTXInst<(outs), (ins i32imm:$reg_count),
                  !strconcat("setmaxnreg.", Action, ".sync.aligned.u32 $reg_count;"),
                  [(Intr timm:$reg_count)]>,
        Requires<[hasSM90a, hasPTX<80>]>;
}

defm INT_SET_MAXNREG_INC : SET_MAXNREG<"inc", int_nvvm_setmaxnreg_inc_sync_aligned_u32>;
defm INT_SET_MAXNREG_DEC : SET_MAXNREG<"dec", int_nvvm_setmaxnreg_dec_sync_aligned_u32>;

} // isConvergent

//
// WGMMA fence instructions
//
let isConvergent = true in {
def INT_NVVM_WGMMA_FENCE_SYNC_ALIGNED :
    NVPTXInst<(outs), (ins),
              "wgmma.fence.sync.aligned;",
              [(int_nvvm_wgmma_fence_sync_aligned)]>,
    Requires<[hasSM90a, hasPTX<80>]>;

def INT_NVVM_WGMMA_COMMIT_GROUP_SYNC_ALIGNED :
    NVPTXInst<(outs), (ins),
              "wgmma.commit_group.sync.aligned;",
              [(int_nvvm_wgmma_commit_group_sync_aligned)]>,
    Requires<[hasSM90a, hasPTX<80>]>;

// $n is a 64-bit immediate.
def INT_NVVM_WGMMA_WAIT_GROUP_SYNC_ALIGNED :
    NVPTXInst<(outs), (ins i64imm:$n),
              "wgmma.wait_group.sync.aligned \t$n;",
              [(int_nvvm_wgmma_wait_group_sync_aligned timm:$n)]>,
    Requires<[hasSM90a, hasPTX<80>]>;
} // isConvergent = true

def GRIDDEPCONTROL_LAUNCH_DEPENDENTS :
    NVPTXInst<(outs), (ins),
              "griddepcontrol.launch_dependents;",
              [(int_nvvm_griddepcontrol_launch_dependents)]>,
    Requires<[hasSM<90>, hasPTX<78>]>;

def GRIDDEPCONTROL_WAIT :
    NVPTXInst<(outs), (ins),
              "griddepcontrol.wait;",
              [(int_nvvm_griddepcontrol_wait)]>,
    Requires<[hasSM<90>, hasPTX<78>]>;

// Emits "exit;" for int_nvvm_exit.
def INT_EXIT : NVPTXInst<(outs), (ins), "exit;", [(int_nvvm_exit)]>;