; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
; RUN: llc -global-isel=0 -amdgpu-load-store-vectorizer=0 -mtriple=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10-SDAG %s
; RUN: llc -global-isel=0 -amdgpu-load-store-vectorizer=0 -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX11-SDAG %s
; RUN: llc -global-isel=0 -amdgpu-load-store-vectorizer=0 -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX12-SDAG %s

; Each function below calls a pointer-typed (or <3 x ptr>-typed) overload of
; llvm.amdgcn.permlane16 / llvm.amdgcn.permlanex16, passing %src0 as both of
; the first two operands and `i1 false` for both trailing flags, then stores
; the result to %out. In every expected sequence %src1 and %src2 are first read
; into scalar registers with v_readfirstlane_b32, and one v_permlane*_b32 is
; emitted per 32-bit register of the value.

; permlane16 on `ptr`: two v_permlane16_b32 (v3, then v2), one 64-bit store.
define void @v_permlane16_p0(ptr addrspace(1) %out, ptr %src0, i32 %src1, i32 %src2) {
; GFX10-SDAG-LABEL: v_permlane16_p0:
; GFX10-SDAG:       ; %bb.0:
; GFX10-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-SDAG-NEXT:    v_readfirstlane_b32 s4, v4
; GFX10-SDAG-NEXT:    v_readfirstlane_b32 s5, v5
; GFX10-SDAG-NEXT:    v_permlane16_b32 v3, v3, s4, s5
; GFX10-SDAG-NEXT:    v_permlane16_b32 v2, v2, s4, s5
; GFX10-SDAG-NEXT:    global_store_dwordx2 v[0:1], v[2:3], off
; GFX10-SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-SDAG-LABEL: v_permlane16_p0:
; GFX11-SDAG:       ; %bb.0:
; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-SDAG-NEXT:    v_readfirstlane_b32 s0, v4
; GFX11-SDAG-NEXT:    v_readfirstlane_b32 s1, v5
; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX11-SDAG-NEXT:    v_permlane16_b32 v3, v3, s0, s1
; GFX11-SDAG-NEXT:    v_permlane16_b32 v2, v2, s0, s1
; GFX11-SDAG-NEXT:    global_store_b64 v[0:1], v[2:3], off
; GFX11-SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GFX12-SDAG-LABEL: v_permlane16_p0:
; GFX12-SDAG:       ; %bb.0:
; GFX12-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
; GFX12-SDAG-NEXT:    s_wait_expcnt 0x0
; GFX12-SDAG-NEXT:    s_wait_samplecnt 0x0
; GFX12-SDAG-NEXT:    s_wait_bvhcnt 0x0
; GFX12-SDAG-NEXT:    s_wait_kmcnt 0x0
; GFX12-SDAG-NEXT:    v_readfirstlane_b32 s0, v4
; GFX12-SDAG-NEXT:    v_readfirstlane_b32 s1, v5
; GFX12-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX12-SDAG-NEXT:    v_permlane16_b32 v3, v3, s0, s1
; GFX12-SDAG-NEXT:    v_permlane16_b32 v2, v2, s0, s1
; GFX12-SDAG-NEXT:    global_store_b64 v[0:1], v[2:3], off
; GFX12-SDAG-NEXT:    s_setpc_b64 s[30:31]
  %result = call ptr @llvm.amdgcn.permlane16.p0(ptr %src0, ptr %src0, i32 %src1, i32 %src2, i1 false, i1 false)
  store ptr %result, ptr addrspace(1) %out
  ret void
}

; permlanex16 on `ptr`: two v_permlanex16_b32 (v3, then v2), one 64-bit store.
define void @v_permlanex16_p0(ptr addrspace(1) %out, ptr %src0, i32 %src1, i32 %src2) {
; GFX10-SDAG-LABEL: v_permlanex16_p0:
; GFX10-SDAG:       ; %bb.0:
; GFX10-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-SDAG-NEXT:    v_readfirstlane_b32 s4, v4
; GFX10-SDAG-NEXT:    v_readfirstlane_b32 s5, v5
; GFX10-SDAG-NEXT:    v_permlanex16_b32 v3, v3, s4, s5
; GFX10-SDAG-NEXT:    v_permlanex16_b32 v2, v2, s4, s5
; GFX10-SDAG-NEXT:    global_store_dwordx2 v[0:1], v[2:3], off
; GFX10-SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-SDAG-LABEL: v_permlanex16_p0:
; GFX11-SDAG:       ; %bb.0:
; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-SDAG-NEXT:    v_readfirstlane_b32 s0, v4
; GFX11-SDAG-NEXT:    v_readfirstlane_b32 s1, v5
; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX11-SDAG-NEXT:    v_permlanex16_b32 v3, v3, s0, s1
; GFX11-SDAG-NEXT:    v_permlanex16_b32 v2, v2, s0, s1
; GFX11-SDAG-NEXT:    global_store_b64 v[0:1], v[2:3], off
; GFX11-SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GFX12-SDAG-LABEL: v_permlanex16_p0:
; GFX12-SDAG:       ; %bb.0:
; GFX12-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
; GFX12-SDAG-NEXT:    s_wait_expcnt 0x0
; GFX12-SDAG-NEXT:    s_wait_samplecnt 0x0
; GFX12-SDAG-NEXT:    s_wait_bvhcnt 0x0
; GFX12-SDAG-NEXT:    s_wait_kmcnt 0x0
; GFX12-SDAG-NEXT:    v_readfirstlane_b32 s0, v4
; GFX12-SDAG-NEXT:    v_readfirstlane_b32 s1, v5
; GFX12-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX12-SDAG-NEXT:    v_permlanex16_b32 v3, v3, s0, s1
; GFX12-SDAG-NEXT:    v_permlanex16_b32 v2, v2, s0, s1
; GFX12-SDAG-NEXT:    global_store_b64 v[0:1], v[2:3], off
; GFX12-SDAG-NEXT:    s_setpc_b64 s[30:31]
  %result = call ptr @llvm.amdgcn.permlanex16.p0(ptr %src0, ptr %src0, i32 %src1, i32 %src2, i1 false, i1 false)
  store ptr %result, ptr addrspace(1) %out
  ret void
}

; permlane16 on `<3 x ptr>`: six v_permlane16_b32 (v7 down to v2); the result
; is written with two stores, the one at offset:16 first.
define void @v_permlane16_v3p0(ptr addrspace(1) %out, <3 x ptr> %src0, i32 %src1, i32 %src2) {
; GFX10-SDAG-LABEL: v_permlane16_v3p0:
; GFX10-SDAG:       ; %bb.0:
; GFX10-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-SDAG-NEXT:    v_readfirstlane_b32 s4, v8
; GFX10-SDAG-NEXT:    v_readfirstlane_b32 s5, v9
; GFX10-SDAG-NEXT:    v_permlane16_b32 v7, v7, s4, s5
; GFX10-SDAG-NEXT:    v_permlane16_b32 v6, v6, s4, s5
; GFX10-SDAG-NEXT:    v_permlane16_b32 v5, v5, s4, s5
; GFX10-SDAG-NEXT:    v_permlane16_b32 v4, v4, s4, s5
; GFX10-SDAG-NEXT:    v_permlane16_b32 v3, v3, s4, s5
; GFX10-SDAG-NEXT:    v_permlane16_b32 v2, v2, s4, s5
; GFX10-SDAG-NEXT:    global_store_dwordx2 v[0:1], v[6:7], off offset:16
; GFX10-SDAG-NEXT:    global_store_dwordx4 v[0:1], v[2:5], off
; GFX10-SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-SDAG-LABEL: v_permlane16_v3p0:
; GFX11-SDAG:       ; %bb.0:
; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-SDAG-NEXT:    v_readfirstlane_b32 s0, v8
; GFX11-SDAG-NEXT:    v_readfirstlane_b32 s1, v9
; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX11-SDAG-NEXT:    v_permlane16_b32 v7, v7, s0, s1
; GFX11-SDAG-NEXT:    v_permlane16_b32 v6, v6, s0, s1
; GFX11-SDAG-NEXT:    v_permlane16_b32 v5, v5, s0, s1
; GFX11-SDAG-NEXT:    v_permlane16_b32 v4, v4, s0, s1
; GFX11-SDAG-NEXT:    v_permlane16_b32 v3, v3, s0, s1
; GFX11-SDAG-NEXT:    v_permlane16_b32 v2, v2, s0, s1
; GFX11-SDAG-NEXT:    s_clause 0x1
; GFX11-SDAG-NEXT:    global_store_b64 v[0:1], v[6:7], off offset:16
; GFX11-SDAG-NEXT:    global_store_b128 v[0:1], v[2:5], off
; GFX11-SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GFX12-SDAG-LABEL: v_permlane16_v3p0:
; GFX12-SDAG:       ; %bb.0:
; GFX12-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
; GFX12-SDAG-NEXT:    s_wait_expcnt 0x0
; GFX12-SDAG-NEXT:    s_wait_samplecnt 0x0
; GFX12-SDAG-NEXT:    s_wait_bvhcnt 0x0
; GFX12-SDAG-NEXT:    s_wait_kmcnt 0x0
; GFX12-SDAG-NEXT:    v_readfirstlane_b32 s0, v8
; GFX12-SDAG-NEXT:    v_readfirstlane_b32 s1, v9
; GFX12-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX12-SDAG-NEXT:    v_permlane16_b32 v7, v7, s0, s1
; GFX12-SDAG-NEXT:    v_permlane16_b32 v6, v6, s0, s1
; GFX12-SDAG-NEXT:    v_permlane16_b32 v5, v5, s0, s1
; GFX12-SDAG-NEXT:    v_permlane16_b32 v4, v4, s0, s1
; GFX12-SDAG-NEXT:    v_permlane16_b32 v3, v3, s0, s1
; GFX12-SDAG-NEXT:    v_permlane16_b32 v2, v2, s0, s1
; GFX12-SDAG-NEXT:    s_clause 0x1
; GFX12-SDAG-NEXT:    global_store_b64 v[0:1], v[6:7], off offset:16
; GFX12-SDAG-NEXT:    global_store_b128 v[0:1], v[2:5], off
; GFX12-SDAG-NEXT:    s_setpc_b64 s[30:31]
  %result = call <3 x ptr> @llvm.amdgcn.permlane16.v3p0(<3 x ptr> %src0, <3 x ptr> %src0, i32 %src1, i32 %src2, i1 false, i1 false)
  store <3 x ptr> %result, ptr addrspace(1) %out
  ret void
}

; permlanex16 on `<3 x ptr>`: six v_permlanex16_b32 (v7 down to v2); the result
; is written with two stores, the one at offset:16 first.
define void @v_permlanex16_v3p0(ptr addrspace(1) %out, <3 x ptr> %src0, i32 %src1, i32 %src2) {
; GFX10-SDAG-LABEL: v_permlanex16_v3p0:
; GFX10-SDAG:       ; %bb.0:
; GFX10-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-SDAG-NEXT:    v_readfirstlane_b32 s4, v8
; GFX10-SDAG-NEXT:    v_readfirstlane_b32 s5, v9
; GFX10-SDAG-NEXT:    v_permlanex16_b32 v7, v7, s4, s5
; GFX10-SDAG-NEXT:    v_permlanex16_b32 v6, v6, s4, s5
; GFX10-SDAG-NEXT:    v_permlanex16_b32 v5, v5, s4, s5
; GFX10-SDAG-NEXT:    v_permlanex16_b32 v4, v4, s4, s5
; GFX10-SDAG-NEXT:    v_permlanex16_b32 v3, v3, s4, s5
; GFX10-SDAG-NEXT:    v_permlanex16_b32 v2, v2, s4, s5
; GFX10-SDAG-NEXT:    global_store_dwordx2 v[0:1], v[6:7], off offset:16
; GFX10-SDAG-NEXT:    global_store_dwordx4 v[0:1], v[2:5], off
; GFX10-SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-SDAG-LABEL: v_permlanex16_v3p0:
; GFX11-SDAG:       ; %bb.0:
; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-SDAG-NEXT:    v_readfirstlane_b32 s0, v8
; GFX11-SDAG-NEXT:    v_readfirstlane_b32 s1, v9
; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX11-SDAG-NEXT:    v_permlanex16_b32 v7, v7, s0, s1
; GFX11-SDAG-NEXT:    v_permlanex16_b32 v6, v6, s0, s1
; GFX11-SDAG-NEXT:    v_permlanex16_b32 v5, v5, s0, s1
; GFX11-SDAG-NEXT:    v_permlanex16_b32 v4, v4, s0, s1
; GFX11-SDAG-NEXT:    v_permlanex16_b32 v3, v3, s0, s1
; GFX11-SDAG-NEXT:    v_permlanex16_b32 v2, v2, s0, s1
; GFX11-SDAG-NEXT:    s_clause 0x1
; GFX11-SDAG-NEXT:    global_store_b64 v[0:1], v[6:7], off offset:16
; GFX11-SDAG-NEXT:    global_store_b128 v[0:1], v[2:5], off
; GFX11-SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GFX12-SDAG-LABEL: v_permlanex16_v3p0:
; GFX12-SDAG:       ; %bb.0:
; GFX12-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
; GFX12-SDAG-NEXT:    s_wait_expcnt 0x0
; GFX12-SDAG-NEXT:    s_wait_samplecnt 0x0
; GFX12-SDAG-NEXT:    s_wait_bvhcnt 0x0
; GFX12-SDAG-NEXT:    s_wait_kmcnt 0x0
; GFX12-SDAG-NEXT:    v_readfirstlane_b32 s0, v8
; GFX12-SDAG-NEXT:    v_readfirstlane_b32 s1, v9
; GFX12-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX12-SDAG-NEXT:    v_permlanex16_b32 v7, v7, s0, s1
; GFX12-SDAG-NEXT:    v_permlanex16_b32 v6, v6, s0, s1
; GFX12-SDAG-NEXT:    v_permlanex16_b32 v5, v5, s0, s1
; GFX12-SDAG-NEXT:    v_permlanex16_b32 v4, v4, s0, s1
; GFX12-SDAG-NEXT:    v_permlanex16_b32 v3, v3, s0, s1
; GFX12-SDAG-NEXT:    v_permlanex16_b32 v2, v2, s0, s1
; GFX12-SDAG-NEXT:    s_clause 0x1
; GFX12-SDAG-NEXT:    global_store_b64 v[0:1], v[6:7], off offset:16
; GFX12-SDAG-NEXT:    global_store_b128 v[0:1], v[2:5], off
; GFX12-SDAG-NEXT:    s_setpc_b64 s[30:31]
  %result = call <3 x ptr> @llvm.amdgcn.permlanex16.v3p0(<3 x ptr> %src0, <3 x ptr> %src0, i32 %src1, i32 %src2, i1 false, i1 false)
  store <3 x ptr> %result, ptr addrspace(1) %out
  ret void
}

; permlane16 on `ptr addrspace(3)`: a single v_permlane16_b32 on v2 and a
; single dword store.
define void @v_permlane16_p3(ptr addrspace(1) %out, ptr addrspace(3) %src0, i32 %src1, i32 %src2) {
; GFX10-SDAG-LABEL: v_permlane16_p3:
; GFX10-SDAG:       ; %bb.0:
; GFX10-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-SDAG-NEXT:    v_readfirstlane_b32 s4, v3
; GFX10-SDAG-NEXT:    v_readfirstlane_b32 s5, v4
; GFX10-SDAG-NEXT:    v_permlane16_b32 v2, v2, s4, s5
; GFX10-SDAG-NEXT:    global_store_dword v[0:1], v2, off
; GFX10-SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-SDAG-LABEL: v_permlane16_p3:
; GFX11-SDAG:       ; %bb.0:
; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-SDAG-NEXT:    v_readfirstlane_b32 s0, v3
; GFX11-SDAG-NEXT:    v_readfirstlane_b32 s1, v4
; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX11-SDAG-NEXT:    v_permlane16_b32 v2, v2, s0, s1
; GFX11-SDAG-NEXT:    global_store_b32 v[0:1], v2, off
; GFX11-SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GFX12-SDAG-LABEL: v_permlane16_p3:
; GFX12-SDAG:       ; %bb.0:
; GFX12-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
; GFX12-SDAG-NEXT:    s_wait_expcnt 0x0
; GFX12-SDAG-NEXT:    s_wait_samplecnt 0x0
; GFX12-SDAG-NEXT:    s_wait_bvhcnt 0x0
; GFX12-SDAG-NEXT:    s_wait_kmcnt 0x0
; GFX12-SDAG-NEXT:    v_readfirstlane_b32 s0, v3
; GFX12-SDAG-NEXT:    v_readfirstlane_b32 s1, v4
; GFX12-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX12-SDAG-NEXT:    v_permlane16_b32 v2, v2, s0, s1
; GFX12-SDAG-NEXT:    global_store_b32 v[0:1], v2, off
; GFX12-SDAG-NEXT:    s_setpc_b64 s[30:31]
  %result = call ptr addrspace(3) @llvm.amdgcn.permlane16.p3(ptr addrspace(3) %src0, ptr addrspace(3) %src0, i32 %src1, i32 %src2, i1 false, i1 false)
  store ptr addrspace(3) %result, ptr addrspace(1) %out
  ret void
}

; permlanex16 on `ptr addrspace(3)`: a single v_permlanex16_b32 on v2 and a
; single dword store.
define void @v_permlanex16_p3(ptr addrspace(1) %out, ptr addrspace(3) %src0, i32 %src1, i32 %src2) {
; GFX10-SDAG-LABEL: v_permlanex16_p3:
; GFX10-SDAG:       ; %bb.0:
; GFX10-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-SDAG-NEXT:    v_readfirstlane_b32 s4, v3
; GFX10-SDAG-NEXT:    v_readfirstlane_b32 s5, v4
; GFX10-SDAG-NEXT:    v_permlanex16_b32 v2, v2, s4, s5
; GFX10-SDAG-NEXT:    global_store_dword v[0:1], v2, off
; GFX10-SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-SDAG-LABEL: v_permlanex16_p3:
; GFX11-SDAG:       ; %bb.0:
; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-SDAG-NEXT:    v_readfirstlane_b32 s0, v3
; GFX11-SDAG-NEXT:    v_readfirstlane_b32 s1, v4
; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX11-SDAG-NEXT:    v_permlanex16_b32 v2, v2, s0, s1
; GFX11-SDAG-NEXT:    global_store_b32 v[0:1], v2, off
; GFX11-SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GFX12-SDAG-LABEL: v_permlanex16_p3:
; GFX12-SDAG:       ; %bb.0:
; GFX12-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
; GFX12-SDAG-NEXT:    s_wait_expcnt 0x0
; GFX12-SDAG-NEXT:    s_wait_samplecnt 0x0
; GFX12-SDAG-NEXT:    s_wait_bvhcnt 0x0
; GFX12-SDAG-NEXT:    s_wait_kmcnt 0x0
; GFX12-SDAG-NEXT:    v_readfirstlane_b32 s0, v3
; GFX12-SDAG-NEXT:    v_readfirstlane_b32 s1, v4
; GFX12-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX12-SDAG-NEXT:    v_permlanex16_b32 v2, v2, s0, s1
; GFX12-SDAG-NEXT:    global_store_b32 v[0:1], v2, off
; GFX12-SDAG-NEXT:    s_setpc_b64 s[30:31]
  %result = call ptr addrspace(3) @llvm.amdgcn.permlanex16.p3(ptr addrspace(3) %src0, ptr addrspace(3) %src0, i32 %src1, i32 %src2, i1 false, i1 false)
  store ptr addrspace(3) %result, ptr addrspace(1) %out
  ret void
}

; permlane16 on `<3 x ptr addrspace(3)>`: three v_permlane16_b32 (v4, v3, v2)
; and one three-dword store.
define void @v_permlane16_v3p3(ptr addrspace(1) %out, <3 x ptr addrspace(3)> %src0, i32 %src1, i32 %src2) {
; GFX10-SDAG-LABEL: v_permlane16_v3p3:
; GFX10-SDAG:       ; %bb.0:
; GFX10-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-SDAG-NEXT:    v_readfirstlane_b32 s4, v5
; GFX10-SDAG-NEXT:    v_readfirstlane_b32 s5, v6
; GFX10-SDAG-NEXT:    v_permlane16_b32 v4, v4, s4, s5
; GFX10-SDAG-NEXT:    v_permlane16_b32 v3, v3, s4, s5
; GFX10-SDAG-NEXT:    v_permlane16_b32 v2, v2, s4, s5
; GFX10-SDAG-NEXT:    global_store_dwordx3 v[0:1], v[2:4], off
; GFX10-SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-SDAG-LABEL: v_permlane16_v3p3:
; GFX11-SDAG:       ; %bb.0:
; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-SDAG-NEXT:    v_readfirstlane_b32 s0, v5
; GFX11-SDAG-NEXT:    v_readfirstlane_b32 s1, v6
; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX11-SDAG-NEXT:    v_permlane16_b32 v4, v4, s0, s1
; GFX11-SDAG-NEXT:    v_permlane16_b32 v3, v3, s0, s1
; GFX11-SDAG-NEXT:    v_permlane16_b32 v2, v2, s0, s1
; GFX11-SDAG-NEXT:    global_store_b96 v[0:1], v[2:4], off
; GFX11-SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GFX12-SDAG-LABEL: v_permlane16_v3p3:
; GFX12-SDAG:       ; %bb.0:
; GFX12-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
; GFX12-SDAG-NEXT:    s_wait_expcnt 0x0
; GFX12-SDAG-NEXT:    s_wait_samplecnt 0x0
; GFX12-SDAG-NEXT:    s_wait_bvhcnt 0x0
; GFX12-SDAG-NEXT:    s_wait_kmcnt 0x0
; GFX12-SDAG-NEXT:    v_readfirstlane_b32 s0, v5
; GFX12-SDAG-NEXT:    v_readfirstlane_b32 s1, v6
; GFX12-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX12-SDAG-NEXT:    v_permlane16_b32 v4, v4, s0, s1
; GFX12-SDAG-NEXT:    v_permlane16_b32 v3, v3, s0, s1
; GFX12-SDAG-NEXT:    v_permlane16_b32 v2, v2, s0, s1
; GFX12-SDAG-NEXT:    global_store_b96 v[0:1], v[2:4], off
; GFX12-SDAG-NEXT:    s_setpc_b64 s[30:31]
  %result = call <3 x ptr addrspace(3)> @llvm.amdgcn.permlane16.v3p3(<3 x ptr addrspace(3)> %src0, <3 x ptr addrspace(3)> %src0, i32 %src1, i32 %src2, i1 false, i1 false)
  store <3 x ptr addrspace(3)> %result, ptr addrspace(1) %out
  ret void
}

; permlanex16 on `<3 x ptr addrspace(3)>`: three v_permlanex16_b32 (v4, v3, v2)
; and one three-dword store.
define void @v_permlanex16_v3p3(ptr addrspace(1) %out, <3 x ptr addrspace(3)> %src0, i32 %src1, i32 %src2) {
; GFX10-SDAG-LABEL: v_permlanex16_v3p3:
; GFX10-SDAG:       ; %bb.0:
; GFX10-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-SDAG-NEXT:    v_readfirstlane_b32 s4, v5
; GFX10-SDAG-NEXT:    v_readfirstlane_b32 s5, v6
; GFX10-SDAG-NEXT:    v_permlanex16_b32 v4, v4, s4, s5
; GFX10-SDAG-NEXT:    v_permlanex16_b32 v3, v3, s4, s5
; GFX10-SDAG-NEXT:    v_permlanex16_b32 v2, v2, s4, s5
; GFX10-SDAG-NEXT:    global_store_dwordx3 v[0:1], v[2:4], off
; GFX10-SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-SDAG-LABEL: v_permlanex16_v3p3:
; GFX11-SDAG:       ; %bb.0:
; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-SDAG-NEXT:    v_readfirstlane_b32 s0, v5
; GFX11-SDAG-NEXT:    v_readfirstlane_b32 s1, v6
; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX11-SDAG-NEXT:    v_permlanex16_b32 v4, v4, s0, s1
; GFX11-SDAG-NEXT:    v_permlanex16_b32 v3, v3, s0, s1
; GFX11-SDAG-NEXT:    v_permlanex16_b32 v2, v2, s0, s1
; GFX11-SDAG-NEXT:    global_store_b96 v[0:1], v[2:4], off
; GFX11-SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GFX12-SDAG-LABEL: v_permlanex16_v3p3:
; GFX12-SDAG:       ; %bb.0:
; GFX12-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
; GFX12-SDAG-NEXT:    s_wait_expcnt 0x0
; GFX12-SDAG-NEXT:    s_wait_samplecnt 0x0
; GFX12-SDAG-NEXT:    s_wait_bvhcnt 0x0
; GFX12-SDAG-NEXT:    s_wait_kmcnt 0x0
; GFX12-SDAG-NEXT:    v_readfirstlane_b32 s0, v5
; GFX12-SDAG-NEXT:    v_readfirstlane_b32 s1, v6
; GFX12-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX12-SDAG-NEXT:    v_permlanex16_b32 v4, v4, s0, s1
; GFX12-SDAG-NEXT:    v_permlanex16_b32 v3, v3, s0, s1
; GFX12-SDAG-NEXT:    v_permlanex16_b32 v2, v2, s0, s1
; GFX12-SDAG-NEXT:    global_store_b96 v[0:1], v[2:4], off
; GFX12-SDAG-NEXT:    s_setpc_b64 s[30:31]
  %result = call <3 x ptr addrspace(3)> @llvm.amdgcn.permlanex16.v3p3(<3 x ptr addrspace(3)> %src0, <3 x ptr addrspace(3)> %src0, i32 %src1, i32 %src2, i1 false, i1 false)
  store <3 x ptr addrspace(3)> %result, ptr addrspace(1) %out
  ret void
}

; permlane16 on `ptr addrspace(5)`: a single v_permlane16_b32 on v2 and a
; single dword store.
define void @v_permlane16_p5(ptr addrspace(1) %out, ptr addrspace(5) %src0, i32 %src1, i32 %src2) {
; GFX10-SDAG-LABEL: v_permlane16_p5:
; GFX10-SDAG:       ; %bb.0:
; GFX10-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-SDAG-NEXT:    v_readfirstlane_b32 s4, v3
; GFX10-SDAG-NEXT:    v_readfirstlane_b32 s5, v4
; GFX10-SDAG-NEXT:    v_permlane16_b32 v2, v2, s4, s5
; GFX10-SDAG-NEXT:    global_store_dword v[0:1], v2, off
; GFX10-SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-SDAG-LABEL: v_permlane16_p5:
; GFX11-SDAG:       ; %bb.0:
; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-SDAG-NEXT:    v_readfirstlane_b32 s0, v3
; GFX11-SDAG-NEXT:    v_readfirstlane_b32 s1, v4
; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX11-SDAG-NEXT:    v_permlane16_b32 v2, v2, s0, s1
; GFX11-SDAG-NEXT:    global_store_b32 v[0:1], v2, off
; GFX11-SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GFX12-SDAG-LABEL: v_permlane16_p5:
; GFX12-SDAG:       ; %bb.0:
; GFX12-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
; GFX12-SDAG-NEXT:    s_wait_expcnt 0x0
; GFX12-SDAG-NEXT:    s_wait_samplecnt 0x0
; GFX12-SDAG-NEXT:    s_wait_bvhcnt 0x0
; GFX12-SDAG-NEXT:    s_wait_kmcnt 0x0
; GFX12-SDAG-NEXT:    v_readfirstlane_b32 s0, v3
; GFX12-SDAG-NEXT:    v_readfirstlane_b32 s1, v4
; GFX12-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX12-SDAG-NEXT:    v_permlane16_b32 v2, v2, s0, s1
; GFX12-SDAG-NEXT:    global_store_b32 v[0:1], v2, off
; GFX12-SDAG-NEXT:    s_setpc_b64 s[30:31]
  %result = call ptr addrspace(5) @llvm.amdgcn.permlane16.p5(ptr addrspace(5) %src0, ptr addrspace(5) %src0, i32 %src1, i32 %src2, i1 false, i1 false)
  store ptr addrspace(5) %result, ptr addrspace(1) %out
  ret void
}

; permlanex16 on `ptr addrspace(5)`: a single v_permlanex16_b32 on v2 and a
; single dword store.
define void @v_permlanex16_p5(ptr addrspace(1) %out, ptr addrspace(5) %src0, i32 %src1, i32 %src2) {
; GFX10-SDAG-LABEL: v_permlanex16_p5:
; GFX10-SDAG:       ; %bb.0:
; GFX10-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-SDAG-NEXT:    v_readfirstlane_b32 s4, v3
; GFX10-SDAG-NEXT:    v_readfirstlane_b32 s5, v4
; GFX10-SDAG-NEXT:    v_permlanex16_b32 v2, v2, s4, s5
; GFX10-SDAG-NEXT:    global_store_dword v[0:1], v2, off
; GFX10-SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-SDAG-LABEL: v_permlanex16_p5:
; GFX11-SDAG:       ; %bb.0:
; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-SDAG-NEXT:    v_readfirstlane_b32 s0, v3
; GFX11-SDAG-NEXT:    v_readfirstlane_b32 s1, v4
; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX11-SDAG-NEXT:    v_permlanex16_b32 v2, v2, s0, s1
; GFX11-SDAG-NEXT:    global_store_b32 v[0:1], v2, off
; GFX11-SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GFX12-SDAG-LABEL: v_permlanex16_p5:
; GFX12-SDAG:       ; %bb.0:
; GFX12-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
; GFX12-SDAG-NEXT:    s_wait_expcnt 0x0
; GFX12-SDAG-NEXT:    s_wait_samplecnt 0x0
; GFX12-SDAG-NEXT:    s_wait_bvhcnt 0x0
; GFX12-SDAG-NEXT:    s_wait_kmcnt 0x0
; GFX12-SDAG-NEXT:    v_readfirstlane_b32 s0, v3
; GFX12-SDAG-NEXT:    v_readfirstlane_b32 s1, v4
; GFX12-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX12-SDAG-NEXT:    v_permlanex16_b32 v2, v2, s0, s1
; GFX12-SDAG-NEXT:    global_store_b32 v[0:1], v2, off
; GFX12-SDAG-NEXT:    s_setpc_b64 s[30:31]
  %result = call ptr addrspace(5) @llvm.amdgcn.permlanex16.p5(ptr addrspace(5) %src0, ptr addrspace(5) %src0, i32 %src1, i32 %src2, i1 false, i1 false)
  store ptr addrspace(5) %result, ptr addrspace(1) %out
  ret void
}

; permlane16 on `<3 x ptr addrspace(5)>`: three v_permlane16_b32 (v4, v3, v2)
; and one three-dword store.
define void @v_permlane16_v3p5(ptr addrspace(1) %out, <3 x ptr addrspace(5)> %src0, i32 %src1, i32 %src2) {
; GFX10-SDAG-LABEL: v_permlane16_v3p5:
; GFX10-SDAG:       ; %bb.0:
; GFX10-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-SDAG-NEXT:    v_readfirstlane_b32 s4, v5
; GFX10-SDAG-NEXT:    v_readfirstlane_b32 s5, v6
; GFX10-SDAG-NEXT:    v_permlane16_b32 v4, v4, s4, s5
; GFX10-SDAG-NEXT:    v_permlane16_b32 v3, v3, s4, s5
; GFX10-SDAG-NEXT:    v_permlane16_b32 v2, v2, s4, s5
; GFX10-SDAG-NEXT:    global_store_dwordx3 v[0:1], v[2:4], off
; GFX10-SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-SDAG-LABEL: v_permlane16_v3p5:
; GFX11-SDAG:       ; %bb.0:
; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-SDAG-NEXT:    v_readfirstlane_b32 s0, v5
; GFX11-SDAG-NEXT:    v_readfirstlane_b32 s1, v6
; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX11-SDAG-NEXT:    v_permlane16_b32 v4, v4, s0, s1
; GFX11-SDAG-NEXT:    v_permlane16_b32 v3, v3, s0, s1
; GFX11-SDAG-NEXT:    v_permlane16_b32 v2, v2, s0, s1
; GFX11-SDAG-NEXT:    global_store_b96 v[0:1], v[2:4], off
; GFX11-SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GFX12-SDAG-LABEL: v_permlane16_v3p5:
; GFX12-SDAG:       ; %bb.0:
; GFX12-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
; GFX12-SDAG-NEXT:    s_wait_expcnt 0x0
; GFX12-SDAG-NEXT:    s_wait_samplecnt 0x0
; GFX12-SDAG-NEXT:    s_wait_bvhcnt 0x0
; GFX12-SDAG-NEXT:    s_wait_kmcnt 0x0
; GFX12-SDAG-NEXT:    v_readfirstlane_b32 s0, v5
; GFX12-SDAG-NEXT:    v_readfirstlane_b32 s1, v6
; GFX12-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX12-SDAG-NEXT:    v_permlane16_b32 v4, v4, s0, s1
; GFX12-SDAG-NEXT:    v_permlane16_b32 v3, v3, s0, s1
; GFX12-SDAG-NEXT:    v_permlane16_b32 v2, v2, s0, s1
; GFX12-SDAG-NEXT:    global_store_b96 v[0:1], v[2:4], off
; GFX12-SDAG-NEXT:    s_setpc_b64 s[30:31]
  %result = call <3 x ptr addrspace(5)> @llvm.amdgcn.permlane16.v3p5(<3 x ptr addrspace(5)> %src0, <3 x ptr addrspace(5)> %src0, i32 %src1, i32 %src2, i1 false, i1 false)
  store <3 x ptr addrspace(5)> %result, ptr addrspace(1) %out
  ret void
}

; permlanex16 on `<3 x ptr addrspace(5)>`: three v_permlanex16_b32 (v4, v3, v2)
; and one three-dword store.
define void @v_permlanex16_v3p5(ptr addrspace(1) %out, <3 x ptr addrspace(5)> %src0, i32 %src1, i32 %src2) {
; GFX10-SDAG-LABEL: v_permlanex16_v3p5:
; GFX10-SDAG:       ; %bb.0:
; GFX10-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-SDAG-NEXT:    v_readfirstlane_b32 s4, v5
; GFX10-SDAG-NEXT:    v_readfirstlane_b32 s5, v6
; GFX10-SDAG-NEXT:    v_permlanex16_b32 v4, v4, s4, s5
; GFX10-SDAG-NEXT:    v_permlanex16_b32 v3, v3, s4, s5
; GFX10-SDAG-NEXT:    v_permlanex16_b32 v2, v2, s4, s5
; GFX10-SDAG-NEXT:    global_store_dwordx3 v[0:1], v[2:4], off
; GFX10-SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-SDAG-LABEL: v_permlanex16_v3p5:
; GFX11-SDAG:       ; %bb.0:
; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-SDAG-NEXT:    v_readfirstlane_b32 s0, v5
; GFX11-SDAG-NEXT:    v_readfirstlane_b32 s1, v6
; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX11-SDAG-NEXT:    v_permlanex16_b32 v4, v4, s0, s1
; GFX11-SDAG-NEXT:    v_permlanex16_b32 v3, v3, s0, s1
; GFX11-SDAG-NEXT:    v_permlanex16_b32 v2, v2, s0, s1
; GFX11-SDAG-NEXT:    global_store_b96 v[0:1], v[2:4], off
; GFX11-SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GFX12-SDAG-LABEL: v_permlanex16_v3p5:
; GFX12-SDAG:       ; %bb.0:
; GFX12-SDAG-NEXT:    s_wait_loadcnt_dscnt 0x0
; GFX12-SDAG-NEXT:    s_wait_expcnt 0x0
; GFX12-SDAG-NEXT:    s_wait_samplecnt 0x0
; GFX12-SDAG-NEXT:    s_wait_bvhcnt 0x0
; GFX12-SDAG-NEXT:    s_wait_kmcnt 0x0
; GFX12-SDAG-NEXT:    v_readfirstlane_b32 s0, v5
; GFX12-SDAG-NEXT:    v_readfirstlane_b32 s1, v6
; GFX12-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX12-SDAG-NEXT:    v_permlanex16_b32 v4, v4, s0, s1
; GFX12-SDAG-NEXT:    v_permlanex16_b32 v3, v3, s0, s1
; GFX12-SDAG-NEXT:    v_permlanex16_b32 v2, v2, s0, s1
; GFX12-SDAG-NEXT:    global_store_b96 v[0:1], v[2:4], off
; GFX12-SDAG-NEXT:    s_setpc_b64 s[30:31]
  %result = call <3 x ptr addrspace(5)> @llvm.amdgcn.permlanex16.v3p5(<3 x ptr addrspace(5)> %src0, <3 x ptr addrspace(5)> %src0, i32 %src1, i32 %src2, i1 false, i1 false)
  store <3 x ptr addrspace(5)> %result, ptr addrspace(1) %out
  ret void
}

define void @v_permlane16_p6(ptr addrspace(1) %out, ptr addrspace(6) %src0, i32 %src1, i32 %src2) {
; GFX10-SDAG-LABEL: v_permlane16_p6:
; GFX10-SDAG:       ; %bb.0:
; GFX10-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-SDAG-NEXT:    v_readfirstlane_b32 s4, v3
; GFX10-SDAG-NEXT:    v_readfirstlane_b32 s5, v4
; GFX10-SDAG-NEXT:    v_permlane16_b32 v2, v2, s4, s5
; GFX10-SDAG-NEXT:    global_store_dword v[0:1], v2, off
; GFX10-SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-SDAG-LABEL: v_permlane16_p6:
; GFX11-SDAG:       ; %bb.0:
; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-SDAG-NEXT:    v_readfirstlane_b32 s0, v3
; GFX11-SDAG-NEXT:    v_readfirstlane_b32 s1, v4
547*35f7b60aSVikram Hegde; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) 548*35f7b60aSVikram Hegde; GFX11-SDAG-NEXT: v_permlane16_b32 v2, v2, s0, s1 549*35f7b60aSVikram Hegde; GFX11-SDAG-NEXT: global_store_b32 v[0:1], v2, off 550*35f7b60aSVikram Hegde; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] 551*35f7b60aSVikram Hegde; 552*35f7b60aSVikram Hegde; GFX12-SDAG-LABEL: v_permlane16_p6: 553*35f7b60aSVikram Hegde; GFX12-SDAG: ; %bb.0: 554*35f7b60aSVikram Hegde; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 555*35f7b60aSVikram Hegde; GFX12-SDAG-NEXT: s_wait_expcnt 0x0 556*35f7b60aSVikram Hegde; GFX12-SDAG-NEXT: s_wait_samplecnt 0x0 557*35f7b60aSVikram Hegde; GFX12-SDAG-NEXT: s_wait_bvhcnt 0x0 558*35f7b60aSVikram Hegde; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0 559*35f7b60aSVikram Hegde; GFX12-SDAG-NEXT: v_readfirstlane_b32 s0, v3 560*35f7b60aSVikram Hegde; GFX12-SDAG-NEXT: v_readfirstlane_b32 s1, v4 561*35f7b60aSVikram Hegde; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) 562*35f7b60aSVikram Hegde; GFX12-SDAG-NEXT: v_permlane16_b32 v2, v2, s0, s1 563*35f7b60aSVikram Hegde; GFX12-SDAG-NEXT: global_store_b32 v[0:1], v2, off 564*35f7b60aSVikram Hegde; GFX12-SDAG-NEXT: s_setpc_b64 s[30:31] 565*35f7b60aSVikram Hegde %v = call ptr addrspace(6) @llvm.amdgcn.permlane16.p6(ptr addrspace(6) %src0, ptr addrspace(6) %src0, i32 %src1, i32 %src2, i1 false, i1 false) 566*35f7b60aSVikram Hegde store ptr addrspace(6) %v, ptr addrspace(1) %out 567*35f7b60aSVikram Hegde ret void 568*35f7b60aSVikram Hegde} 569*35f7b60aSVikram Hegde 570*35f7b60aSVikram Hegdedefine void @v_permlanex16_p6(ptr addrspace(1) %out, ptr addrspace(6) %src0, i32 %src1, i32 %src2) { 571*35f7b60aSVikram Hegde; GFX10-SDAG-LABEL: v_permlanex16_p6: 572*35f7b60aSVikram Hegde; GFX10-SDAG: ; %bb.0: 573*35f7b60aSVikram Hegde; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 574*35f7b60aSVikram Hegde; GFX10-SDAG-NEXT: v_readfirstlane_b32 s4, v3 575*35f7b60aSVikram Hegde; GFX10-SDAG-NEXT: v_readfirstlane_b32 s5, v4 
576*35f7b60aSVikram Hegde; GFX10-SDAG-NEXT: v_permlanex16_b32 v2, v2, s4, s5 577*35f7b60aSVikram Hegde; GFX10-SDAG-NEXT: global_store_dword v[0:1], v2, off 578*35f7b60aSVikram Hegde; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] 579*35f7b60aSVikram Hegde; 580*35f7b60aSVikram Hegde; GFX11-SDAG-LABEL: v_permlanex16_p6: 581*35f7b60aSVikram Hegde; GFX11-SDAG: ; %bb.0: 582*35f7b60aSVikram Hegde; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 583*35f7b60aSVikram Hegde; GFX11-SDAG-NEXT: v_readfirstlane_b32 s0, v3 584*35f7b60aSVikram Hegde; GFX11-SDAG-NEXT: v_readfirstlane_b32 s1, v4 585*35f7b60aSVikram Hegde; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) 586*35f7b60aSVikram Hegde; GFX11-SDAG-NEXT: v_permlanex16_b32 v2, v2, s0, s1 587*35f7b60aSVikram Hegde; GFX11-SDAG-NEXT: global_store_b32 v[0:1], v2, off 588*35f7b60aSVikram Hegde; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] 589*35f7b60aSVikram Hegde; 590*35f7b60aSVikram Hegde; GFX12-SDAG-LABEL: v_permlanex16_p6: 591*35f7b60aSVikram Hegde; GFX12-SDAG: ; %bb.0: 592*35f7b60aSVikram Hegde; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 593*35f7b60aSVikram Hegde; GFX12-SDAG-NEXT: s_wait_expcnt 0x0 594*35f7b60aSVikram Hegde; GFX12-SDAG-NEXT: s_wait_samplecnt 0x0 595*35f7b60aSVikram Hegde; GFX12-SDAG-NEXT: s_wait_bvhcnt 0x0 596*35f7b60aSVikram Hegde; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0 597*35f7b60aSVikram Hegde; GFX12-SDAG-NEXT: v_readfirstlane_b32 s0, v3 598*35f7b60aSVikram Hegde; GFX12-SDAG-NEXT: v_readfirstlane_b32 s1, v4 599*35f7b60aSVikram Hegde; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) 600*35f7b60aSVikram Hegde; GFX12-SDAG-NEXT: v_permlanex16_b32 v2, v2, s0, s1 601*35f7b60aSVikram Hegde; GFX12-SDAG-NEXT: global_store_b32 v[0:1], v2, off 602*35f7b60aSVikram Hegde; GFX12-SDAG-NEXT: s_setpc_b64 s[30:31] 603*35f7b60aSVikram Hegde %v = call ptr addrspace(6) @llvm.amdgcn.permlanex16.p6(ptr addrspace(6) %src0, ptr addrspace(6) %src0, i32 %src1, i32 %src2, i1 false, i1 false) 604*35f7b60aSVikram Hegde store ptr addrspace(6) %v, 
ptr addrspace(1) %out 605*35f7b60aSVikram Hegde ret void 606*35f7b60aSVikram Hegde} 607*35f7b60aSVikram Hegde 608*35f7b60aSVikram Hegdedefine void @v_permlane16_v3p6(ptr addrspace(1) %out, <3 x ptr addrspace(6)> %src0, i32 %src1, i32 %src2) { 609*35f7b60aSVikram Hegde; GFX10-SDAG-LABEL: v_permlane16_v3p6: 610*35f7b60aSVikram Hegde; GFX10-SDAG: ; %bb.0: 611*35f7b60aSVikram Hegde; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 612*35f7b60aSVikram Hegde; GFX10-SDAG-NEXT: v_readfirstlane_b32 s4, v5 613*35f7b60aSVikram Hegde; GFX10-SDAG-NEXT: v_readfirstlane_b32 s5, v6 614*35f7b60aSVikram Hegde; GFX10-SDAG-NEXT: v_permlane16_b32 v4, v4, s4, s5 615*35f7b60aSVikram Hegde; GFX10-SDAG-NEXT: v_permlane16_b32 v3, v3, s4, s5 616*35f7b60aSVikram Hegde; GFX10-SDAG-NEXT: v_permlane16_b32 v2, v2, s4, s5 617*35f7b60aSVikram Hegde; GFX10-SDAG-NEXT: global_store_dwordx3 v[0:1], v[2:4], off 618*35f7b60aSVikram Hegde; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] 619*35f7b60aSVikram Hegde; 620*35f7b60aSVikram Hegde; GFX11-SDAG-LABEL: v_permlane16_v3p6: 621*35f7b60aSVikram Hegde; GFX11-SDAG: ; %bb.0: 622*35f7b60aSVikram Hegde; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 623*35f7b60aSVikram Hegde; GFX11-SDAG-NEXT: v_readfirstlane_b32 s0, v5 624*35f7b60aSVikram Hegde; GFX11-SDAG-NEXT: v_readfirstlane_b32 s1, v6 625*35f7b60aSVikram Hegde; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) 626*35f7b60aSVikram Hegde; GFX11-SDAG-NEXT: v_permlane16_b32 v4, v4, s0, s1 627*35f7b60aSVikram Hegde; GFX11-SDAG-NEXT: v_permlane16_b32 v3, v3, s0, s1 628*35f7b60aSVikram Hegde; GFX11-SDAG-NEXT: v_permlane16_b32 v2, v2, s0, s1 629*35f7b60aSVikram Hegde; GFX11-SDAG-NEXT: global_store_b96 v[0:1], v[2:4], off 630*35f7b60aSVikram Hegde; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] 631*35f7b60aSVikram Hegde; 632*35f7b60aSVikram Hegde; GFX12-SDAG-LABEL: v_permlane16_v3p6: 633*35f7b60aSVikram Hegde; GFX12-SDAG: ; %bb.0: 634*35f7b60aSVikram Hegde; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 
635*35f7b60aSVikram Hegde; GFX12-SDAG-NEXT: s_wait_expcnt 0x0 636*35f7b60aSVikram Hegde; GFX12-SDAG-NEXT: s_wait_samplecnt 0x0 637*35f7b60aSVikram Hegde; GFX12-SDAG-NEXT: s_wait_bvhcnt 0x0 638*35f7b60aSVikram Hegde; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0 639*35f7b60aSVikram Hegde; GFX12-SDAG-NEXT: v_readfirstlane_b32 s0, v5 640*35f7b60aSVikram Hegde; GFX12-SDAG-NEXT: v_readfirstlane_b32 s1, v6 641*35f7b60aSVikram Hegde; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) 642*35f7b60aSVikram Hegde; GFX12-SDAG-NEXT: v_permlane16_b32 v4, v4, s0, s1 643*35f7b60aSVikram Hegde; GFX12-SDAG-NEXT: v_permlane16_b32 v3, v3, s0, s1 644*35f7b60aSVikram Hegde; GFX12-SDAG-NEXT: v_permlane16_b32 v2, v2, s0, s1 645*35f7b60aSVikram Hegde; GFX12-SDAG-NEXT: global_store_b96 v[0:1], v[2:4], off 646*35f7b60aSVikram Hegde; GFX12-SDAG-NEXT: s_setpc_b64 s[30:31] 647*35f7b60aSVikram Hegde %v = call <3 x ptr addrspace(6)> @llvm.amdgcn.permlane16.v3p6(<3 x ptr addrspace(6)> %src0, <3 x ptr addrspace(6)> %src0, i32 %src1, i32 %src2, i1 false, i1 false) 648*35f7b60aSVikram Hegde store <3 x ptr addrspace(6)> %v, ptr addrspace(1) %out 649*35f7b60aSVikram Hegde ret void 650*35f7b60aSVikram Hegde} 651*35f7b60aSVikram Hegde 652*35f7b60aSVikram Hegdedefine void @v_permlanex16_v3p6(ptr addrspace(1) %out, <3 x ptr addrspace(6)> %src0, i32 %src1, i32 %src2) { 653*35f7b60aSVikram Hegde; GFX10-SDAG-LABEL: v_permlanex16_v3p6: 654*35f7b60aSVikram Hegde; GFX10-SDAG: ; %bb.0: 655*35f7b60aSVikram Hegde; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 656*35f7b60aSVikram Hegde; GFX10-SDAG-NEXT: v_readfirstlane_b32 s4, v5 657*35f7b60aSVikram Hegde; GFX10-SDAG-NEXT: v_readfirstlane_b32 s5, v6 658*35f7b60aSVikram Hegde; GFX10-SDAG-NEXT: v_permlanex16_b32 v4, v4, s4, s5 659*35f7b60aSVikram Hegde; GFX10-SDAG-NEXT: v_permlanex16_b32 v3, v3, s4, s5 660*35f7b60aSVikram Hegde; GFX10-SDAG-NEXT: v_permlanex16_b32 v2, v2, s4, s5 661*35f7b60aSVikram Hegde; GFX10-SDAG-NEXT: global_store_dwordx3 v[0:1], v[2:4], off 
662*35f7b60aSVikram Hegde; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] 663*35f7b60aSVikram Hegde; 664*35f7b60aSVikram Hegde; GFX11-SDAG-LABEL: v_permlanex16_v3p6: 665*35f7b60aSVikram Hegde; GFX11-SDAG: ; %bb.0: 666*35f7b60aSVikram Hegde; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 667*35f7b60aSVikram Hegde; GFX11-SDAG-NEXT: v_readfirstlane_b32 s0, v5 668*35f7b60aSVikram Hegde; GFX11-SDAG-NEXT: v_readfirstlane_b32 s1, v6 669*35f7b60aSVikram Hegde; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) 670*35f7b60aSVikram Hegde; GFX11-SDAG-NEXT: v_permlanex16_b32 v4, v4, s0, s1 671*35f7b60aSVikram Hegde; GFX11-SDAG-NEXT: v_permlanex16_b32 v3, v3, s0, s1 672*35f7b60aSVikram Hegde; GFX11-SDAG-NEXT: v_permlanex16_b32 v2, v2, s0, s1 673*35f7b60aSVikram Hegde; GFX11-SDAG-NEXT: global_store_b96 v[0:1], v[2:4], off 674*35f7b60aSVikram Hegde; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] 675*35f7b60aSVikram Hegde; 676*35f7b60aSVikram Hegde; GFX12-SDAG-LABEL: v_permlanex16_v3p6: 677*35f7b60aSVikram Hegde; GFX12-SDAG: ; %bb.0: 678*35f7b60aSVikram Hegde; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0 679*35f7b60aSVikram Hegde; GFX12-SDAG-NEXT: s_wait_expcnt 0x0 680*35f7b60aSVikram Hegde; GFX12-SDAG-NEXT: s_wait_samplecnt 0x0 681*35f7b60aSVikram Hegde; GFX12-SDAG-NEXT: s_wait_bvhcnt 0x0 682*35f7b60aSVikram Hegde; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0 683*35f7b60aSVikram Hegde; GFX12-SDAG-NEXT: v_readfirstlane_b32 s0, v5 684*35f7b60aSVikram Hegde; GFX12-SDAG-NEXT: v_readfirstlane_b32 s1, v6 685*35f7b60aSVikram Hegde; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) 686*35f7b60aSVikram Hegde; GFX12-SDAG-NEXT: v_permlanex16_b32 v4, v4, s0, s1 687*35f7b60aSVikram Hegde; GFX12-SDAG-NEXT: v_permlanex16_b32 v3, v3, s0, s1 688*35f7b60aSVikram Hegde; GFX12-SDAG-NEXT: v_permlanex16_b32 v2, v2, s0, s1 689*35f7b60aSVikram Hegde; GFX12-SDAG-NEXT: global_store_b96 v[0:1], v[2:4], off 690*35f7b60aSVikram Hegde; GFX12-SDAG-NEXT: s_setpc_b64 s[30:31] 691*35f7b60aSVikram Hegde %v = call <3 x ptr addrspace(6)> 
@llvm.amdgcn.permlanex16.v3p6(<3 x ptr addrspace(6)> %src0, <3 x ptr addrspace(6)> %src0, i32 %src1, i32 %src2, i1 false, i1 false) 692*35f7b60aSVikram Hegde store <3 x ptr addrspace(6)> %v, ptr addrspace(1) %out 693*35f7b60aSVikram Hegde ret void 694*35f7b60aSVikram Hegde} 695