; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx802 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX802-SDAG %s
; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX1010-SDAG %s
; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx1100 -verify-machineinstrs -amdgpu-enable-vopd=0 < %s | FileCheck -check-prefixes=GFX1100-SDAG %s

; Exercises llvm.amdgcn.writelane with pointer and <3 x pointer> operands in
; address spaces 0, 3, 5 and 6. Each function loads the old value from %out,
; applies writelane with %src / %src1, and stores the result back to %out.
; The check bodies below are autogenerated; regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.

; Scalar pointer in address space 0: the checks expect two v_writelane_b32.
define void @test_writelane_p0(ptr addrspace(1) %out, ptr %src, i32 %src1) {
; GFX802-SDAG-LABEL: test_writelane_p0:
; GFX802-SDAG:       ; %bb.0:
; GFX802-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX802-SDAG-NEXT:    flat_load_dwordx2 v[5:6], v[0:1]
; GFX802-SDAG-NEXT:    v_readfirstlane_b32 m0, v4
; GFX802-SDAG-NEXT:    v_readfirstlane_b32 s4, v3
; GFX802-SDAG-NEXT:    v_readfirstlane_b32 s5, v2
; GFX802-SDAG-NEXT:    s_waitcnt vmcnt(0)
; GFX802-SDAG-NEXT:    s_nop 0
; GFX802-SDAG-NEXT:    v_writelane_b32 v6, s4, m0
; GFX802-SDAG-NEXT:    v_writelane_b32 v5, s5, m0
; GFX802-SDAG-NEXT:    flat_store_dwordx2 v[0:1], v[5:6]
; GFX802-SDAG-NEXT:    s_waitcnt vmcnt(0)
; GFX802-SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GFX1010-SDAG-LABEL: test_writelane_p0:
; GFX1010-SDAG:       ; %bb.0:
; GFX1010-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1010-SDAG-NEXT:    global_load_dwordx2 v[5:6], v[0:1], off
; GFX1010-SDAG-NEXT:    v_readfirstlane_b32 s4, v3
; GFX1010-SDAG-NEXT:    v_readfirstlane_b32 s5, v4
; GFX1010-SDAG-NEXT:    v_readfirstlane_b32 s6, v2
; GFX1010-SDAG-NEXT:    s_waitcnt vmcnt(0)
; GFX1010-SDAG-NEXT:    v_writelane_b32 v6, s4, s5
; GFX1010-SDAG-NEXT:    v_writelane_b32 v5, s6, s5
; GFX1010-SDAG-NEXT:    global_store_dwordx2 v[0:1], v[5:6], off
; GFX1010-SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GFX1100-SDAG-LABEL: test_writelane_p0:
; GFX1100-SDAG:       ; %bb.0:
; GFX1100-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-SDAG-NEXT:    global_load_b64 v[5:6], v[0:1], off
; GFX1100-SDAG-NEXT:    v_readfirstlane_b32 s0, v3
; GFX1100-SDAG-NEXT:    v_readfirstlane_b32 s1, v4
; GFX1100-SDAG-NEXT:    v_readfirstlane_b32 s2, v2
; GFX1100-SDAG-NEXT:    s_waitcnt vmcnt(0)
; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX1100-SDAG-NEXT:    v_writelane_b32 v6, s0, s1
; GFX1100-SDAG-NEXT:    v_writelane_b32 v5, s2, s1
; GFX1100-SDAG-NEXT:    global_store_b64 v[0:1], v[5:6], off
; GFX1100-SDAG-NEXT:    s_setpc_b64 s[30:31]
  %oldval = load ptr, ptr addrspace(1) %out
  %writelane = call ptr @llvm.amdgcn.writelane.p0(ptr %src, i32 %src1, ptr %oldval)
  store ptr %writelane, ptr addrspace(1) %out, align 4
  ret void
}

; <3 x ptr> in address space 0: the checks expect six v_writelane_b32.
define void @test_writelane_v3p0(ptr addrspace(1) %out, <3 x ptr> %src, i32 %src1) {
; GFX802-SDAG-LABEL: test_writelane_v3p0:
; GFX802-SDAG:       ; %bb.0:
; GFX802-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX802-SDAG-NEXT:    v_add_u32_e32 v13, vcc, 16, v0
; GFX802-SDAG-NEXT:    flat_load_dwordx4 v[9:12], v[0:1]
; GFX802-SDAG-NEXT:    v_addc_u32_e32 v14, vcc, 0, v1, vcc
; GFX802-SDAG-NEXT:    flat_load_dwordx2 v[15:16], v[13:14]
; GFX802-SDAG-NEXT:    v_readfirstlane_b32 m0, v8
; GFX802-SDAG-NEXT:    v_readfirstlane_b32 s6, v5
; GFX802-SDAG-NEXT:    v_readfirstlane_b32 s7, v4
; GFX802-SDAG-NEXT:    v_readfirstlane_b32 s8, v3
; GFX802-SDAG-NEXT:    v_readfirstlane_b32 s9, v2
; GFX802-SDAG-NEXT:    v_readfirstlane_b32 s4, v7
; GFX802-SDAG-NEXT:    v_readfirstlane_b32 s5, v6
; GFX802-SDAG-NEXT:    s_waitcnt vmcnt(1)
; GFX802-SDAG-NEXT:    v_writelane_b32 v12, s6, m0
; GFX802-SDAG-NEXT:    v_writelane_b32 v11, s7, m0
; GFX802-SDAG-NEXT:    v_writelane_b32 v10, s8, m0
; GFX802-SDAG-NEXT:    v_writelane_b32 v9, s9, m0
; GFX802-SDAG-NEXT:    s_waitcnt vmcnt(0)
; GFX802-SDAG-NEXT:    v_writelane_b32 v16, s4, m0
; GFX802-SDAG-NEXT:    v_writelane_b32 v15, s5, m0
; GFX802-SDAG-NEXT:    flat_store_dwordx4 v[0:1], v[9:12]
; GFX802-SDAG-NEXT:    flat_store_dwordx2 v[13:14], v[15:16]
; GFX802-SDAG-NEXT:    s_waitcnt vmcnt(0)
; GFX802-SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GFX1010-SDAG-LABEL: test_writelane_v3p0:
; GFX1010-SDAG:       ; %bb.0:
; GFX1010-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1010-SDAG-NEXT:    s_clause 0x1
; GFX1010-SDAG-NEXT:    global_load_dwordx2 v[13:14], v[0:1], off offset:16
; GFX1010-SDAG-NEXT:    global_load_dwordx4 v[9:12], v[0:1], off
; GFX1010-SDAG-NEXT:    v_readfirstlane_b32 s5, v8
; GFX1010-SDAG-NEXT:    v_readfirstlane_b32 s7, v5
; GFX1010-SDAG-NEXT:    v_readfirstlane_b32 s8, v4
; GFX1010-SDAG-NEXT:    v_readfirstlane_b32 s9, v3
; GFX1010-SDAG-NEXT:    v_readfirstlane_b32 s10, v2
; GFX1010-SDAG-NEXT:    v_readfirstlane_b32 s4, v7
; GFX1010-SDAG-NEXT:    v_readfirstlane_b32 s6, v6
; GFX1010-SDAG-NEXT:    s_waitcnt vmcnt(1)
; GFX1010-SDAG-NEXT:    v_writelane_b32 v14, s4, s5
; GFX1010-SDAG-NEXT:    s_waitcnt vmcnt(0)
; GFX1010-SDAG-NEXT:    v_writelane_b32 v12, s7, s5
; GFX1010-SDAG-NEXT:    v_writelane_b32 v11, s8, s5
; GFX1010-SDAG-NEXT:    v_writelane_b32 v10, s9, s5
; GFX1010-SDAG-NEXT:    v_writelane_b32 v9, s10, s5
; GFX1010-SDAG-NEXT:    v_writelane_b32 v13, s6, s5
; GFX1010-SDAG-NEXT:    global_store_dwordx4 v[0:1], v[9:12], off
; GFX1010-SDAG-NEXT:    global_store_dwordx2 v[0:1], v[13:14], off offset:16
; GFX1010-SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GFX1100-SDAG-LABEL: test_writelane_v3p0:
; GFX1100-SDAG:       ; %bb.0:
; GFX1100-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-SDAG-NEXT:    s_clause 0x1
; GFX1100-SDAG-NEXT:    global_load_b64 v[13:14], v[0:1], off offset:16
; GFX1100-SDAG-NEXT:    global_load_b128 v[9:12], v[0:1], off
; GFX1100-SDAG-NEXT:    v_readfirstlane_b32 s1, v8
; GFX1100-SDAG-NEXT:    v_readfirstlane_b32 s3, v5
; GFX1100-SDAG-NEXT:    v_readfirstlane_b32 s4, v4
; GFX1100-SDAG-NEXT:    v_readfirstlane_b32 s5, v3
; GFX1100-SDAG-NEXT:    v_readfirstlane_b32 s6, v2
; GFX1100-SDAG-NEXT:    v_readfirstlane_b32 s0, v7
; GFX1100-SDAG-NEXT:    v_readfirstlane_b32 s2, v6
; GFX1100-SDAG-NEXT:    s_waitcnt vmcnt(1)
; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2)
; GFX1100-SDAG-NEXT:    v_writelane_b32 v14, s0, s1
; GFX1100-SDAG-NEXT:    s_waitcnt vmcnt(0)
; GFX1100-SDAG-NEXT:    v_writelane_b32 v12, s3, s1
; GFX1100-SDAG-NEXT:    v_writelane_b32 v11, s4, s1
; GFX1100-SDAG-NEXT:    v_writelane_b32 v10, s5, s1
; GFX1100-SDAG-NEXT:    v_writelane_b32 v9, s6, s1
; GFX1100-SDAG-NEXT:    v_writelane_b32 v13, s2, s1
; GFX1100-SDAG-NEXT:    s_clause 0x1
; GFX1100-SDAG-NEXT:    global_store_b128 v[0:1], v[9:12], off
; GFX1100-SDAG-NEXT:    global_store_b64 v[0:1], v[13:14], off offset:16
; GFX1100-SDAG-NEXT:    s_setpc_b64 s[30:31]
  %oldval = load <3 x ptr>, ptr addrspace(1) %out
  %writelane = call <3 x ptr> @llvm.amdgcn.writelane.v3p0(<3 x ptr> %src, i32 %src1, <3 x ptr> %oldval)
  store <3 x ptr> %writelane, ptr addrspace(1) %out, align 4
  ret void
}

; Scalar pointer in address space 3: the checks expect a single v_writelane_b32.
define void @test_writelane_p3(ptr addrspace(1) %out, ptr addrspace(3) %src, i32 %src1) {
; GFX802-SDAG-LABEL: test_writelane_p3:
; GFX802-SDAG:       ; %bb.0:
; GFX802-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX802-SDAG-NEXT:    flat_load_dword v4, v[0:1]
; GFX802-SDAG-NEXT:    v_readfirstlane_b32 m0, v3
; GFX802-SDAG-NEXT:    v_readfirstlane_b32 s4, v2
; GFX802-SDAG-NEXT:    s_waitcnt vmcnt(0)
; GFX802-SDAG-NEXT:    s_nop 1
; GFX802-SDAG-NEXT:    v_writelane_b32 v4, s4, m0
; GFX802-SDAG-NEXT:    flat_store_dword v[0:1], v4
; GFX802-SDAG-NEXT:    s_waitcnt vmcnt(0)
; GFX802-SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GFX1010-SDAG-LABEL: test_writelane_p3:
; GFX1010-SDAG:       ; %bb.0:
; GFX1010-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1010-SDAG-NEXT:    global_load_dword v4, v[0:1], off
; GFX1010-SDAG-NEXT:    v_readfirstlane_b32 s4, v2
; GFX1010-SDAG-NEXT:    v_readfirstlane_b32 s5, v3
; GFX1010-SDAG-NEXT:    s_waitcnt vmcnt(0)
; GFX1010-SDAG-NEXT:    v_writelane_b32 v4, s4, s5
; GFX1010-SDAG-NEXT:    global_store_dword v[0:1], v4, off
; GFX1010-SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GFX1100-SDAG-LABEL: test_writelane_p3:
; GFX1100-SDAG:       ; %bb.0:
; GFX1100-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-SDAG-NEXT:    global_load_b32 v4, v[0:1], off
; GFX1100-SDAG-NEXT:    v_readfirstlane_b32 s0, v2
; GFX1100-SDAG-NEXT:    v_readfirstlane_b32 s1, v3
; GFX1100-SDAG-NEXT:    s_waitcnt vmcnt(0)
; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX1100-SDAG-NEXT:    v_writelane_b32 v4, s0, s1
; GFX1100-SDAG-NEXT:    global_store_b32 v[0:1], v4, off
; GFX1100-SDAG-NEXT:    s_setpc_b64 s[30:31]
  %oldval = load ptr addrspace(3), ptr addrspace(1) %out
  %writelane = call ptr addrspace(3) @llvm.amdgcn.writelane.p3(ptr addrspace(3) %src, i32 %src1, ptr addrspace(3) %oldval)
  store ptr addrspace(3) %writelane, ptr addrspace(1) %out, align 4
  ret void
}

; <3 x ptr addrspace(3)>: the checks expect three v_writelane_b32.
define void @test_writelane_v3p3(ptr addrspace(1) %out, <3 x ptr addrspace(3)> %src, i32 %src1) {
; GFX802-SDAG-LABEL: test_writelane_v3p3:
; GFX802-SDAG:       ; %bb.0:
; GFX802-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX802-SDAG-NEXT:    flat_load_dwordx3 v[6:8], v[0:1]
; GFX802-SDAG-NEXT:    v_readfirstlane_b32 m0, v5
; GFX802-SDAG-NEXT:    v_readfirstlane_b32 s4, v4
; GFX802-SDAG-NEXT:    v_readfirstlane_b32 s5, v3
; GFX802-SDAG-NEXT:    v_readfirstlane_b32 s6, v2
; GFX802-SDAG-NEXT:    s_waitcnt vmcnt(0)
; GFX802-SDAG-NEXT:    v_writelane_b32 v8, s4, m0
; GFX802-SDAG-NEXT:    v_writelane_b32 v7, s5, m0
; GFX802-SDAG-NEXT:    v_writelane_b32 v6, s6, m0
; GFX802-SDAG-NEXT:    flat_store_dwordx3 v[0:1], v[6:8]
; GFX802-SDAG-NEXT:    s_waitcnt vmcnt(0)
; GFX802-SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GFX1010-SDAG-LABEL: test_writelane_v3p3:
; GFX1010-SDAG:       ; %bb.0:
; GFX1010-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1010-SDAG-NEXT:    global_load_dwordx3 v[6:8], v[0:1], off
; GFX1010-SDAG-NEXT:    v_readfirstlane_b32 s4, v4
; GFX1010-SDAG-NEXT:    v_readfirstlane_b32 s5, v5
; GFX1010-SDAG-NEXT:    v_readfirstlane_b32 s6, v3
; GFX1010-SDAG-NEXT:    v_readfirstlane_b32 s7, v2
; GFX1010-SDAG-NEXT:    s_waitcnt vmcnt(0)
; GFX1010-SDAG-NEXT:    v_writelane_b32 v8, s4, s5
; GFX1010-SDAG-NEXT:    v_writelane_b32 v7, s6, s5
; GFX1010-SDAG-NEXT:    v_writelane_b32 v6, s7, s5
; GFX1010-SDAG-NEXT:    global_store_dwordx3 v[0:1], v[6:8], off
; GFX1010-SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GFX1100-SDAG-LABEL: test_writelane_v3p3:
; GFX1100-SDAG:       ; %bb.0:
; GFX1100-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-SDAG-NEXT:    global_load_b96 v[6:8], v[0:1], off
; GFX1100-SDAG-NEXT:    v_readfirstlane_b32 s0, v4
; GFX1100-SDAG-NEXT:    v_readfirstlane_b32 s1, v5
; GFX1100-SDAG-NEXT:    v_readfirstlane_b32 s2, v3
; GFX1100-SDAG-NEXT:    v_readfirstlane_b32 s3, v2
; GFX1100-SDAG-NEXT:    s_waitcnt vmcnt(0)
; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
; GFX1100-SDAG-NEXT:    v_writelane_b32 v8, s0, s1
; GFX1100-SDAG-NEXT:    v_writelane_b32 v7, s2, s1
; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_3)
; GFX1100-SDAG-NEXT:    v_writelane_b32 v6, s3, s1
; GFX1100-SDAG-NEXT:    global_store_b96 v[0:1], v[6:8], off
; GFX1100-SDAG-NEXT:    s_setpc_b64 s[30:31]
  %oldval = load <3 x ptr addrspace(3)>, ptr addrspace(1) %out
  %writelane = call <3 x ptr addrspace(3)> @llvm.amdgcn.writelane.v3p3(<3 x ptr addrspace(3)> %src, i32 %src1, <3 x ptr addrspace(3)> %oldval)
  store <3 x ptr addrspace(3)> %writelane, ptr addrspace(1) %out, align 4
  ret void
}

; Scalar pointer in address space 5: the checks expect a single v_writelane_b32.
define void @test_writelane_p5(ptr addrspace(1) %out, ptr addrspace(5) %src, i32 %src1) {
; GFX802-SDAG-LABEL: test_writelane_p5:
; GFX802-SDAG:       ; %bb.0:
; GFX802-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX802-SDAG-NEXT:    flat_load_dword v4, v[0:1]
; GFX802-SDAG-NEXT:    v_readfirstlane_b32 m0, v3
; GFX802-SDAG-NEXT:    v_readfirstlane_b32 s4, v2
; GFX802-SDAG-NEXT:    s_waitcnt vmcnt(0)
; GFX802-SDAG-NEXT:    s_nop 1
; GFX802-SDAG-NEXT:    v_writelane_b32 v4, s4, m0
; GFX802-SDAG-NEXT:    flat_store_dword v[0:1], v4
; GFX802-SDAG-NEXT:    s_waitcnt vmcnt(0)
; GFX802-SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GFX1010-SDAG-LABEL: test_writelane_p5:
; GFX1010-SDAG:       ; %bb.0:
; GFX1010-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1010-SDAG-NEXT:    global_load_dword v4, v[0:1], off
; GFX1010-SDAG-NEXT:    v_readfirstlane_b32 s4, v2
; GFX1010-SDAG-NEXT:    v_readfirstlane_b32 s5, v3
; GFX1010-SDAG-NEXT:    s_waitcnt vmcnt(0)
; GFX1010-SDAG-NEXT:    v_writelane_b32 v4, s4, s5
; GFX1010-SDAG-NEXT:    global_store_dword v[0:1], v4, off
; GFX1010-SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GFX1100-SDAG-LABEL: test_writelane_p5:
; GFX1100-SDAG:       ; %bb.0:
; GFX1100-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-SDAG-NEXT:    global_load_b32 v4, v[0:1], off
; GFX1100-SDAG-NEXT:    v_readfirstlane_b32 s0, v2
; GFX1100-SDAG-NEXT:    v_readfirstlane_b32 s1, v3
; GFX1100-SDAG-NEXT:    s_waitcnt vmcnt(0)
; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX1100-SDAG-NEXT:    v_writelane_b32 v4, s0, s1
; GFX1100-SDAG-NEXT:    global_store_b32 v[0:1], v4, off
; GFX1100-SDAG-NEXT:    s_setpc_b64 s[30:31]
  %oldval = load ptr addrspace(5), ptr addrspace(1) %out
  %writelane = call ptr addrspace(5) @llvm.amdgcn.writelane.p5(ptr addrspace(5) %src, i32 %src1, ptr addrspace(5) %oldval)
  store ptr addrspace(5) %writelane, ptr addrspace(1) %out, align 4
  ret void
}

; <3 x ptr addrspace(5)>: the checks expect three v_writelane_b32.
define void @test_writelane_v3p5(ptr addrspace(1) %out, <3 x ptr addrspace(5)> %src, i32 %src1) {
; GFX802-SDAG-LABEL: test_writelane_v3p5:
; GFX802-SDAG:       ; %bb.0:
; GFX802-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX802-SDAG-NEXT:    flat_load_dwordx3 v[6:8], v[0:1]
; GFX802-SDAG-NEXT:    v_readfirstlane_b32 m0, v5
; GFX802-SDAG-NEXT:    v_readfirstlane_b32 s4, v4
; GFX802-SDAG-NEXT:    v_readfirstlane_b32 s5, v3
; GFX802-SDAG-NEXT:    v_readfirstlane_b32 s6, v2
; GFX802-SDAG-NEXT:    s_waitcnt vmcnt(0)
; GFX802-SDAG-NEXT:    v_writelane_b32 v8, s4, m0
; GFX802-SDAG-NEXT:    v_writelane_b32 v7, s5, m0
; GFX802-SDAG-NEXT:    v_writelane_b32 v6, s6, m0
; GFX802-SDAG-NEXT:    flat_store_dwordx3 v[0:1], v[6:8]
; GFX802-SDAG-NEXT:    s_waitcnt vmcnt(0)
; GFX802-SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GFX1010-SDAG-LABEL: test_writelane_v3p5:
; GFX1010-SDAG:       ; %bb.0:
; GFX1010-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1010-SDAG-NEXT:    global_load_dwordx3 v[6:8], v[0:1], off
; GFX1010-SDAG-NEXT:    v_readfirstlane_b32 s4, v4
; GFX1010-SDAG-NEXT:    v_readfirstlane_b32 s5, v5
; GFX1010-SDAG-NEXT:    v_readfirstlane_b32 s6, v3
; GFX1010-SDAG-NEXT:    v_readfirstlane_b32 s7, v2
; GFX1010-SDAG-NEXT:    s_waitcnt vmcnt(0)
; GFX1010-SDAG-NEXT:    v_writelane_b32 v8, s4, s5
; GFX1010-SDAG-NEXT:    v_writelane_b32 v7, s6, s5
; GFX1010-SDAG-NEXT:    v_writelane_b32 v6, s7, s5
; GFX1010-SDAG-NEXT:    global_store_dwordx3 v[0:1], v[6:8], off
; GFX1010-SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GFX1100-SDAG-LABEL: test_writelane_v3p5:
; GFX1100-SDAG:       ; %bb.0:
; GFX1100-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-SDAG-NEXT:    global_load_b96 v[6:8], v[0:1], off
; GFX1100-SDAG-NEXT:    v_readfirstlane_b32 s0, v4
; GFX1100-SDAG-NEXT:    v_readfirstlane_b32 s1, v5
; GFX1100-SDAG-NEXT:    v_readfirstlane_b32 s2, v3
; GFX1100-SDAG-NEXT:    v_readfirstlane_b32 s3, v2
; GFX1100-SDAG-NEXT:    s_waitcnt vmcnt(0)
; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
; GFX1100-SDAG-NEXT:    v_writelane_b32 v8, s0, s1
; GFX1100-SDAG-NEXT:    v_writelane_b32 v7, s2, s1
; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_3)
; GFX1100-SDAG-NEXT:    v_writelane_b32 v6, s3, s1
; GFX1100-SDAG-NEXT:    global_store_b96 v[0:1], v[6:8], off
; GFX1100-SDAG-NEXT:    s_setpc_b64 s[30:31]
  %oldval = load <3 x ptr addrspace(5)>, ptr addrspace(1) %out
  %writelane = call <3 x ptr addrspace(5)> @llvm.amdgcn.writelane.v3p5(<3 x ptr addrspace(5)> %src, i32 %src1, <3 x ptr addrspace(5)> %oldval)
  store <3 x ptr addrspace(5)> %writelane, ptr addrspace(1) %out, align 4
  ret void
}

; Scalar pointer in address space 6: the checks expect a single v_writelane_b32.
define void @test_writelane_p6(ptr addrspace(1) %out, ptr addrspace(6) %src, i32 %src1) {
; GFX802-SDAG-LABEL: test_writelane_p6:
; GFX802-SDAG:       ; %bb.0:
; GFX802-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX802-SDAG-NEXT:    flat_load_dword v4, v[0:1]
; GFX802-SDAG-NEXT:    v_readfirstlane_b32 m0, v3
; GFX802-SDAG-NEXT:    v_readfirstlane_b32 s4, v2
; GFX802-SDAG-NEXT:    s_waitcnt vmcnt(0)
; GFX802-SDAG-NEXT:    s_nop 1
; GFX802-SDAG-NEXT:    v_writelane_b32 v4, s4, m0
; GFX802-SDAG-NEXT:    flat_store_dword v[0:1], v4
; GFX802-SDAG-NEXT:    s_waitcnt vmcnt(0)
; GFX802-SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GFX1010-SDAG-LABEL: test_writelane_p6:
; GFX1010-SDAG:       ; %bb.0:
; GFX1010-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1010-SDAG-NEXT:    global_load_dword v4, v[0:1], off
; GFX1010-SDAG-NEXT:    v_readfirstlane_b32 s4, v2
; GFX1010-SDAG-NEXT:    v_readfirstlane_b32 s5, v3
; GFX1010-SDAG-NEXT:    s_waitcnt vmcnt(0)
; GFX1010-SDAG-NEXT:    v_writelane_b32 v4, s4, s5
; GFX1010-SDAG-NEXT:    global_store_dword v[0:1], v4, off
; GFX1010-SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GFX1100-SDAG-LABEL: test_writelane_p6:
; GFX1100-SDAG:       ; %bb.0:
; GFX1100-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-SDAG-NEXT:    global_load_b32 v4, v[0:1], off
; GFX1100-SDAG-NEXT:    v_readfirstlane_b32 s0, v2
; GFX1100-SDAG-NEXT:    v_readfirstlane_b32 s1, v3
; GFX1100-SDAG-NEXT:    s_waitcnt vmcnt(0)
; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX1100-SDAG-NEXT:    v_writelane_b32 v4, s0, s1
; GFX1100-SDAG-NEXT:    global_store_b32 v[0:1], v4, off
; GFX1100-SDAG-NEXT:    s_setpc_b64 s[30:31]
  %oldval = load ptr addrspace(6), ptr addrspace(1) %out
  %writelane = call ptr addrspace(6) @llvm.amdgcn.writelane.p6(ptr addrspace(6) %src, i32 %src1, ptr addrspace(6) %oldval)
  store ptr addrspace(6) %writelane, ptr addrspace(1) %out, align 4
  ret void
}

; <3 x ptr addrspace(6)>: the checks expect three v_writelane_b32.
define void @test_writelane_v3p6(ptr addrspace(1) %out, <3 x ptr addrspace(6)> %src, i32 %src1) {
; GFX802-SDAG-LABEL: test_writelane_v3p6:
; GFX802-SDAG:       ; %bb.0:
; GFX802-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX802-SDAG-NEXT:    flat_load_dwordx3 v[6:8], v[0:1]
; GFX802-SDAG-NEXT:    v_readfirstlane_b32 m0, v5
; GFX802-SDAG-NEXT:    v_readfirstlane_b32 s4, v4
; GFX802-SDAG-NEXT:    v_readfirstlane_b32 s5, v3
; GFX802-SDAG-NEXT:    v_readfirstlane_b32 s6, v2
; GFX802-SDAG-NEXT:    s_waitcnt vmcnt(0)
; GFX802-SDAG-NEXT:    v_writelane_b32 v8, s4, m0
; GFX802-SDAG-NEXT:    v_writelane_b32 v7, s5, m0
; GFX802-SDAG-NEXT:    v_writelane_b32 v6, s6, m0
; GFX802-SDAG-NEXT:    flat_store_dwordx3 v[0:1], v[6:8]
; GFX802-SDAG-NEXT:    s_waitcnt vmcnt(0)
; GFX802-SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GFX1010-SDAG-LABEL: test_writelane_v3p6:
; GFX1010-SDAG:       ; %bb.0:
; GFX1010-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1010-SDAG-NEXT:    global_load_dwordx3 v[6:8], v[0:1], off
; GFX1010-SDAG-NEXT:    v_readfirstlane_b32 s4, v4
; GFX1010-SDAG-NEXT:    v_readfirstlane_b32 s5, v5
; GFX1010-SDAG-NEXT:    v_readfirstlane_b32 s6, v3
; GFX1010-SDAG-NEXT:    v_readfirstlane_b32 s7, v2
; GFX1010-SDAG-NEXT:    s_waitcnt vmcnt(0)
; GFX1010-SDAG-NEXT:    v_writelane_b32 v8, s4, s5
; GFX1010-SDAG-NEXT:    v_writelane_b32 v7, s6, s5
; GFX1010-SDAG-NEXT:    v_writelane_b32 v6, s7, s5
; GFX1010-SDAG-NEXT:    global_store_dwordx3 v[0:1], v[6:8], off
; GFX1010-SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GFX1100-SDAG-LABEL: test_writelane_v3p6:
; GFX1100-SDAG:       ; %bb.0:
; GFX1100-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-SDAG-NEXT:    global_load_b96 v[6:8], v[0:1], off
; GFX1100-SDAG-NEXT:    v_readfirstlane_b32 s0, v4
; GFX1100-SDAG-NEXT:    v_readfirstlane_b32 s1, v5
; GFX1100-SDAG-NEXT:    v_readfirstlane_b32 s2, v3
; GFX1100-SDAG-NEXT:    v_readfirstlane_b32 s3, v2
; GFX1100-SDAG-NEXT:    s_waitcnt vmcnt(0)
; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
; GFX1100-SDAG-NEXT:    v_writelane_b32 v8, s0, s1
; GFX1100-SDAG-NEXT:    v_writelane_b32 v7, s2, s1
; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_3)
; GFX1100-SDAG-NEXT:    v_writelane_b32 v6, s3, s1
; GFX1100-SDAG-NEXT:    global_store_b96 v[0:1], v[6:8], off
; GFX1100-SDAG-NEXT:    s_setpc_b64 s[30:31]
  %oldval = load <3 x ptr addrspace(6)>, ptr addrspace(1) %out
  %writelane = call <3 x ptr addrspace(6)> @llvm.amdgcn.writelane.v3p6(<3 x ptr addrspace(6)> %src, i32 %src1, <3 x ptr addrspace(6)> %oldval)
  store <3 x ptr addrspace(6)> %writelane, ptr addrspace(1) %out, align 4
  ret void
}