; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx802 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX802-SDAG %s
; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX1010-SDAG %s
; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx1100 -verify-machineinstrs -amdgpu-enable-vopd=0 < %s | FileCheck -check-prefixes=GFX1100-SDAG %s

; Exercises the llvm.amdgcn.writelane intrinsic with pointer-typed operands:
; address spaces 0, 3, 5 and 6, each as a scalar pointer and as a <3 x ptr>
; vector, compiled for gfx802, gfx1010 and gfx1100.
;
; Every function follows the same shape: load the old value from %out, call
; writelane with (%src, %src1, old value), and store the result back to %out.
;
; The assertion lines below are autogenerated (see the first line of this
; file); regenerate them with utils/update_llc_test_checks.py instead of
; editing them by hand.

; Scalar pointer in address space 0; the expected output contains two
; v_writelane_b32 instructions per target.
define void @test_writelane_p0(ptr addrspace(1) %out, ptr %src, i32 %src1) {
; GFX802-SDAG-LABEL: test_writelane_p0:
; GFX802-SDAG:       ; %bb.0:
; GFX802-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX802-SDAG-NEXT:    flat_load_dwordx2 v[5:6], v[0:1]
; GFX802-SDAG-NEXT:    v_readfirstlane_b32 m0, v4
; GFX802-SDAG-NEXT:    v_readfirstlane_b32 s4, v3
; GFX802-SDAG-NEXT:    v_readfirstlane_b32 s5, v2
; GFX802-SDAG-NEXT:    s_waitcnt vmcnt(0)
; GFX802-SDAG-NEXT:    s_nop 0
; GFX802-SDAG-NEXT:    v_writelane_b32 v6, s4, m0
; GFX802-SDAG-NEXT:    v_writelane_b32 v5, s5, m0
; GFX802-SDAG-NEXT:    flat_store_dwordx2 v[0:1], v[5:6]
; GFX802-SDAG-NEXT:    s_waitcnt vmcnt(0)
; GFX802-SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GFX1010-SDAG-LABEL: test_writelane_p0:
; GFX1010-SDAG:       ; %bb.0:
; GFX1010-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1010-SDAG-NEXT:    global_load_dwordx2 v[5:6], v[0:1], off
; GFX1010-SDAG-NEXT:    v_readfirstlane_b32 s4, v3
; GFX1010-SDAG-NEXT:    v_readfirstlane_b32 s5, v4
; GFX1010-SDAG-NEXT:    v_readfirstlane_b32 s6, v2
; GFX1010-SDAG-NEXT:    s_waitcnt vmcnt(0)
; GFX1010-SDAG-NEXT:    v_writelane_b32 v6, s4, s5
; GFX1010-SDAG-NEXT:    v_writelane_b32 v5, s6, s5
; GFX1010-SDAG-NEXT:    global_store_dwordx2 v[0:1], v[5:6], off
; GFX1010-SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GFX1100-SDAG-LABEL: test_writelane_p0:
; GFX1100-SDAG:       ; %bb.0:
; GFX1100-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-SDAG-NEXT:    global_load_b64 v[5:6], v[0:1], off
; GFX1100-SDAG-NEXT:    v_readfirstlane_b32 s0, v3
; GFX1100-SDAG-NEXT:    v_readfirstlane_b32 s1, v4
; GFX1100-SDAG-NEXT:    v_readfirstlane_b32 s2, v2
; GFX1100-SDAG-NEXT:    s_waitcnt vmcnt(0)
; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX1100-SDAG-NEXT:    v_writelane_b32 v6, s0, s1
; GFX1100-SDAG-NEXT:    v_writelane_b32 v5, s2, s1
; GFX1100-SDAG-NEXT:    global_store_b64 v[0:1], v[5:6], off
; GFX1100-SDAG-NEXT:    s_setpc_b64 s[30:31]
  %oldval = load ptr, ptr addrspace(1) %out
  %writelane = call ptr @llvm.amdgcn.writelane.p0(ptr %src, i32 %src1, ptr %oldval)
  store ptr %writelane, ptr addrspace(1) %out, align 4
  ret void
}

; <3 x ptr> vector in address space 0; the expected output contains six
; v_writelane_b32 instructions per target.
define void @test_writelane_v3p0(ptr addrspace(1) %out, <3 x ptr> %src, i32 %src1) {
; GFX802-SDAG-LABEL: test_writelane_v3p0:
; GFX802-SDAG:       ; %bb.0:
; GFX802-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX802-SDAG-NEXT:    v_add_u32_e32 v13, vcc, 16, v0
; GFX802-SDAG-NEXT:    flat_load_dwordx4 v[9:12], v[0:1]
; GFX802-SDAG-NEXT:    v_addc_u32_e32 v14, vcc, 0, v1, vcc
; GFX802-SDAG-NEXT:    flat_load_dwordx2 v[15:16], v[13:14]
; GFX802-SDAG-NEXT:    v_readfirstlane_b32 m0, v8
; GFX802-SDAG-NEXT:    v_readfirstlane_b32 s6, v5
; GFX802-SDAG-NEXT:    v_readfirstlane_b32 s7, v4
; GFX802-SDAG-NEXT:    v_readfirstlane_b32 s8, v3
; GFX802-SDAG-NEXT:    v_readfirstlane_b32 s9, v2
; GFX802-SDAG-NEXT:    v_readfirstlane_b32 s4, v7
; GFX802-SDAG-NEXT:    v_readfirstlane_b32 s5, v6
; GFX802-SDAG-NEXT:    s_waitcnt vmcnt(1)
; GFX802-SDAG-NEXT:    v_writelane_b32 v12, s6, m0
; GFX802-SDAG-NEXT:    v_writelane_b32 v11, s7, m0
; GFX802-SDAG-NEXT:    v_writelane_b32 v10, s8, m0
; GFX802-SDAG-NEXT:    v_writelane_b32 v9, s9, m0
; GFX802-SDAG-NEXT:    s_waitcnt vmcnt(0)
; GFX802-SDAG-NEXT:    v_writelane_b32 v16, s4, m0
; GFX802-SDAG-NEXT:    v_writelane_b32 v15, s5, m0
; GFX802-SDAG-NEXT:    flat_store_dwordx4 v[0:1], v[9:12]
; GFX802-SDAG-NEXT:    flat_store_dwordx2 v[13:14], v[15:16]
; GFX802-SDAG-NEXT:    s_waitcnt vmcnt(0)
; GFX802-SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GFX1010-SDAG-LABEL: test_writelane_v3p0:
; GFX1010-SDAG:       ; %bb.0:
; GFX1010-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1010-SDAG-NEXT:    s_clause 0x1
; GFX1010-SDAG-NEXT:    global_load_dwordx2 v[13:14], v[0:1], off offset:16
; GFX1010-SDAG-NEXT:    global_load_dwordx4 v[9:12], v[0:1], off
; GFX1010-SDAG-NEXT:    v_readfirstlane_b32 s5, v8
; GFX1010-SDAG-NEXT:    v_readfirstlane_b32 s7, v5
; GFX1010-SDAG-NEXT:    v_readfirstlane_b32 s8, v4
; GFX1010-SDAG-NEXT:    v_readfirstlane_b32 s9, v3
; GFX1010-SDAG-NEXT:    v_readfirstlane_b32 s10, v2
; GFX1010-SDAG-NEXT:    v_readfirstlane_b32 s4, v7
; GFX1010-SDAG-NEXT:    v_readfirstlane_b32 s6, v6
; GFX1010-SDAG-NEXT:    s_waitcnt vmcnt(1)
; GFX1010-SDAG-NEXT:    v_writelane_b32 v14, s4, s5
; GFX1010-SDAG-NEXT:    s_waitcnt vmcnt(0)
; GFX1010-SDAG-NEXT:    v_writelane_b32 v12, s7, s5
; GFX1010-SDAG-NEXT:    v_writelane_b32 v11, s8, s5
; GFX1010-SDAG-NEXT:    v_writelane_b32 v10, s9, s5
; GFX1010-SDAG-NEXT:    v_writelane_b32 v9, s10, s5
; GFX1010-SDAG-NEXT:    v_writelane_b32 v13, s6, s5
; GFX1010-SDAG-NEXT:    global_store_dwordx4 v[0:1], v[9:12], off
; GFX1010-SDAG-NEXT:    global_store_dwordx2 v[0:1], v[13:14], off offset:16
; GFX1010-SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GFX1100-SDAG-LABEL: test_writelane_v3p0:
; GFX1100-SDAG:       ; %bb.0:
; GFX1100-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-SDAG-NEXT:    s_clause 0x1
; GFX1100-SDAG-NEXT:    global_load_b64 v[13:14], v[0:1], off offset:16
; GFX1100-SDAG-NEXT:    global_load_b128 v[9:12], v[0:1], off
; GFX1100-SDAG-NEXT:    v_readfirstlane_b32 s1, v8
; GFX1100-SDAG-NEXT:    v_readfirstlane_b32 s3, v5
; GFX1100-SDAG-NEXT:    v_readfirstlane_b32 s4, v4
; GFX1100-SDAG-NEXT:    v_readfirstlane_b32 s5, v3
; GFX1100-SDAG-NEXT:    v_readfirstlane_b32 s6, v2
; GFX1100-SDAG-NEXT:    v_readfirstlane_b32 s0, v7
; GFX1100-SDAG-NEXT:    v_readfirstlane_b32 s2, v6
; GFX1100-SDAG-NEXT:    s_waitcnt vmcnt(1)
; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2)
; GFX1100-SDAG-NEXT:    v_writelane_b32 v14, s0, s1
; GFX1100-SDAG-NEXT:    s_waitcnt vmcnt(0)
; GFX1100-SDAG-NEXT:    v_writelane_b32 v12, s3, s1
; GFX1100-SDAG-NEXT:    v_writelane_b32 v11, s4, s1
; GFX1100-SDAG-NEXT:    v_writelane_b32 v10, s5, s1
; GFX1100-SDAG-NEXT:    v_writelane_b32 v9, s6, s1
; GFX1100-SDAG-NEXT:    v_writelane_b32 v13, s2, s1
; GFX1100-SDAG-NEXT:    s_clause 0x1
; GFX1100-SDAG-NEXT:    global_store_b128 v[0:1], v[9:12], off
; GFX1100-SDAG-NEXT:    global_store_b64 v[0:1], v[13:14], off offset:16
; GFX1100-SDAG-NEXT:    s_setpc_b64 s[30:31]
  %oldval = load <3 x ptr>, ptr addrspace(1) %out
  %writelane = call <3 x ptr> @llvm.amdgcn.writelane.v3p0(<3 x ptr> %src, i32 %src1, <3 x ptr> %oldval)
  store <3 x ptr> %writelane, ptr addrspace(1) %out, align 4
  ret void
}

; Scalar pointer in address space 3; the expected output contains a single
; v_writelane_b32 instruction per target.
define void @test_writelane_p3(ptr addrspace(1) %out, ptr addrspace(3) %src, i32 %src1) {
; GFX802-SDAG-LABEL: test_writelane_p3:
; GFX802-SDAG:       ; %bb.0:
; GFX802-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX802-SDAG-NEXT:    flat_load_dword v4, v[0:1]
; GFX802-SDAG-NEXT:    v_readfirstlane_b32 m0, v3
; GFX802-SDAG-NEXT:    v_readfirstlane_b32 s4, v2
; GFX802-SDAG-NEXT:    s_waitcnt vmcnt(0)
; GFX802-SDAG-NEXT:    s_nop 1
; GFX802-SDAG-NEXT:    v_writelane_b32 v4, s4, m0
; GFX802-SDAG-NEXT:    flat_store_dword v[0:1], v4
; GFX802-SDAG-NEXT:    s_waitcnt vmcnt(0)
; GFX802-SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GFX1010-SDAG-LABEL: test_writelane_p3:
; GFX1010-SDAG:       ; %bb.0:
; GFX1010-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1010-SDAG-NEXT:    global_load_dword v4, v[0:1], off
; GFX1010-SDAG-NEXT:    v_readfirstlane_b32 s4, v2
; GFX1010-SDAG-NEXT:    v_readfirstlane_b32 s5, v3
; GFX1010-SDAG-NEXT:    s_waitcnt vmcnt(0)
; GFX1010-SDAG-NEXT:    v_writelane_b32 v4, s4, s5
; GFX1010-SDAG-NEXT:    global_store_dword v[0:1], v4, off
; GFX1010-SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GFX1100-SDAG-LABEL: test_writelane_p3:
; GFX1100-SDAG:       ; %bb.0:
; GFX1100-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-SDAG-NEXT:    global_load_b32 v4, v[0:1], off
; GFX1100-SDAG-NEXT:    v_readfirstlane_b32 s0, v2
; GFX1100-SDAG-NEXT:    v_readfirstlane_b32 s1, v3
; GFX1100-SDAG-NEXT:    s_waitcnt vmcnt(0)
; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX1100-SDAG-NEXT:    v_writelane_b32 v4, s0, s1
; GFX1100-SDAG-NEXT:    global_store_b32 v[0:1], v4, off
; GFX1100-SDAG-NEXT:    s_setpc_b64 s[30:31]
  %oldval = load ptr addrspace(3), ptr addrspace(1) %out
  %writelane = call ptr addrspace(3) @llvm.amdgcn.writelane.p3(ptr addrspace(3) %src, i32 %src1, ptr addrspace(3) %oldval)
  store ptr addrspace(3) %writelane, ptr addrspace(1) %out, align 4
  ret void
}

; <3 x ptr> vector in address space 3; the expected output contains three
; v_writelane_b32 instructions per target.
define void @test_writelane_v3p3(ptr addrspace(1) %out, <3 x ptr addrspace(3)> %src, i32 %src1) {
; GFX802-SDAG-LABEL: test_writelane_v3p3:
; GFX802-SDAG:       ; %bb.0:
; GFX802-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX802-SDAG-NEXT:    flat_load_dwordx3 v[6:8], v[0:1]
; GFX802-SDAG-NEXT:    v_readfirstlane_b32 m0, v5
; GFX802-SDAG-NEXT:    v_readfirstlane_b32 s4, v4
; GFX802-SDAG-NEXT:    v_readfirstlane_b32 s5, v3
; GFX802-SDAG-NEXT:    v_readfirstlane_b32 s6, v2
; GFX802-SDAG-NEXT:    s_waitcnt vmcnt(0)
; GFX802-SDAG-NEXT:    v_writelane_b32 v8, s4, m0
; GFX802-SDAG-NEXT:    v_writelane_b32 v7, s5, m0
; GFX802-SDAG-NEXT:    v_writelane_b32 v6, s6, m0
; GFX802-SDAG-NEXT:    flat_store_dwordx3 v[0:1], v[6:8]
; GFX802-SDAG-NEXT:    s_waitcnt vmcnt(0)
; GFX802-SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GFX1010-SDAG-LABEL: test_writelane_v3p3:
; GFX1010-SDAG:       ; %bb.0:
; GFX1010-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1010-SDAG-NEXT:    global_load_dwordx3 v[6:8], v[0:1], off
; GFX1010-SDAG-NEXT:    v_readfirstlane_b32 s4, v4
; GFX1010-SDAG-NEXT:    v_readfirstlane_b32 s5, v5
; GFX1010-SDAG-NEXT:    v_readfirstlane_b32 s6, v3
; GFX1010-SDAG-NEXT:    v_readfirstlane_b32 s7, v2
; GFX1010-SDAG-NEXT:    s_waitcnt vmcnt(0)
; GFX1010-SDAG-NEXT:    v_writelane_b32 v8, s4, s5
; GFX1010-SDAG-NEXT:    v_writelane_b32 v7, s6, s5
; GFX1010-SDAG-NEXT:    v_writelane_b32 v6, s7, s5
; GFX1010-SDAG-NEXT:    global_store_dwordx3 v[0:1], v[6:8], off
; GFX1010-SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GFX1100-SDAG-LABEL: test_writelane_v3p3:
; GFX1100-SDAG:       ; %bb.0:
; GFX1100-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-SDAG-NEXT:    global_load_b96 v[6:8], v[0:1], off
; GFX1100-SDAG-NEXT:    v_readfirstlane_b32 s0, v4
; GFX1100-SDAG-NEXT:    v_readfirstlane_b32 s1, v5
; GFX1100-SDAG-NEXT:    v_readfirstlane_b32 s2, v3
; GFX1100-SDAG-NEXT:    v_readfirstlane_b32 s3, v2
; GFX1100-SDAG-NEXT:    s_waitcnt vmcnt(0)
; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
; GFX1100-SDAG-NEXT:    v_writelane_b32 v8, s0, s1
; GFX1100-SDAG-NEXT:    v_writelane_b32 v7, s2, s1
; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_3)
; GFX1100-SDAG-NEXT:    v_writelane_b32 v6, s3, s1
; GFX1100-SDAG-NEXT:    global_store_b96 v[0:1], v[6:8], off
; GFX1100-SDAG-NEXT:    s_setpc_b64 s[30:31]
  %oldval = load <3 x ptr addrspace(3)>, ptr addrspace(1) %out
  %writelane = call <3 x ptr addrspace(3)> @llvm.amdgcn.writelane.v3p3(<3 x ptr addrspace(3)> %src, i32 %src1, <3 x ptr addrspace(3)> %oldval)
  store <3 x ptr addrspace(3)> %writelane, ptr addrspace(1) %out, align 4
  ret void
}

; Scalar pointer in address space 5; the expected output contains a single
; v_writelane_b32 instruction per target.
define void @test_writelane_p5(ptr addrspace(1) %out, ptr addrspace(5) %src, i32 %src1) {
; GFX802-SDAG-LABEL: test_writelane_p5:
; GFX802-SDAG:       ; %bb.0:
; GFX802-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX802-SDAG-NEXT:    flat_load_dword v4, v[0:1]
; GFX802-SDAG-NEXT:    v_readfirstlane_b32 m0, v3
; GFX802-SDAG-NEXT:    v_readfirstlane_b32 s4, v2
; GFX802-SDAG-NEXT:    s_waitcnt vmcnt(0)
; GFX802-SDAG-NEXT:    s_nop 1
; GFX802-SDAG-NEXT:    v_writelane_b32 v4, s4, m0
; GFX802-SDAG-NEXT:    flat_store_dword v[0:1], v4
; GFX802-SDAG-NEXT:    s_waitcnt vmcnt(0)
; GFX802-SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GFX1010-SDAG-LABEL: test_writelane_p5:
; GFX1010-SDAG:       ; %bb.0:
; GFX1010-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1010-SDAG-NEXT:    global_load_dword v4, v[0:1], off
; GFX1010-SDAG-NEXT:    v_readfirstlane_b32 s4, v2
; GFX1010-SDAG-NEXT:    v_readfirstlane_b32 s5, v3
; GFX1010-SDAG-NEXT:    s_waitcnt vmcnt(0)
; GFX1010-SDAG-NEXT:    v_writelane_b32 v4, s4, s5
; GFX1010-SDAG-NEXT:    global_store_dword v[0:1], v4, off
; GFX1010-SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GFX1100-SDAG-LABEL: test_writelane_p5:
; GFX1100-SDAG:       ; %bb.0:
; GFX1100-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-SDAG-NEXT:    global_load_b32 v4, v[0:1], off
; GFX1100-SDAG-NEXT:    v_readfirstlane_b32 s0, v2
; GFX1100-SDAG-NEXT:    v_readfirstlane_b32 s1, v3
; GFX1100-SDAG-NEXT:    s_waitcnt vmcnt(0)
; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX1100-SDAG-NEXT:    v_writelane_b32 v4, s0, s1
; GFX1100-SDAG-NEXT:    global_store_b32 v[0:1], v4, off
; GFX1100-SDAG-NEXT:    s_setpc_b64 s[30:31]
  %oldval = load ptr addrspace(5), ptr addrspace(1) %out
  %writelane = call ptr addrspace(5) @llvm.amdgcn.writelane.p5(ptr addrspace(5) %src, i32 %src1, ptr addrspace(5) %oldval)
  store ptr addrspace(5) %writelane, ptr addrspace(1) %out, align 4
  ret void
}

; <3 x ptr> vector in address space 5; the expected output contains three
; v_writelane_b32 instructions per target.
define void @test_writelane_v3p5(ptr addrspace(1) %out, <3 x ptr addrspace(5)> %src, i32 %src1) {
; GFX802-SDAG-LABEL: test_writelane_v3p5:
; GFX802-SDAG:       ; %bb.0:
; GFX802-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX802-SDAG-NEXT:    flat_load_dwordx3 v[6:8], v[0:1]
; GFX802-SDAG-NEXT:    v_readfirstlane_b32 m0, v5
; GFX802-SDAG-NEXT:    v_readfirstlane_b32 s4, v4
; GFX802-SDAG-NEXT:    v_readfirstlane_b32 s5, v3
; GFX802-SDAG-NEXT:    v_readfirstlane_b32 s6, v2
; GFX802-SDAG-NEXT:    s_waitcnt vmcnt(0)
; GFX802-SDAG-NEXT:    v_writelane_b32 v8, s4, m0
; GFX802-SDAG-NEXT:    v_writelane_b32 v7, s5, m0
; GFX802-SDAG-NEXT:    v_writelane_b32 v6, s6, m0
; GFX802-SDAG-NEXT:    flat_store_dwordx3 v[0:1], v[6:8]
; GFX802-SDAG-NEXT:    s_waitcnt vmcnt(0)
; GFX802-SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GFX1010-SDAG-LABEL: test_writelane_v3p5:
; GFX1010-SDAG:       ; %bb.0:
; GFX1010-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1010-SDAG-NEXT:    global_load_dwordx3 v[6:8], v[0:1], off
; GFX1010-SDAG-NEXT:    v_readfirstlane_b32 s4, v4
; GFX1010-SDAG-NEXT:    v_readfirstlane_b32 s5, v5
; GFX1010-SDAG-NEXT:    v_readfirstlane_b32 s6, v3
; GFX1010-SDAG-NEXT:    v_readfirstlane_b32 s7, v2
; GFX1010-SDAG-NEXT:    s_waitcnt vmcnt(0)
; GFX1010-SDAG-NEXT:    v_writelane_b32 v8, s4, s5
; GFX1010-SDAG-NEXT:    v_writelane_b32 v7, s6, s5
; GFX1010-SDAG-NEXT:    v_writelane_b32 v6, s7, s5
; GFX1010-SDAG-NEXT:    global_store_dwordx3 v[0:1], v[6:8], off
; GFX1010-SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GFX1100-SDAG-LABEL: test_writelane_v3p5:
; GFX1100-SDAG:       ; %bb.0:
; GFX1100-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-SDAG-NEXT:    global_load_b96 v[6:8], v[0:1], off
; GFX1100-SDAG-NEXT:    v_readfirstlane_b32 s0, v4
; GFX1100-SDAG-NEXT:    v_readfirstlane_b32 s1, v5
; GFX1100-SDAG-NEXT:    v_readfirstlane_b32 s2, v3
; GFX1100-SDAG-NEXT:    v_readfirstlane_b32 s3, v2
; GFX1100-SDAG-NEXT:    s_waitcnt vmcnt(0)
; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
; GFX1100-SDAG-NEXT:    v_writelane_b32 v8, s0, s1
; GFX1100-SDAG-NEXT:    v_writelane_b32 v7, s2, s1
; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_3)
; GFX1100-SDAG-NEXT:    v_writelane_b32 v6, s3, s1
; GFX1100-SDAG-NEXT:    global_store_b96 v[0:1], v[6:8], off
; GFX1100-SDAG-NEXT:    s_setpc_b64 s[30:31]
  %oldval = load <3 x ptr addrspace(5)>, ptr addrspace(1) %out
  %writelane = call <3 x ptr addrspace(5)> @llvm.amdgcn.writelane.v3p5(<3 x ptr addrspace(5)> %src, i32 %src1, <3 x ptr addrspace(5)> %oldval)
  store <3 x ptr addrspace(5)> %writelane, ptr addrspace(1) %out, align 4
  ret void
}

; Scalar pointer in address space 6; the expected output contains a single
; v_writelane_b32 instruction per target.
define void @test_writelane_p6(ptr addrspace(1) %out, ptr addrspace(6) %src, i32 %src1) {
; GFX802-SDAG-LABEL: test_writelane_p6:
; GFX802-SDAG:       ; %bb.0:
; GFX802-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX802-SDAG-NEXT:    flat_load_dword v4, v[0:1]
; GFX802-SDAG-NEXT:    v_readfirstlane_b32 m0, v3
; GFX802-SDAG-NEXT:    v_readfirstlane_b32 s4, v2
; GFX802-SDAG-NEXT:    s_waitcnt vmcnt(0)
; GFX802-SDAG-NEXT:    s_nop 1
; GFX802-SDAG-NEXT:    v_writelane_b32 v4, s4, m0
; GFX802-SDAG-NEXT:    flat_store_dword v[0:1], v4
; GFX802-SDAG-NEXT:    s_waitcnt vmcnt(0)
; GFX802-SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GFX1010-SDAG-LABEL: test_writelane_p6:
; GFX1010-SDAG:       ; %bb.0:
; GFX1010-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1010-SDAG-NEXT:    global_load_dword v4, v[0:1], off
; GFX1010-SDAG-NEXT:    v_readfirstlane_b32 s4, v2
; GFX1010-SDAG-NEXT:    v_readfirstlane_b32 s5, v3
; GFX1010-SDAG-NEXT:    s_waitcnt vmcnt(0)
; GFX1010-SDAG-NEXT:    v_writelane_b32 v4, s4, s5
; GFX1010-SDAG-NEXT:    global_store_dword v[0:1], v4, off
; GFX1010-SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GFX1100-SDAG-LABEL: test_writelane_p6:
; GFX1100-SDAG:       ; %bb.0:
; GFX1100-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-SDAG-NEXT:    global_load_b32 v4, v[0:1], off
; GFX1100-SDAG-NEXT:    v_readfirstlane_b32 s0, v2
; GFX1100-SDAG-NEXT:    v_readfirstlane_b32 s1, v3
; GFX1100-SDAG-NEXT:    s_waitcnt vmcnt(0)
; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX1100-SDAG-NEXT:    v_writelane_b32 v4, s0, s1
; GFX1100-SDAG-NEXT:    global_store_b32 v[0:1], v4, off
; GFX1100-SDAG-NEXT:    s_setpc_b64 s[30:31]
  %oldval = load ptr addrspace(6), ptr addrspace(1) %out
  %writelane = call ptr addrspace(6) @llvm.amdgcn.writelane.p6(ptr addrspace(6) %src, i32 %src1, ptr addrspace(6) %oldval)
  store ptr addrspace(6) %writelane, ptr addrspace(1) %out, align 4
  ret void
}

; <3 x ptr> vector in address space 6; the expected output contains three
; v_writelane_b32 instructions per target.
define void @test_writelane_v3p6(ptr addrspace(1) %out, <3 x ptr addrspace(6)> %src, i32 %src1) {
; GFX802-SDAG-LABEL: test_writelane_v3p6:
; GFX802-SDAG:       ; %bb.0:
; GFX802-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX802-SDAG-NEXT:    flat_load_dwordx3 v[6:8], v[0:1]
; GFX802-SDAG-NEXT:    v_readfirstlane_b32 m0, v5
; GFX802-SDAG-NEXT:    v_readfirstlane_b32 s4, v4
; GFX802-SDAG-NEXT:    v_readfirstlane_b32 s5, v3
; GFX802-SDAG-NEXT:    v_readfirstlane_b32 s6, v2
; GFX802-SDAG-NEXT:    s_waitcnt vmcnt(0)
; GFX802-SDAG-NEXT:    v_writelane_b32 v8, s4, m0
; GFX802-SDAG-NEXT:    v_writelane_b32 v7, s5, m0
; GFX802-SDAG-NEXT:    v_writelane_b32 v6, s6, m0
; GFX802-SDAG-NEXT:    flat_store_dwordx3 v[0:1], v[6:8]
; GFX802-SDAG-NEXT:    s_waitcnt vmcnt(0)
; GFX802-SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GFX1010-SDAG-LABEL: test_writelane_v3p6:
; GFX1010-SDAG:       ; %bb.0:
; GFX1010-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1010-SDAG-NEXT:    global_load_dwordx3 v[6:8], v[0:1], off
; GFX1010-SDAG-NEXT:    v_readfirstlane_b32 s4, v4
; GFX1010-SDAG-NEXT:    v_readfirstlane_b32 s5, v5
; GFX1010-SDAG-NEXT:    v_readfirstlane_b32 s6, v3
; GFX1010-SDAG-NEXT:    v_readfirstlane_b32 s7, v2
; GFX1010-SDAG-NEXT:    s_waitcnt vmcnt(0)
; GFX1010-SDAG-NEXT:    v_writelane_b32 v8, s4, s5
; GFX1010-SDAG-NEXT:    v_writelane_b32 v7, s6, s5
; GFX1010-SDAG-NEXT:    v_writelane_b32 v6, s7, s5
; GFX1010-SDAG-NEXT:    global_store_dwordx3 v[0:1], v[6:8], off
; GFX1010-SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GFX1100-SDAG-LABEL: test_writelane_v3p6:
; GFX1100-SDAG:       ; %bb.0:
; GFX1100-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-SDAG-NEXT:    global_load_b96 v[6:8], v[0:1], off
; GFX1100-SDAG-NEXT:    v_readfirstlane_b32 s0, v4
; GFX1100-SDAG-NEXT:    v_readfirstlane_b32 s1, v5
; GFX1100-SDAG-NEXT:    v_readfirstlane_b32 s2, v3
; GFX1100-SDAG-NEXT:    v_readfirstlane_b32 s3, v2
; GFX1100-SDAG-NEXT:    s_waitcnt vmcnt(0)
; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
; GFX1100-SDAG-NEXT:    v_writelane_b32 v8, s0, s1
; GFX1100-SDAG-NEXT:    v_writelane_b32 v7, s2, s1
; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_3)
; GFX1100-SDAG-NEXT:    v_writelane_b32 v6, s3, s1
; GFX1100-SDAG-NEXT:    global_store_b96 v[0:1], v[6:8], off
; GFX1100-SDAG-NEXT:    s_setpc_b64 s[30:31]
  %oldval = load <3 x ptr addrspace(6)>, ptr addrspace(1) %out
  %writelane = call <3 x ptr addrspace(6)> @llvm.amdgcn.writelane.v3p6(<3 x ptr addrspace(6)> %src, i32 %src1, <3 x ptr addrspace(6)> %oldval)
  store <3 x ptr addrspace(6)> %writelane, ptr addrspace(1) %out, align 4
  ret void
}