; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx802 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX802-SDAG %s
; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX1010-SDAG %s
; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx1100 -verify-machineinstrs -amdgpu-enable-vopd=0 < %s | FileCheck -check-prefixes=GFX1100-SDAG %s

; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx802 -verify-machineinstrs -global-isel -global-isel-abort=2 < %s | FileCheck -check-prefixes=GFX802-GISEL %s
; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx1010 -verify-machineinstrs -global-isel -global-isel-abort=2 < %s | FileCheck -check-prefixes=GFX1010-GISEL %s
; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx1100 -verify-machineinstrs -amdgpu-enable-vopd=0 -global-isel -global-isel-abort=2 < %s | FileCheck -check-prefixes=GFX1100-GISEL %s

; Overloaded writelane declarations, spelled with the same type-suffixed names
; that the calls in the kernels below use. Operand order is
; (value to write, lane select, previous value).
declare i32 @llvm.amdgcn.writelane.i32(i32, i32, i32) #0
declare i64 @llvm.amdgcn.writelane.i64(i64, i32, i64) #0
declare double @llvm.amdgcn.writelane.f64(double, i32, double) #0

; i32 writelane whose written value (%src0) and lane select (%src1) are both
; kernel arguments. The previous value is loaded from %out and the result is
; stored back to %out. The gfx802 expectations copy the lane select into m0;
; the gfx1010/gfx1100 expectations pass it as a plain SGPR operand.
define amdgpu_kernel void @test_writelane_sreg_i32(ptr addrspace(1) %out, i32 %src0, i32 %src1) #1 {
; GFX802-SDAG-LABEL: test_writelane_sreg_i32:
; GFX802-SDAG:       ; %bb.0:
; GFX802-SDAG-NEXT:    s_load_dwordx4 s[0:3], s[8:9], 0x0
; GFX802-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
; GFX802-SDAG-NEXT:    s_mov_b32 m0, s3
; GFX802-SDAG-NEXT:    s_load_dword s3, s[0:1], 0x0
; GFX802-SDAG-NEXT:    v_mov_b32_e32 v0, s0
; GFX802-SDAG-NEXT:    v_mov_b32_e32 v1, s1
; GFX802-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
; GFX802-SDAG-NEXT:    v_mov_b32_e32 v2, s3
; GFX802-SDAG-NEXT:    v_writelane_b32 v2, s2, m0
; GFX802-SDAG-NEXT:    flat_store_dword v[0:1], v2
; GFX802-SDAG-NEXT:    s_endpgm
;
; GFX1010-SDAG-LABEL: test_writelane_sreg_i32:
; GFX1010-SDAG:       ; %bb.0:
; GFX1010-SDAG-NEXT:    s_load_dwordx4 s[0:3], s[8:9], 0x0
; GFX1010-SDAG-NEXT:    v_mov_b32_e32 v1, 0
; GFX1010-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
; GFX1010-SDAG-NEXT:    s_load_dword s4, s[0:1], 0x0
; GFX1010-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
; GFX1010-SDAG-NEXT:    v_mov_b32_e32 v0, s4
; GFX1010-SDAG-NEXT:    v_writelane_b32 v0, s2, s3
; GFX1010-SDAG-NEXT:    global_store_dword v1, v0, s[0:1]
; GFX1010-SDAG-NEXT:    s_endpgm
;
; GFX1100-SDAG-LABEL: test_writelane_sreg_i32:
; GFX1100-SDAG:       ; %bb.0:
; GFX1100-SDAG-NEXT:    s_load_b128 s[0:3], s[4:5], 0x0
; GFX1100-SDAG-NEXT:    v_mov_b32_e32 v1, 0
; GFX1100-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
; GFX1100-SDAG-NEXT:    s_load_b32 s4, s[0:1], 0x0
; GFX1100-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
; GFX1100-SDAG-NEXT:    v_mov_b32_e32 v0, s4
; GFX1100-SDAG-NEXT:    v_writelane_b32 v0, s2, s3
; GFX1100-SDAG-NEXT:    global_store_b32 v1, v0, s[0:1]
; GFX1100-SDAG-NEXT:    s_endpgm
;
; GFX802-GISEL-LABEL: test_writelane_sreg_i32:
; GFX802-GISEL:       ; %bb.0:
; GFX802-GISEL-NEXT:    s_load_dwordx4 s[0:3], s[8:9], 0x0
; GFX802-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
; GFX802-GISEL-NEXT:    s_mov_b32 m0, s3
; GFX802-GISEL-NEXT:    s_load_dword s3, s[0:1], 0x0
; GFX802-GISEL-NEXT:    v_mov_b32_e32 v0, s0
; GFX802-GISEL-NEXT:    v_mov_b32_e32 v1, s1
; GFX802-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
; GFX802-GISEL-NEXT:    v_mov_b32_e32 v2, s3
; GFX802-GISEL-NEXT:    v_writelane_b32 v2, s2, m0
; GFX802-GISEL-NEXT:    flat_store_dword v[0:1], v2
; GFX802-GISEL-NEXT:    s_endpgm
;
; GFX1010-GISEL-LABEL: test_writelane_sreg_i32:
; GFX1010-GISEL:       ; %bb.0:
; GFX1010-GISEL-NEXT:    s_load_dwordx4 s[0:3], s[8:9], 0x0
; GFX1010-GISEL-NEXT:    v_mov_b32_e32 v1, 0
; GFX1010-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
; GFX1010-GISEL-NEXT:    s_load_dword s4, s[0:1], 0x0
; GFX1010-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
; GFX1010-GISEL-NEXT:    v_mov_b32_e32 v0, s4
; GFX1010-GISEL-NEXT:    v_writelane_b32 v0, s2, s3
; GFX1010-GISEL-NEXT:    global_store_dword v1, v0, s[0:1]
; GFX1010-GISEL-NEXT:    s_endpgm
;
; GFX1100-GISEL-LABEL: test_writelane_sreg_i32:
; GFX1100-GISEL:       ; %bb.0:
; GFX1100-GISEL-NEXT:    s_load_b128 s[0:3], s[4:5], 0x0
; GFX1100-GISEL-NEXT:    v_mov_b32_e32 v1, 0
; GFX1100-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
; GFX1100-GISEL-NEXT:    s_load_b32 s4, s[0:1], 0x0
; GFX1100-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
; GFX1100-GISEL-NEXT:    v_mov_b32_e32 v0, s4
; GFX1100-GISEL-NEXT:    v_writelane_b32 v0, s2, s3
; GFX1100-GISEL-NEXT:    global_store_b32 v1, v0, s[0:1]
; GFX1100-GISEL-NEXT:    s_endpgm
  %oldval = load i32, ptr addrspace(1) %out
  %writelane = call i32 @llvm.amdgcn.writelane.i32(i32 %src0, i32 %src1, i32 %oldval)
  store i32 %writelane, ptr addrspace(1) %out, align 4
  ret void
}

; i64 variant of the kernel-argument test. Every run is expected to emit two
; v_writelane_b32 instructions, one per 32-bit half, sharing one lane select.
define amdgpu_kernel void @test_writelane_sreg_i64(ptr addrspace(1) %out, i64 %src0, i32 %src1) #1 {
; GFX802-SDAG-LABEL: test_writelane_sreg_i64:
; GFX802-SDAG:       ; %bb.0:
; GFX802-SDAG-NEXT:    s_load_dwordx4 s[0:3], s[8:9], 0x0
; GFX802-SDAG-NEXT:    s_load_dword s6, s[8:9], 0x10
; GFX802-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
; GFX802-SDAG-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x0
; GFX802-SDAG-NEXT:    s_mov_b32 m0, s6
; GFX802-SDAG-NEXT:    v_mov_b32_e32 v3, s1
; GFX802-SDAG-NEXT:    v_mov_b32_e32 v2, s0
; GFX802-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
; GFX802-SDAG-NEXT:    v_mov_b32_e32 v1, s5
; GFX802-SDAG-NEXT:    v_mov_b32_e32 v0, s4
; GFX802-SDAG-NEXT:    v_writelane_b32 v1, s3, m0
; GFX802-SDAG-NEXT:    v_writelane_b32 v0, s2, m0
; GFX802-SDAG-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
; GFX802-SDAG-NEXT:    s_endpgm
;
; GFX1010-SDAG-LABEL: test_writelane_sreg_i64:
; GFX1010-SDAG:       ; %bb.0:
; GFX1010-SDAG-NEXT:    s_clause 0x1
; GFX1010-SDAG-NEXT:    s_load_dwordx4 s[0:3], s[8:9], 0x0
; GFX1010-SDAG-NEXT:    s_load_dword s6, s[8:9], 0x10
; GFX1010-SDAG-NEXT:    v_mov_b32_e32 v2, 0
; GFX1010-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
; GFX1010-SDAG-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x0
; GFX1010-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
; GFX1010-SDAG-NEXT:    v_mov_b32_e32 v1, s5
; GFX1010-SDAG-NEXT:    v_mov_b32_e32 v0, s4
; GFX1010-SDAG-NEXT:    v_writelane_b32 v1, s3, s6
; GFX1010-SDAG-NEXT:    v_writelane_b32 v0, s2, s6
; GFX1010-SDAG-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1]
; GFX1010-SDAG-NEXT:    s_endpgm
;
; GFX1100-SDAG-LABEL: test_writelane_sreg_i64:
; GFX1100-SDAG:       ; %bb.0:
; GFX1100-SDAG-NEXT:    s_clause 0x1
; GFX1100-SDAG-NEXT:    s_load_b128 s[0:3], s[4:5], 0x0
; GFX1100-SDAG-NEXT:    s_load_b32 s6, s[4:5], 0x10
; GFX1100-SDAG-NEXT:    v_mov_b32_e32 v2, 0
; GFX1100-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
; GFX1100-SDAG-NEXT:    s_load_b64 s[4:5], s[0:1], 0x0
; GFX1100-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
; GFX1100-SDAG-NEXT:    v_mov_b32_e32 v1, s5
; GFX1100-SDAG-NEXT:    v_mov_b32_e32 v0, s4
; GFX1100-SDAG-NEXT:    v_writelane_b32 v1, s3, s6
; GFX1100-SDAG-NEXT:    v_writelane_b32 v0, s2, s6
; GFX1100-SDAG-NEXT:    global_store_b64 v2, v[0:1], s[0:1]
; GFX1100-SDAG-NEXT:    s_endpgm
;
; GFX802-GISEL-LABEL: test_writelane_sreg_i64:
; GFX802-GISEL:       ; %bb.0:
; GFX802-GISEL-NEXT:    s_load_dwordx4 s[0:3], s[8:9], 0x0
; GFX802-GISEL-NEXT:    s_load_dword s6, s[8:9], 0x10
; GFX802-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
; GFX802-GISEL-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x0
; GFX802-GISEL-NEXT:    s_mov_b32 m0, s6
; GFX802-GISEL-NEXT:    v_mov_b32_e32 v3, s1
; GFX802-GISEL-NEXT:    v_mov_b32_e32 v2, s0
; GFX802-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
; GFX802-GISEL-NEXT:    v_mov_b32_e32 v0, s4
; GFX802-GISEL-NEXT:    v_mov_b32_e32 v1, s5
; GFX802-GISEL-NEXT:    v_writelane_b32 v0, s2, m0
; GFX802-GISEL-NEXT:    v_writelane_b32 v1, s3, m0
; GFX802-GISEL-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
; GFX802-GISEL-NEXT:    s_endpgm
;
; GFX1010-GISEL-LABEL: test_writelane_sreg_i64:
; GFX1010-GISEL:       ; %bb.0:
; GFX1010-GISEL-NEXT:    s_clause 0x1
; GFX1010-GISEL-NEXT:    s_load_dwordx4 s[0:3], s[8:9], 0x0
; GFX1010-GISEL-NEXT:    s_load_dword s6, s[8:9], 0x10
; GFX1010-GISEL-NEXT:    v_mov_b32_e32 v2, 0
; GFX1010-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
; GFX1010-GISEL-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x0
; GFX1010-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
; GFX1010-GISEL-NEXT:    v_mov_b32_e32 v0, s4
; GFX1010-GISEL-NEXT:    v_mov_b32_e32 v1, s5
; GFX1010-GISEL-NEXT:    v_writelane_b32 v0, s2, s6
; GFX1010-GISEL-NEXT:    v_writelane_b32 v1, s3, s6
; GFX1010-GISEL-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1]
; GFX1010-GISEL-NEXT:    s_endpgm
;
; GFX1100-GISEL-LABEL: test_writelane_sreg_i64:
; GFX1100-GISEL:       ; %bb.0:
; GFX1100-GISEL-NEXT:    s_clause 0x1
; GFX1100-GISEL-NEXT:    s_load_b128 s[0:3], s[4:5], 0x0
; GFX1100-GISEL-NEXT:    s_load_b32 s6, s[4:5], 0x10
; GFX1100-GISEL-NEXT:    v_mov_b32_e32 v2, 0
; GFX1100-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
; GFX1100-GISEL-NEXT:    s_load_b64 s[4:5], s[0:1], 0x0
; GFX1100-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
; GFX1100-GISEL-NEXT:    v_mov_b32_e32 v0, s4
; GFX1100-GISEL-NEXT:    v_mov_b32_e32 v1, s5
; GFX1100-GISEL-NEXT:    v_writelane_b32 v0, s2, s6
; GFX1100-GISEL-NEXT:    v_writelane_b32 v1, s3, s6
; GFX1100-GISEL-NEXT:    global_store_b64 v2, v[0:1], s[0:1]
; GFX1100-GISEL-NEXT:    s_endpgm
  %oldval = load i64, ptr addrspace(1) %out
  %writelane = call i64 @llvm.amdgcn.writelane.i64(i64 %src0, i32 %src1, i64 %oldval)
  store i64 %writelane, ptr addrspace(1) %out, align 4
  ret void
}

; double variant of the kernel-argument test. The expectations match the i64
; kernel above: two v_writelane_b32 instructions, one per 32-bit half.
define amdgpu_kernel void @test_writelane_sreg_f64(ptr addrspace(1) %out, double %src0, i32 %src1) #1 {
; GFX802-SDAG-LABEL: test_writelane_sreg_f64:
; GFX802-SDAG:       ; %bb.0:
; GFX802-SDAG-NEXT:    s_load_dwordx4 s[0:3], s[8:9], 0x0
; GFX802-SDAG-NEXT:    s_load_dword s6, s[8:9], 0x10
; GFX802-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
; GFX802-SDAG-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x0
; GFX802-SDAG-NEXT:    s_mov_b32 m0, s6
; GFX802-SDAG-NEXT:    v_mov_b32_e32 v3, s1
; GFX802-SDAG-NEXT:    v_mov_b32_e32 v2, s0
; GFX802-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
; GFX802-SDAG-NEXT:    v_mov_b32_e32 v1, s5
; GFX802-SDAG-NEXT:    v_mov_b32_e32 v0, s4
; GFX802-SDAG-NEXT:    v_writelane_b32 v1, s3, m0
; GFX802-SDAG-NEXT:    v_writelane_b32 v0, s2, m0
; GFX802-SDAG-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
; GFX802-SDAG-NEXT:    s_endpgm
;
; GFX1010-SDAG-LABEL: test_writelane_sreg_f64:
; GFX1010-SDAG:       ; %bb.0:
; GFX1010-SDAG-NEXT:    s_clause 0x1
; GFX1010-SDAG-NEXT:    s_load_dwordx4 s[0:3], s[8:9], 0x0
; GFX1010-SDAG-NEXT:    s_load_dword s6, s[8:9], 0x10
; GFX1010-SDAG-NEXT:    v_mov_b32_e32 v2, 0
; GFX1010-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
; GFX1010-SDAG-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x0
; GFX1010-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
; GFX1010-SDAG-NEXT:    v_mov_b32_e32 v1, s5
; GFX1010-SDAG-NEXT:    v_mov_b32_e32 v0, s4
; GFX1010-SDAG-NEXT:    v_writelane_b32 v1, s3, s6
; GFX1010-SDAG-NEXT:    v_writelane_b32 v0, s2, s6
; GFX1010-SDAG-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1]
; GFX1010-SDAG-NEXT:    s_endpgm
;
; GFX1100-SDAG-LABEL: test_writelane_sreg_f64:
; GFX1100-SDAG:       ; %bb.0:
; GFX1100-SDAG-NEXT:    s_clause 0x1
; GFX1100-SDAG-NEXT:    s_load_b128 s[0:3], s[4:5], 0x0
; GFX1100-SDAG-NEXT:    s_load_b32 s6, s[4:5], 0x10
; GFX1100-SDAG-NEXT:    v_mov_b32_e32 v2, 0
; GFX1100-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
; GFX1100-SDAG-NEXT:    s_load_b64 s[4:5], s[0:1], 0x0
; GFX1100-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
; GFX1100-SDAG-NEXT:    v_mov_b32_e32 v1, s5
; GFX1100-SDAG-NEXT:    v_mov_b32_e32 v0, s4
; GFX1100-SDAG-NEXT:    v_writelane_b32 v1, s3, s6
; GFX1100-SDAG-NEXT:    v_writelane_b32 v0, s2, s6
; GFX1100-SDAG-NEXT:    global_store_b64 v2, v[0:1], s[0:1]
; GFX1100-SDAG-NEXT:    s_endpgm
;
; GFX802-GISEL-LABEL: test_writelane_sreg_f64:
; GFX802-GISEL:       ; %bb.0:
; GFX802-GISEL-NEXT:    s_load_dwordx4 s[0:3], s[8:9], 0x0
; GFX802-GISEL-NEXT:    s_load_dword s6, s[8:9], 0x10
; GFX802-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
; GFX802-GISEL-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x0
; GFX802-GISEL-NEXT:    s_mov_b32 m0, s6
; GFX802-GISEL-NEXT:    v_mov_b32_e32 v3, s1
; GFX802-GISEL-NEXT:    v_mov_b32_e32 v2, s0
; GFX802-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
; GFX802-GISEL-NEXT:    v_mov_b32_e32 v0, s4
; GFX802-GISEL-NEXT:    v_mov_b32_e32 v1, s5
; GFX802-GISEL-NEXT:    v_writelane_b32 v0, s2, m0
; GFX802-GISEL-NEXT:    v_writelane_b32 v1, s3, m0
; GFX802-GISEL-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
; GFX802-GISEL-NEXT:    s_endpgm
;
; GFX1010-GISEL-LABEL: test_writelane_sreg_f64:
; GFX1010-GISEL:       ; %bb.0:
; GFX1010-GISEL-NEXT:    s_clause 0x1
; GFX1010-GISEL-NEXT:    s_load_dwordx4 s[0:3], s[8:9], 0x0
; GFX1010-GISEL-NEXT:    s_load_dword s6, s[8:9], 0x10
; GFX1010-GISEL-NEXT:    v_mov_b32_e32 v2, 0
; GFX1010-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
; GFX1010-GISEL-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x0
; GFX1010-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
; GFX1010-GISEL-NEXT:    v_mov_b32_e32 v0, s4
; GFX1010-GISEL-NEXT:    v_mov_b32_e32 v1, s5
; GFX1010-GISEL-NEXT:    v_writelane_b32 v0, s2, s6
; GFX1010-GISEL-NEXT:    v_writelane_b32 v1, s3, s6
; GFX1010-GISEL-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1]
; GFX1010-GISEL-NEXT:    s_endpgm
;
; GFX1100-GISEL-LABEL: test_writelane_sreg_f64:
; GFX1100-GISEL:       ; %bb.0:
; GFX1100-GISEL-NEXT:    s_clause 0x1
; GFX1100-GISEL-NEXT:    s_load_b128 s[0:3], s[4:5], 0x0
; GFX1100-GISEL-NEXT:    s_load_b32 s6, s[4:5], 0x10
; GFX1100-GISEL-NEXT:    v_mov_b32_e32 v2, 0
; GFX1100-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
; GFX1100-GISEL-NEXT:    s_load_b64 s[4:5], s[0:1], 0x0
; GFX1100-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
; GFX1100-GISEL-NEXT:    v_mov_b32_e32 v0, s4
; GFX1100-GISEL-NEXT:    v_mov_b32_e32 v1, s5
; GFX1100-GISEL-NEXT:    v_writelane_b32 v0, s2, s6
; GFX1100-GISEL-NEXT:    v_writelane_b32 v1, s3, s6
; GFX1100-GISEL-NEXT:    global_store_b64 v2, v[0:1], s[0:1]
; GFX1100-GISEL-NEXT:    s_endpgm
  %oldval = load double, ptr addrspace(1) %out
  %writelane = call double @llvm.amdgcn.writelane.f64(double %src0, i32 %src1, double %oldval)
  store double %writelane, ptr addrspace(1) %out, align 4
  ret void
}

; i32 writelane of the constant 32 with the lane select taken from a kernel
; argument. Every run is expected to encode 32 directly as the source operand
; of v_writelane_b32.
define amdgpu_kernel void @test_writelane_imm_sreg_i32(ptr addrspace(1) %out, i32 %src1) #1 {
; GFX802-SDAG-LABEL: test_writelane_imm_sreg_i32:
; GFX802-SDAG:       ; %bb.0:
; GFX802-SDAG-NEXT:    s_load_dwordx2 s[0:1], s[8:9], 0x0
; GFX802-SDAG-NEXT:    s_load_dword s2, s[8:9], 0x8
; GFX802-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
; GFX802-SDAG-NEXT:    s_load_dword s3, s[0:1], 0x0
; GFX802-SDAG-NEXT:    v_mov_b32_e32 v0, s0
; GFX802-SDAG-NEXT:    v_mov_b32_e32 v1, s1
; GFX802-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
; GFX802-SDAG-NEXT:    v_mov_b32_e32 v2, s3
; GFX802-SDAG-NEXT:    v_writelane_b32 v2, 32, s2
; GFX802-SDAG-NEXT:    flat_store_dword v[0:1], v2
; GFX802-SDAG-NEXT:    s_endpgm
;
; GFX1010-SDAG-LABEL: test_writelane_imm_sreg_i32:
; GFX1010-SDAG:       ; %bb.0:
; GFX1010-SDAG-NEXT:    s_clause 0x1
; GFX1010-SDAG-NEXT:    s_load_dwordx2 s[0:1], s[8:9], 0x0
; GFX1010-SDAG-NEXT:    s_load_dword s2, s[8:9], 0x8
; GFX1010-SDAG-NEXT:    v_mov_b32_e32 v1, 0
; GFX1010-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
; GFX1010-SDAG-NEXT:    s_load_dword s3, s[0:1], 0x0
; GFX1010-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
; GFX1010-SDAG-NEXT:    v_mov_b32_e32 v0, s3
; GFX1010-SDAG-NEXT:    v_writelane_b32 v0, 32, s2
; GFX1010-SDAG-NEXT:    global_store_dword v1, v0, s[0:1]
; GFX1010-SDAG-NEXT:    s_endpgm
;
; GFX1100-SDAG-LABEL: test_writelane_imm_sreg_i32:
; GFX1100-SDAG:       ; %bb.0:
; GFX1100-SDAG-NEXT:    s_clause 0x1
; GFX1100-SDAG-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
; GFX1100-SDAG-NEXT:    s_load_b32 s2, s[4:5], 0x8
; GFX1100-SDAG-NEXT:    v_mov_b32_e32 v1, 0
; GFX1100-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
; GFX1100-SDAG-NEXT:    s_load_b32 s3, s[0:1], 0x0
; GFX1100-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
; GFX1100-SDAG-NEXT:    v_mov_b32_e32 v0, s3
; GFX1100-SDAG-NEXT:    v_writelane_b32 v0, 32, s2
; GFX1100-SDAG-NEXT:    global_store_b32 v1, v0, s[0:1]
; GFX1100-SDAG-NEXT:    s_endpgm
;
; GFX802-GISEL-LABEL: test_writelane_imm_sreg_i32:
; GFX802-GISEL:       ; %bb.0:
; GFX802-GISEL-NEXT:    s_load_dwordx2 s[0:1], s[8:9], 0x0
; GFX802-GISEL-NEXT:    s_load_dword s2, s[8:9], 0x8
; GFX802-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
; GFX802-GISEL-NEXT:    s_load_dword s3, s[0:1], 0x0
; GFX802-GISEL-NEXT:    v_mov_b32_e32 v0, s0
; GFX802-GISEL-NEXT:    v_mov_b32_e32 v1, s1
; GFX802-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
; GFX802-GISEL-NEXT:    v_mov_b32_e32 v2, s3
; GFX802-GISEL-NEXT:    v_writelane_b32 v2, 32, s2
; GFX802-GISEL-NEXT:    flat_store_dword v[0:1], v2
; GFX802-GISEL-NEXT:    s_endpgm
;
; GFX1010-GISEL-LABEL: test_writelane_imm_sreg_i32:
; GFX1010-GISEL:       ; %bb.0:
; GFX1010-GISEL-NEXT:    s_clause 0x1
; GFX1010-GISEL-NEXT:    s_load_dwordx2 s[0:1], s[8:9], 0x0
; GFX1010-GISEL-NEXT:    s_load_dword s2, s[8:9], 0x8
; GFX1010-GISEL-NEXT:    v_mov_b32_e32 v1, 0
; GFX1010-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
; GFX1010-GISEL-NEXT:    s_load_dword s3, s[0:1], 0x0
; GFX1010-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
; GFX1010-GISEL-NEXT:    v_mov_b32_e32 v0, s3
; GFX1010-GISEL-NEXT:    v_writelane_b32 v0, 32, s2
; GFX1010-GISEL-NEXT:    global_store_dword v1, v0, s[0:1]
; GFX1010-GISEL-NEXT:    s_endpgm
;
; GFX1100-GISEL-LABEL: test_writelane_imm_sreg_i32:
; GFX1100-GISEL:       ; %bb.0:
; GFX1100-GISEL-NEXT:    s_clause 0x1
; GFX1100-GISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
; GFX1100-GISEL-NEXT:    s_load_b32 s2, s[4:5], 0x8
; GFX1100-GISEL-NEXT:    v_mov_b32_e32 v1, 0
; GFX1100-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
; GFX1100-GISEL-NEXT:    s_load_b32 s3, s[0:1], 0x0
; GFX1100-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
; GFX1100-GISEL-NEXT:    v_mov_b32_e32 v0, s3
; GFX1100-GISEL-NEXT:    v_writelane_b32 v0, 32, s2
; GFX1100-GISEL-NEXT:    global_store_b32 v1, v0, s[0:1]
; GFX1100-GISEL-NEXT:    s_endpgm
  %oldval = load i32, ptr addrspace(1) %out
  %writelane = call i32 @llvm.amdgcn.writelane.i32(i32 32, i32 %src1, i32 %oldval)
  store i32 %writelane, ptr addrspace(1) %out, align 4
  ret void
}

; i64 writelane of the constant 32. The expectations write 32 into the
; low-half register and 0 into the high-half register.
define amdgpu_kernel void @test_writelane_imm_sreg_i64(ptr addrspace(1) %out, i32 %src1) #1 {
; GFX802-SDAG-LABEL: test_writelane_imm_sreg_i64:
; GFX802-SDAG:       ; %bb.0:
; GFX802-SDAG-NEXT:    s_load_dwordx2 s[0:1], s[8:9], 0x0
; GFX802-SDAG-NEXT:    s_load_dword s4, s[8:9], 0x8
; GFX802-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
; GFX802-SDAG-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
; GFX802-SDAG-NEXT:    v_mov_b32_e32 v3, s1
; GFX802-SDAG-NEXT:    v_mov_b32_e32 v2, s0
; GFX802-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
; GFX802-SDAG-NEXT:    v_mov_b32_e32 v1, s3
; GFX802-SDAG-NEXT:    v_mov_b32_e32 v0, s2
; GFX802-SDAG-NEXT:    v_writelane_b32 v1, 0, s4
; GFX802-SDAG-NEXT:    v_writelane_b32 v0, 32, s4
; GFX802-SDAG-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
; GFX802-SDAG-NEXT:    s_endpgm
;
; GFX1010-SDAG-LABEL: test_writelane_imm_sreg_i64:
; GFX1010-SDAG:       ; %bb.0:
; GFX1010-SDAG-NEXT:    s_clause 0x1
; GFX1010-SDAG-NEXT:    s_load_dwordx2 s[0:1], s[8:9], 0x0
; GFX1010-SDAG-NEXT:    s_load_dword s4, s[8:9], 0x8
; GFX1010-SDAG-NEXT:    v_mov_b32_e32 v2, 0
; GFX1010-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
; GFX1010-SDAG-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
; GFX1010-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
; GFX1010-SDAG-NEXT:    v_mov_b32_e32 v1, s3
; GFX1010-SDAG-NEXT:    v_mov_b32_e32 v0, s2
; GFX1010-SDAG-NEXT:    v_writelane_b32 v1, 0, s4
; GFX1010-SDAG-NEXT:    v_writelane_b32 v0, 32, s4
; GFX1010-SDAG-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1]
; GFX1010-SDAG-NEXT:    s_endpgm
;
; GFX1100-SDAG-LABEL: test_writelane_imm_sreg_i64:
; GFX1100-SDAG:       ; %bb.0:
; GFX1100-SDAG-NEXT:    s_clause 0x1
; GFX1100-SDAG-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
; GFX1100-SDAG-NEXT:    s_load_b32 s4, s[4:5], 0x8
; GFX1100-SDAG-NEXT:    v_mov_b32_e32 v2, 0
; GFX1100-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
; GFX1100-SDAG-NEXT:    s_load_b64 s[2:3], s[0:1], 0x0
; GFX1100-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
; GFX1100-SDAG-NEXT:    v_mov_b32_e32 v1, s3
; GFX1100-SDAG-NEXT:    v_mov_b32_e32 v0, s2
; GFX1100-SDAG-NEXT:    v_writelane_b32 v1, 0, s4
; GFX1100-SDAG-NEXT:    v_writelane_b32 v0, 32, s4
; GFX1100-SDAG-NEXT:    global_store_b64 v2, v[0:1], s[0:1]
; GFX1100-SDAG-NEXT:    s_endpgm
;
; GFX802-GISEL-LABEL: test_writelane_imm_sreg_i64:
; GFX802-GISEL:       ; %bb.0:
; GFX802-GISEL-NEXT:    s_load_dwordx2 s[0:1], s[8:9], 0x0
; GFX802-GISEL-NEXT:    s_load_dword s4, s[8:9], 0x8
; GFX802-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
; GFX802-GISEL-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
; GFX802-GISEL-NEXT:    v_mov_b32_e32 v3, s1
; GFX802-GISEL-NEXT:    v_mov_b32_e32 v2, s0
; GFX802-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
; GFX802-GISEL-NEXT:    v_mov_b32_e32 v0, s2
; GFX802-GISEL-NEXT:    v_mov_b32_e32 v1, s3
; GFX802-GISEL-NEXT:    v_writelane_b32 v0, 32, s4
; GFX802-GISEL-NEXT:    v_writelane_b32 v1, 0, s4
; GFX802-GISEL-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
; GFX802-GISEL-NEXT:    s_endpgm
;
; GFX1010-GISEL-LABEL: test_writelane_imm_sreg_i64:
; GFX1010-GISEL:       ; %bb.0:
; GFX1010-GISEL-NEXT:    s_clause 0x1
; GFX1010-GISEL-NEXT:    s_load_dwordx2 s[0:1], s[8:9], 0x0
; GFX1010-GISEL-NEXT:    s_load_dword s4, s[8:9], 0x8
; GFX1010-GISEL-NEXT:    v_mov_b32_e32 v2, 0
; GFX1010-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
; GFX1010-GISEL-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
; GFX1010-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
; GFX1010-GISEL-NEXT:    v_mov_b32_e32 v0, s2
; GFX1010-GISEL-NEXT:    v_mov_b32_e32 v1, s3
; GFX1010-GISEL-NEXT:    v_writelane_b32 v0, 32, s4
; GFX1010-GISEL-NEXT:    v_writelane_b32 v1, 0, s4
; GFX1010-GISEL-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1]
; GFX1010-GISEL-NEXT:    s_endpgm
;
; GFX1100-GISEL-LABEL: test_writelane_imm_sreg_i64:
; GFX1100-GISEL:       ; %bb.0:
; GFX1100-GISEL-NEXT:    s_clause 0x1
; GFX1100-GISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
; GFX1100-GISEL-NEXT:    s_load_b32 s4, s[4:5], 0x8
; GFX1100-GISEL-NEXT:    v_mov_b32_e32 v2, 0
; GFX1100-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
; GFX1100-GISEL-NEXT:    s_load_b64 s[2:3], s[0:1], 0x0
; GFX1100-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
; GFX1100-GISEL-NEXT:    v_mov_b32_e32 v0, s2
; GFX1100-GISEL-NEXT:    v_mov_b32_e32 v1, s3
; GFX1100-GISEL-NEXT:    v_writelane_b32 v0, 32, s4
; GFX1100-GISEL-NEXT:    v_writelane_b32 v1, 0, s4
; GFX1100-GISEL-NEXT:    global_store_b64 v2, v[0:1], s[0:1]
; GFX1100-GISEL-NEXT:    s_endpgm
  %oldval = load i64, ptr addrspace(1) %out
  %writelane = call i64 @llvm.amdgcn.writelane.i64(i64 32, i32 %src1, i64 %oldval)
  store i64 %writelane, ptr addrspace(1) %out, align 4
  ret void
}

; double writelane of the constant 32.0. The expectations write 0 into the
; low-half register and 0x40400000, first moved into an SGPR with s_mov_b32,
; into the high-half register. The gfx802 runs additionally copy the lane
; select into m0 for the high-half write.
define amdgpu_kernel void @test_writelane_imm_sreg_f64(ptr addrspace(1) %out, i32 %src1) #1 {
; GFX802-SDAG-LABEL: test_writelane_imm_sreg_f64:
; GFX802-SDAG:       ; %bb.0:
; GFX802-SDAG-NEXT:    s_load_dwordx2 s[0:1], s[8:9], 0x0
; GFX802-SDAG-NEXT:    s_load_dword s4, s[8:9], 0x8
; GFX802-SDAG-NEXT:    s_mov_b32 s5, 0x40400000
; GFX802-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
; GFX802-SDAG-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
; GFX802-SDAG-NEXT:    s_mov_b32 m0, s4
; GFX802-SDAG-NEXT:    v_mov_b32_e32 v3, s1
; GFX802-SDAG-NEXT:    v_mov_b32_e32 v2, s0
; GFX802-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
; GFX802-SDAG-NEXT:    v_mov_b32_e32 v1, s3
; GFX802-SDAG-NEXT:    v_mov_b32_e32 v0, s2
; GFX802-SDAG-NEXT:    v_writelane_b32 v1, s5, m0
; GFX802-SDAG-NEXT:    v_writelane_b32 v0, 0, s4
; GFX802-SDAG-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
; GFX802-SDAG-NEXT:    s_endpgm
;
; GFX1010-SDAG-LABEL: test_writelane_imm_sreg_f64:
; GFX1010-SDAG:       ; %bb.0:
; GFX1010-SDAG-NEXT:    s_clause 0x1
; GFX1010-SDAG-NEXT:    s_load_dwordx2 s[0:1], s[8:9], 0x0
; GFX1010-SDAG-NEXT:    s_load_dword s4, s[8:9], 0x8
; GFX1010-SDAG-NEXT:    v_mov_b32_e32 v2, 0
; GFX1010-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
; GFX1010-SDAG-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
; GFX1010-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
; GFX1010-SDAG-NEXT:    v_mov_b32_e32 v1, s3
; GFX1010-SDAG-NEXT:    v_mov_b32_e32 v0, s2
; GFX1010-SDAG-NEXT:    s_mov_b32 s2, 0x40400000
; GFX1010-SDAG-NEXT:    v_writelane_b32 v1, s2, s4
; GFX1010-SDAG-NEXT:    v_writelane_b32 v0, 0, s4
; GFX1010-SDAG-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1]
; GFX1010-SDAG-NEXT:    s_endpgm
;
; GFX1100-SDAG-LABEL: test_writelane_imm_sreg_f64:
; GFX1100-SDAG:       ; %bb.0:
; GFX1100-SDAG-NEXT:    s_clause 0x1
; GFX1100-SDAG-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
; GFX1100-SDAG-NEXT:    s_load_b32 s4, s[4:5], 0x8
; GFX1100-SDAG-NEXT:    v_mov_b32_e32 v2, 0
; GFX1100-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
; GFX1100-SDAG-NEXT:    s_load_b64 s[2:3], s[0:1], 0x0
; GFX1100-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
; GFX1100-SDAG-NEXT:    v_mov_b32_e32 v1, s3
; GFX1100-SDAG-NEXT:    v_mov_b32_e32 v0, s2
; GFX1100-SDAG-NEXT:    s_mov_b32 s2, 0x40400000
; GFX1100-SDAG-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
; GFX1100-SDAG-NEXT:    v_writelane_b32 v1, s2, s4
; GFX1100-SDAG-NEXT:    v_writelane_b32 v0, 0, s4
; GFX1100-SDAG-NEXT:    global_store_b64 v2, v[0:1], s[0:1]
; GFX1100-SDAG-NEXT:    s_endpgm
;
; GFX802-GISEL-LABEL: test_writelane_imm_sreg_f64:
; GFX802-GISEL:       ; %bb.0:
; GFX802-GISEL-NEXT:    s_load_dwordx2 s[0:1], s[8:9], 0x0
; GFX802-GISEL-NEXT:    s_load_dword s4, s[8:9], 0x8
; GFX802-GISEL-NEXT:    s_mov_b32 s5, 0x40400000
; GFX802-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
; GFX802-GISEL-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
; GFX802-GISEL-NEXT:    s_mov_b32 m0, s4
; GFX802-GISEL-NEXT:    v_mov_b32_e32 v3, s1
; GFX802-GISEL-NEXT:    v_mov_b32_e32 v2, s0
; GFX802-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
; GFX802-GISEL-NEXT:    v_mov_b32_e32 v0, s2
; GFX802-GISEL-NEXT:    v_mov_b32_e32 v1, s3
; GFX802-GISEL-NEXT:    v_writelane_b32 v0, 0, s4
; GFX802-GISEL-NEXT:    v_writelane_b32 v1, s5, m0
; GFX802-GISEL-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
; GFX802-GISEL-NEXT:    s_endpgm
;
; GFX1010-GISEL-LABEL: test_writelane_imm_sreg_f64:
; GFX1010-GISEL:       ; %bb.0:
; GFX1010-GISEL-NEXT:    s_clause 0x1
; GFX1010-GISEL-NEXT:    s_load_dwordx2 s[0:1], s[8:9], 0x0
; GFX1010-GISEL-NEXT:    s_load_dword s4, s[8:9], 0x8
; GFX1010-GISEL-NEXT:    v_mov_b32_e32 v2, 0
; GFX1010-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
; GFX1010-GISEL-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
; GFX1010-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
; GFX1010-GISEL-NEXT:    v_mov_b32_e32 v0, s2
; GFX1010-GISEL-NEXT:    v_mov_b32_e32 v1, s3
; GFX1010-GISEL-NEXT:    s_mov_b32 s2, 0x40400000
; GFX1010-GISEL-NEXT:    v_writelane_b32 v0, 0, s4
; GFX1010-GISEL-NEXT:    v_writelane_b32 v1, s2, s4
; GFX1010-GISEL-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1]
; GFX1010-GISEL-NEXT:    s_endpgm
;
; GFX1100-GISEL-LABEL: test_writelane_imm_sreg_f64:
; GFX1100-GISEL:       ; %bb.0:
; GFX1100-GISEL-NEXT:    s_clause 0x1
; GFX1100-GISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x0
; GFX1100-GISEL-NEXT:    s_load_b32 s4, s[4:5], 0x8
; GFX1100-GISEL-NEXT:    v_mov_b32_e32 v2, 0
; GFX1100-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
; GFX1100-GISEL-NEXT:    s_load_b64 s[2:3], s[0:1], 0x0
; GFX1100-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
; GFX1100-GISEL-NEXT:    v_mov_b32_e32 v0, s2
; GFX1100-GISEL-NEXT:    v_mov_b32_e32 v1, s3
; GFX1100-GISEL-NEXT:    s_mov_b32 s2, 0x40400000
; GFX1100-GISEL-NEXT:    v_writelane_b32 v0, 0, s4
; GFX1100-GISEL-NEXT:    v_writelane_b32 v1, s2, s4
; GFX1100-GISEL-NEXT:    global_store_b64 v2, v[0:1], s[0:1]
; GFX1100-GISEL-NEXT:    s_endpgm
  %oldval = load double, ptr addrspace(1) %out
  %writelane = call double @llvm.amdgcn.writelane.f64(double 32.0, i32 %src1, double %oldval)
  store double %writelane, ptr addrspace(1) %out, align 4
  ret void
}

; i32 writelane of the constant 12 whose lane select is element 1 of a
; <2 x i32> loaded from %in at the index given by the workitem id. Every run is
; expected to pass the loaded value through v_readfirstlane_b32 before using it
; as the lane-select operand.
define amdgpu_kernel void @test_writelane_vreg_lane_i32(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
; GFX802-SDAG-LABEL: test_writelane_vreg_lane_i32:
; GFX802-SDAG:       ; %bb.0:
; GFX802-SDAG-NEXT:    s_load_dwordx4 s[0:3], s[8:9], 0x0
; GFX802-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 3, v0
; GFX802-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
; GFX802-SDAG-NEXT:    v_mov_b32_e32 v1, s3
; GFX802-SDAG-NEXT:    v_add_u32_e32 v0, vcc, s2, v0
; GFX802-SDAG-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
; GFX802-SDAG-NEXT:    v_add_u32_e32 v0, vcc, 4, v0
; GFX802-SDAG-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
; GFX802-SDAG-NEXT:    flat_load_dword v0, v[0:1]
; GFX802-SDAG-NEXT:    s_load_dword s2, s[0:1], 0x0
; GFX802-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
; GFX802-SDAG-NEXT:    v_mov_b32_e32 v2, s2
; GFX802-SDAG-NEXT:    s_waitcnt vmcnt(0)
; GFX802-SDAG-NEXT:    v_readfirstlane_b32 s2, v0
; GFX802-SDAG-NEXT:    v_mov_b32_e32 v0, s0
; GFX802-SDAG-NEXT:    s_nop 2
; GFX802-SDAG-NEXT:    v_writelane_b32 v2, 12, s2
; GFX802-SDAG-NEXT:    v_mov_b32_e32 v1, s1
; GFX802-SDAG-NEXT:    flat_store_dword v[0:1], v2
; GFX802-SDAG-NEXT:    s_endpgm
;
; GFX1010-SDAG-LABEL: test_writelane_vreg_lane_i32:
; GFX1010-SDAG:       ; %bb.0:
; GFX1010-SDAG-NEXT:    s_load_dwordx4 s[0:3], s[8:9], 0x0
; GFX1010-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 3, v0
; GFX1010-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
; GFX1010-SDAG-NEXT:    global_load_dword v0, v0, s[2:3] offset:4
; GFX1010-SDAG-NEXT:    s_waitcnt_depctr 0xffe3
; GFX1010-SDAG-NEXT:    s_load_dword s2, s[0:1], 0x0
; GFX1010-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
; GFX1010-SDAG-NEXT:    v_mov_b32_e32 v1, s2
; GFX1010-SDAG-NEXT:    s_waitcnt vmcnt(0)
; GFX1010-SDAG-NEXT:    v_readfirstlane_b32 s2, v0
; GFX1010-SDAG-NEXT:    v_mov_b32_e32 v0, 0
; GFX1010-SDAG-NEXT:    v_writelane_b32 v1, 12, s2
; GFX1010-SDAG-NEXT:    global_store_dword v0, v1, s[0:1]
; GFX1010-SDAG-NEXT:    s_endpgm
;
; GFX1100-SDAG-LABEL: test_writelane_vreg_lane_i32:
; GFX1100-SDAG:       ; %bb.0:
; GFX1100-SDAG-NEXT:    s_load_b128 s[0:3], s[4:5], 0x0
; GFX1100-SDAG-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX1100-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 3, v0
; GFX1100-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
; GFX1100-SDAG-NEXT:    global_load_b32 v0, v0, s[2:3] offset:4
; GFX1100-SDAG-NEXT:    s_load_b32 s2, s[0:1], 0x0
; GFX1100-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
; GFX1100-SDAG-NEXT:    v_mov_b32_e32 v1, s2
; GFX1100-SDAG-NEXT:    s_waitcnt vmcnt(0)
; GFX1100-SDAG-NEXT:    v_readfirstlane_b32 s2, v0
; GFX1100-SDAG-NEXT:    v_mov_b32_e32 v0, 0
; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2)
; GFX1100-SDAG-NEXT:    v_writelane_b32 v1, 12, s2
; GFX1100-SDAG-NEXT:    global_store_b32 v0, v1, s[0:1]
; GFX1100-SDAG-NEXT:    s_endpgm
;
; GFX802-GISEL-LABEL: test_writelane_vreg_lane_i32:
; GFX802-GISEL:       ; %bb.0:
; GFX802-GISEL-NEXT:    s_load_dwordx4 s[0:3], s[8:9], 0x0
; GFX802-GISEL-NEXT:    v_lshlrev_b32_e32 v2, 3, v0
; GFX802-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
; GFX802-GISEL-NEXT:    v_mov_b32_e32 v0, s2
; GFX802-GISEL-NEXT:    v_mov_b32_e32 v1, s3
; GFX802-GISEL-NEXT:    v_add_u32_e32 v0, vcc, v0, v2
; GFX802-GISEL-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
; GFX802-GISEL-NEXT:    v_add_u32_e32 v0, vcc, 4, v0
; GFX802-GISEL-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
; GFX802-GISEL-NEXT:    flat_load_dword v0, v[0:1]
; GFX802-GISEL-NEXT:    s_load_dword s2, s[0:1], 0x0
; GFX802-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
; GFX802-GISEL-NEXT:    v_mov_b32_e32 v2, s2
; GFX802-GISEL-NEXT:    s_waitcnt vmcnt(0)
; GFX802-GISEL-NEXT:    v_readfirstlane_b32 s2, v0
; GFX802-GISEL-NEXT:    v_mov_b32_e32 v0, s0
; GFX802-GISEL-NEXT:    s_nop 2
; GFX802-GISEL-NEXT:    v_writelane_b32 v2, 12, s2
; GFX802-GISEL-NEXT:    v_mov_b32_e32 v1, s1
; GFX802-GISEL-NEXT:    flat_store_dword v[0:1], v2
; GFX802-GISEL-NEXT:    s_endpgm
;
; GFX1010-GISEL-LABEL: test_writelane_vreg_lane_i32:
; GFX1010-GISEL:       ; %bb.0:
; GFX1010-GISEL-NEXT:    s_load_dwordx4 s[0:3], s[8:9], 0x0
; GFX1010-GISEL-NEXT:    v_lshlrev_b32_e32 v0, 3, v0
; GFX1010-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
; GFX1010-GISEL-NEXT:    global_load_dword v0, v0, s[2:3] offset:4
; GFX1010-GISEL-NEXT:    s_waitcnt_depctr 0xffe3
; GFX1010-GISEL-NEXT:    s_load_dword s2, s[0:1], 0x0
; GFX1010-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
; GFX1010-GISEL-NEXT:    v_mov_b32_e32 v1, s2
; GFX1010-GISEL-NEXT:    s_waitcnt vmcnt(0)
; GFX1010-GISEL-NEXT:    v_readfirstlane_b32 s2, v0
; GFX1010-GISEL-NEXT:    v_mov_b32_e32 v0, 0
; GFX1010-GISEL-NEXT:    v_writelane_b32 v1, 12, s2
; GFX1010-GISEL-NEXT:    global_store_dword v0, v1, s[0:1]
; GFX1010-GISEL-NEXT:    s_endpgm
;
; GFX1100-GISEL-LABEL: test_writelane_vreg_lane_i32:
; GFX1100-GISEL:       ; %bb.0:
; GFX1100-GISEL-NEXT:    s_load_b128 s[0:3], s[4:5], 0x0
; GFX1100-GISEL-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
; GFX1100-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX1100-GISEL-NEXT:    v_lshlrev_b32_e32 v0, 3, v0
; GFX1100-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
; GFX1100-GISEL-NEXT:    global_load_b32 v0, v0, s[2:3] offset:4
; GFX1100-GISEL-NEXT:    s_load_b32 s2, s[0:1], 0x0
; GFX1100-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
; GFX1100-GISEL-NEXT:    v_mov_b32_e32 v1, s2
; GFX1100-GISEL-NEXT:    s_waitcnt vmcnt(0)
; GFX1100-GISEL-NEXT:    v_readfirstlane_b32 s2, v0
; GFX1100-GISEL-NEXT:    v_mov_b32_e32 v0, 0
; GFX1100-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_2)
; GFX1100-GISEL-NEXT:    v_writelane_b32 v1, 12, s2
; GFX1100-GISEL-NEXT:    global_store_b32 v0, v1, s[0:1]
; GFX1100-GISEL-NEXT:    s_endpgm
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep.in = getelementptr <2 x i32>, ptr addrspace(1) %in, i32 %tid
  %args = load <2 x i32>, ptr addrspace(1) %gep.in
  %oldval = load i32, ptr addrspace(1) %out
  %lane = extractelement <2 x i32> %args, i32 1
  %writelane = call i32 @llvm.amdgcn.writelane.i32(i32 12, i32 %lane, i32 %oldval)
  store i32 %writelane, ptr addrspace(1) %out, align 4
  ret void
}

define amdgpu_kernel void @test_writelane_vreg_lane_i64(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
; GFX802-SDAG-LABEL: test_writelane_vreg_lane_i64:
; GFX802-SDAG:       ; %bb.0:
; GFX802-SDAG-NEXT:    s_load_dwordx4
s[0:3], s[8:9], 0x0 740; GFX802-SDAG-NEXT: v_lshlrev_b32_e32 v0, 4, v0 741; GFX802-SDAG-NEXT: s_waitcnt lgkmcnt(0) 742; GFX802-SDAG-NEXT: v_mov_b32_e32 v1, s3 743; GFX802-SDAG-NEXT: v_add_u32_e32 v0, vcc, s2, v0 744; GFX802-SDAG-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 745; GFX802-SDAG-NEXT: v_add_u32_e32 v0, vcc, 8, v0 746; GFX802-SDAG-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 747; GFX802-SDAG-NEXT: flat_load_dword v2, v[0:1] 748; GFX802-SDAG-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 749; GFX802-SDAG-NEXT: s_waitcnt lgkmcnt(0) 750; GFX802-SDAG-NEXT: v_mov_b32_e32 v1, s3 751; GFX802-SDAG-NEXT: v_mov_b32_e32 v0, s2 752; GFX802-SDAG-NEXT: s_waitcnt vmcnt(0) 753; GFX802-SDAG-NEXT: v_readfirstlane_b32 s2, v2 754; GFX802-SDAG-NEXT: v_mov_b32_e32 v3, s1 755; GFX802-SDAG-NEXT: s_nop 2 756; GFX802-SDAG-NEXT: v_writelane_b32 v1, 0, s2 757; GFX802-SDAG-NEXT: v_writelane_b32 v0, 12, s2 758; GFX802-SDAG-NEXT: v_mov_b32_e32 v2, s0 759; GFX802-SDAG-NEXT: flat_store_dwordx2 v[2:3], v[0:1] 760; GFX802-SDAG-NEXT: s_endpgm 761; 762; GFX1010-SDAG-LABEL: test_writelane_vreg_lane_i64: 763; GFX1010-SDAG: ; %bb.0: 764; GFX1010-SDAG-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0 765; GFX1010-SDAG-NEXT: v_lshlrev_b32_e32 v0, 4, v0 766; GFX1010-SDAG-NEXT: v_mov_b32_e32 v2, 0 767; GFX1010-SDAG-NEXT: s_waitcnt lgkmcnt(0) 768; GFX1010-SDAG-NEXT: global_load_dword v0, v0, s[2:3] offset:8 769; GFX1010-SDAG-NEXT: s_waitcnt_depctr 0xffe3 770; GFX1010-SDAG-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 771; GFX1010-SDAG-NEXT: s_waitcnt lgkmcnt(0) 772; GFX1010-SDAG-NEXT: v_mov_b32_e32 v1, s3 773; GFX1010-SDAG-NEXT: s_waitcnt vmcnt(0) 774; GFX1010-SDAG-NEXT: v_readfirstlane_b32 s3, v0 775; GFX1010-SDAG-NEXT: v_mov_b32_e32 v0, s2 776; GFX1010-SDAG-NEXT: v_writelane_b32 v1, 0, s3 777; GFX1010-SDAG-NEXT: v_writelane_b32 v0, 12, s3 778; GFX1010-SDAG-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] 779; GFX1010-SDAG-NEXT: s_endpgm 780; 781; GFX1100-SDAG-LABEL: test_writelane_vreg_lane_i64: 782; GFX1100-SDAG: ; %bb.0: 783; 
GFX1100-SDAG-NEXT: s_load_b128 s[0:3], s[4:5], 0x0 784; GFX1100-SDAG-NEXT: v_and_b32_e32 v0, 0x3ff, v0 785; GFX1100-SDAG-NEXT: v_mov_b32_e32 v2, 0 786; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) 787; GFX1100-SDAG-NEXT: v_lshlrev_b32_e32 v0, 4, v0 788; GFX1100-SDAG-NEXT: s_waitcnt lgkmcnt(0) 789; GFX1100-SDAG-NEXT: global_load_b32 v0, v0, s[2:3] offset:8 790; GFX1100-SDAG-NEXT: s_load_b64 s[2:3], s[0:1], 0x0 791; GFX1100-SDAG-NEXT: s_waitcnt lgkmcnt(0) 792; GFX1100-SDAG-NEXT: v_mov_b32_e32 v1, s3 793; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) 794; GFX1100-SDAG-NEXT: v_readfirstlane_b32 s3, v0 795; GFX1100-SDAG-NEXT: v_mov_b32_e32 v0, s2 796; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) 797; GFX1100-SDAG-NEXT: v_writelane_b32 v1, 0, s3 798; GFX1100-SDAG-NEXT: v_writelane_b32 v0, 12, s3 799; GFX1100-SDAG-NEXT: global_store_b64 v2, v[0:1], s[0:1] 800; GFX1100-SDAG-NEXT: s_endpgm 801; 802; GFX802-GISEL-LABEL: test_writelane_vreg_lane_i64: 803; GFX802-GISEL: ; %bb.0: 804; GFX802-GISEL-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0 805; GFX802-GISEL-NEXT: v_lshlrev_b32_e32 v2, 4, v0 806; GFX802-GISEL-NEXT: s_waitcnt lgkmcnt(0) 807; GFX802-GISEL-NEXT: v_mov_b32_e32 v0, s2 808; GFX802-GISEL-NEXT: v_mov_b32_e32 v1, s3 809; GFX802-GISEL-NEXT: v_add_u32_e32 v0, vcc, v0, v2 810; GFX802-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 811; GFX802-GISEL-NEXT: v_add_u32_e32 v0, vcc, 8, v0 812; GFX802-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 813; GFX802-GISEL-NEXT: flat_load_dwordx2 v[0:1], v[0:1] 814; GFX802-GISEL-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 815; GFX802-GISEL-NEXT: v_mov_b32_e32 v4, s1 816; GFX802-GISEL-NEXT: v_mov_b32_e32 v3, s0 817; GFX802-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 818; GFX802-GISEL-NEXT: v_mov_b32_e32 v1, s2 819; GFX802-GISEL-NEXT: v_mov_b32_e32 v2, s3 820; GFX802-GISEL-NEXT: v_readfirstlane_b32 s2, v0 821; GFX802-GISEL-NEXT: s_nop 3 822; GFX802-GISEL-NEXT: v_writelane_b32 v1, 12, s2 823; GFX802-GISEL-NEXT: v_writelane_b32 v2, 0, s2 824; 
GFX802-GISEL-NEXT: flat_store_dwordx2 v[3:4], v[1:2] 825; GFX802-GISEL-NEXT: s_endpgm 826; 827; GFX1010-GISEL-LABEL: test_writelane_vreg_lane_i64: 828; GFX1010-GISEL: ; %bb.0: 829; GFX1010-GISEL-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0 830; GFX1010-GISEL-NEXT: v_lshlrev_b32_e32 v0, 4, v0 831; GFX1010-GISEL-NEXT: s_waitcnt lgkmcnt(0) 832; GFX1010-GISEL-NEXT: global_load_dwordx2 v[0:1], v0, s[2:3] offset:8 833; GFX1010-GISEL-NEXT: s_waitcnt_depctr 0xffe3 834; GFX1010-GISEL-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 835; GFX1010-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 836; GFX1010-GISEL-NEXT: v_mov_b32_e32 v1, s2 837; GFX1010-GISEL-NEXT: v_mov_b32_e32 v2, s3 838; GFX1010-GISEL-NEXT: v_readfirstlane_b32 s2, v0 839; GFX1010-GISEL-NEXT: v_mov_b32_e32 v0, 0 840; GFX1010-GISEL-NEXT: v_writelane_b32 v1, 12, s2 841; GFX1010-GISEL-NEXT: v_writelane_b32 v2, 0, s2 842; GFX1010-GISEL-NEXT: global_store_dwordx2 v0, v[1:2], s[0:1] 843; GFX1010-GISEL-NEXT: s_endpgm 844; 845; GFX1100-GISEL-LABEL: test_writelane_vreg_lane_i64: 846; GFX1100-GISEL: ; %bb.0: 847; GFX1100-GISEL-NEXT: s_load_b128 s[0:3], s[4:5], 0x0 848; GFX1100-GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0 849; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) 850; GFX1100-GISEL-NEXT: v_lshlrev_b32_e32 v0, 4, v0 851; GFX1100-GISEL-NEXT: s_waitcnt lgkmcnt(0) 852; GFX1100-GISEL-NEXT: global_load_b64 v[0:1], v0, s[2:3] offset:8 853; GFX1100-GISEL-NEXT: s_load_b64 s[2:3], s[0:1], 0x0 854; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 855; GFX1100-GISEL-NEXT: v_mov_b32_e32 v1, s2 856; GFX1100-GISEL-NEXT: v_mov_b32_e32 v2, s3 857; GFX1100-GISEL-NEXT: v_readfirstlane_b32 s2, v0 858; GFX1100-GISEL-NEXT: v_mov_b32_e32 v0, 0 859; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) 860; GFX1100-GISEL-NEXT: v_writelane_b32 v1, 12, s2 861; GFX1100-GISEL-NEXT: v_writelane_b32 v2, 0, s2 862; GFX1100-GISEL-NEXT: global_store_b64 v0, v[1:2], s[0:1] 863; GFX1100-GISEL-NEXT: s_endpgm 864 %tid = call i32 @llvm.amdgcn.workitem.id.x() 865 
%gep.in = getelementptr <2 x i64>, ptr addrspace(1) %in, i32 %tid 866 %args = load <2 x i64>, ptr addrspace(1) %gep.in 867 %oldval = load i64, ptr addrspace(1) %out 868 %lane = extractelement <2 x i64> %args, i32 1 869 %lane32 = trunc i64 %lane to i32 870 %writelane = call i64 @llvm.amdgcn.writelane.i64(i64 12, i32 %lane32, i64 %oldval) 871 store i64 %writelane, ptr addrspace(1) %out, align 4 872 ret void 873} 874 875define amdgpu_kernel void @test_writelane_vreg_lane_f64(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 { 876; GFX802-SDAG-LABEL: test_writelane_vreg_lane_f64: 877; GFX802-SDAG: ; %bb.0: 878; GFX802-SDAG-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0 879; GFX802-SDAG-NEXT: v_lshlrev_b32_e32 v0, 4, v0 880; GFX802-SDAG-NEXT: s_mov_b32 s4, 0x40280000 881; GFX802-SDAG-NEXT: s_waitcnt lgkmcnt(0) 882; GFX802-SDAG-NEXT: v_mov_b32_e32 v1, s3 883; GFX802-SDAG-NEXT: v_add_u32_e32 v0, vcc, s2, v0 884; GFX802-SDAG-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 885; GFX802-SDAG-NEXT: v_add_u32_e32 v0, vcc, 8, v0 886; GFX802-SDAG-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 887; GFX802-SDAG-NEXT: flat_load_dword v2, v[0:1] 888; GFX802-SDAG-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 889; GFX802-SDAG-NEXT: s_waitcnt lgkmcnt(0) 890; GFX802-SDAG-NEXT: v_mov_b32_e32 v1, s3 891; GFX802-SDAG-NEXT: v_mov_b32_e32 v0, s2 892; GFX802-SDAG-NEXT: s_waitcnt vmcnt(0) 893; GFX802-SDAG-NEXT: v_readfirstlane_b32 m0, v2 894; GFX802-SDAG-NEXT: v_readfirstlane_b32 s2, v2 895; GFX802-SDAG-NEXT: v_mov_b32_e32 v3, s1 896; GFX802-SDAG-NEXT: s_nop 1 897; GFX802-SDAG-NEXT: v_writelane_b32 v1, s4, m0 898; GFX802-SDAG-NEXT: v_mov_b32_e32 v2, s0 899; GFX802-SDAG-NEXT: v_writelane_b32 v0, 0, s2 900; GFX802-SDAG-NEXT: flat_store_dwordx2 v[2:3], v[0:1] 901; GFX802-SDAG-NEXT: s_endpgm 902; 903; GFX1010-SDAG-LABEL: test_writelane_vreg_lane_f64: 904; GFX1010-SDAG: ; %bb.0: 905; GFX1010-SDAG-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0 906; GFX1010-SDAG-NEXT: v_lshlrev_b32_e32 v0, 4, v0 907; GFX1010-SDAG-NEXT: v_mov_b32_e32 
v2, 0 908; GFX1010-SDAG-NEXT: s_waitcnt lgkmcnt(0) 909; GFX1010-SDAG-NEXT: global_load_dword v0, v0, s[2:3] offset:8 910; GFX1010-SDAG-NEXT: s_waitcnt_depctr 0xffe3 911; GFX1010-SDAG-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 912; GFX1010-SDAG-NEXT: s_waitcnt lgkmcnt(0) 913; GFX1010-SDAG-NEXT: v_mov_b32_e32 v1, s3 914; GFX1010-SDAG-NEXT: s_waitcnt vmcnt(0) 915; GFX1010-SDAG-NEXT: v_readfirstlane_b32 s3, v0 916; GFX1010-SDAG-NEXT: v_mov_b32_e32 v0, s2 917; GFX1010-SDAG-NEXT: s_mov_b32 s2, 0x40280000 918; GFX1010-SDAG-NEXT: v_writelane_b32 v1, s2, s3 919; GFX1010-SDAG-NEXT: v_writelane_b32 v0, 0, s3 920; GFX1010-SDAG-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] 921; GFX1010-SDAG-NEXT: s_endpgm 922; 923; GFX1100-SDAG-LABEL: test_writelane_vreg_lane_f64: 924; GFX1100-SDAG: ; %bb.0: 925; GFX1100-SDAG-NEXT: s_load_b128 s[0:3], s[4:5], 0x0 926; GFX1100-SDAG-NEXT: v_and_b32_e32 v0, 0x3ff, v0 927; GFX1100-SDAG-NEXT: v_mov_b32_e32 v2, 0 928; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) 929; GFX1100-SDAG-NEXT: v_lshlrev_b32_e32 v0, 4, v0 930; GFX1100-SDAG-NEXT: s_waitcnt lgkmcnt(0) 931; GFX1100-SDAG-NEXT: global_load_b32 v0, v0, s[2:3] offset:8 932; GFX1100-SDAG-NEXT: s_load_b64 s[2:3], s[0:1], 0x0 933; GFX1100-SDAG-NEXT: s_waitcnt lgkmcnt(0) 934; GFX1100-SDAG-NEXT: v_mov_b32_e32 v1, s3 935; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) 936; GFX1100-SDAG-NEXT: v_readfirstlane_b32 s3, v0 937; GFX1100-SDAG-NEXT: v_mov_b32_e32 v0, s2 938; GFX1100-SDAG-NEXT: s_mov_b32 s2, 0x40280000 939; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instid1(SALU_CYCLE_1) 940; GFX1100-SDAG-NEXT: v_writelane_b32 v1, s2, s3 941; GFX1100-SDAG-NEXT: v_writelane_b32 v0, 0, s3 942; GFX1100-SDAG-NEXT: global_store_b64 v2, v[0:1], s[0:1] 943; GFX1100-SDAG-NEXT: s_endpgm 944; 945; GFX802-GISEL-LABEL: test_writelane_vreg_lane_f64: 946; GFX802-GISEL: ; %bb.0: 947; GFX802-GISEL-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0 948; GFX802-GISEL-NEXT: v_lshlrev_b32_e32 v2, 4, v0 949; GFX802-GISEL-NEXT: s_mov_b32 
s4, 0x40280000 950; GFX802-GISEL-NEXT: s_waitcnt lgkmcnt(0) 951; GFX802-GISEL-NEXT: v_mov_b32_e32 v0, s2 952; GFX802-GISEL-NEXT: v_mov_b32_e32 v1, s3 953; GFX802-GISEL-NEXT: v_add_u32_e32 v0, vcc, v0, v2 954; GFX802-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 955; GFX802-GISEL-NEXT: v_add_u32_e32 v0, vcc, 8, v0 956; GFX802-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 957; GFX802-GISEL-NEXT: flat_load_dwordx2 v[0:1], v[0:1] 958; GFX802-GISEL-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 959; GFX802-GISEL-NEXT: v_mov_b32_e32 v4, s1 960; GFX802-GISEL-NEXT: v_mov_b32_e32 v3, s0 961; GFX802-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 962; GFX802-GISEL-NEXT: v_mov_b32_e32 v1, s2 963; GFX802-GISEL-NEXT: v_mov_b32_e32 v2, s3 964; GFX802-GISEL-NEXT: v_readfirstlane_b32 s2, v0 965; GFX802-GISEL-NEXT: s_mov_b32 m0, s2 966; GFX802-GISEL-NEXT: s_nop 2 967; GFX802-GISEL-NEXT: v_writelane_b32 v1, 0, s2 968; GFX802-GISEL-NEXT: v_writelane_b32 v2, s4, m0 969; GFX802-GISEL-NEXT: flat_store_dwordx2 v[3:4], v[1:2] 970; GFX802-GISEL-NEXT: s_endpgm 971; 972; GFX1010-GISEL-LABEL: test_writelane_vreg_lane_f64: 973; GFX1010-GISEL: ; %bb.0: 974; GFX1010-GISEL-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0 975; GFX1010-GISEL-NEXT: v_lshlrev_b32_e32 v0, 4, v0 976; GFX1010-GISEL-NEXT: s_waitcnt lgkmcnt(0) 977; GFX1010-GISEL-NEXT: global_load_dwordx2 v[0:1], v0, s[2:3] offset:8 978; GFX1010-GISEL-NEXT: s_waitcnt_depctr 0xffe3 979; GFX1010-GISEL-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x0 980; GFX1010-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 981; GFX1010-GISEL-NEXT: v_mov_b32_e32 v1, s2 982; GFX1010-GISEL-NEXT: v_mov_b32_e32 v2, s3 983; GFX1010-GISEL-NEXT: s_mov_b32 s3, 0x40280000 984; GFX1010-GISEL-NEXT: v_readfirstlane_b32 s2, v0 985; GFX1010-GISEL-NEXT: v_mov_b32_e32 v0, 0 986; GFX1010-GISEL-NEXT: v_writelane_b32 v1, 0, s2 987; GFX1010-GISEL-NEXT: v_writelane_b32 v2, s3, s2 988; GFX1010-GISEL-NEXT: global_store_dwordx2 v0, v[1:2], s[0:1] 989; GFX1010-GISEL-NEXT: s_endpgm 990; 991; GFX1100-GISEL-LABEL: 
test_writelane_vreg_lane_f64: 992; GFX1100-GISEL: ; %bb.0: 993; GFX1100-GISEL-NEXT: s_load_b128 s[0:3], s[4:5], 0x0 994; GFX1100-GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0 995; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) 996; GFX1100-GISEL-NEXT: v_lshlrev_b32_e32 v0, 4, v0 997; GFX1100-GISEL-NEXT: s_waitcnt lgkmcnt(0) 998; GFX1100-GISEL-NEXT: global_load_b64 v[0:1], v0, s[2:3] offset:8 999; GFX1100-GISEL-NEXT: s_load_b64 s[2:3], s[0:1], 0x0 1000; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) 1001; GFX1100-GISEL-NEXT: v_mov_b32_e32 v1, s2 1002; GFX1100-GISEL-NEXT: v_mov_b32_e32 v2, s3 1003; GFX1100-GISEL-NEXT: s_mov_b32 s3, 0x40280000 1004; GFX1100-GISEL-NEXT: v_readfirstlane_b32 s2, v0 1005; GFX1100-GISEL-NEXT: v_mov_b32_e32 v0, 0 1006; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) 1007; GFX1100-GISEL-NEXT: v_writelane_b32 v1, 0, s2 1008; GFX1100-GISEL-NEXT: v_writelane_b32 v2, s3, s2 1009; GFX1100-GISEL-NEXT: global_store_b64 v0, v[1:2], s[0:1] 1010; GFX1100-GISEL-NEXT: s_endpgm 1011 %tid = call i32 @llvm.amdgcn.workitem.id.x() 1012 %gep.in = getelementptr <2 x double>, ptr addrspace(1) %in, i32 %tid 1013 %args = load <2 x double>, ptr addrspace(1) %gep.in 1014 %oldval = load double, ptr addrspace(1) %out 1015 %lane = extractelement <2 x double> %args, i32 1 1016 %lane_cast = bitcast double %lane to i64 1017 %lane32 = trunc i64 %lane_cast to i32 1018 %writelane = call double @llvm.amdgcn.writelane.f64(double 12.0, i32 %lane32, double %oldval) 1019 store double %writelane, ptr addrspace(1) %out, align 4 1020 ret void 1021} 1022 1023define amdgpu_kernel void @test_writelane_m0_sreg_i32(ptr addrspace(1) %out, i32 %src1) #1 { 1024; GFX802-SDAG-LABEL: test_writelane_m0_sreg_i32: 1025; GFX802-SDAG: ; %bb.0: 1026; GFX802-SDAG-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0 1027; GFX802-SDAG-NEXT: s_load_dword s2, s[8:9], 0x8 1028; GFX802-SDAG-NEXT: ;;#ASMSTART 1029; GFX802-SDAG-NEXT: s_mov_b32 m0, -1 1030; GFX802-SDAG-NEXT: ;;#ASMEND 1031; GFX802-SDAG-NEXT: 
s_mov_b32 s4, m0 1032; GFX802-SDAG-NEXT: s_waitcnt lgkmcnt(0) 1033; GFX802-SDAG-NEXT: s_load_dword s3, s[0:1], 0x0 1034; GFX802-SDAG-NEXT: s_mov_b32 m0, s2 1035; GFX802-SDAG-NEXT: v_mov_b32_e32 v0, s0 1036; GFX802-SDAG-NEXT: v_mov_b32_e32 v1, s1 1037; GFX802-SDAG-NEXT: s_waitcnt lgkmcnt(0) 1038; GFX802-SDAG-NEXT: v_mov_b32_e32 v2, s3 1039; GFX802-SDAG-NEXT: v_writelane_b32 v2, s4, m0 1040; GFX802-SDAG-NEXT: flat_store_dword v[0:1], v2 1041; GFX802-SDAG-NEXT: s_endpgm 1042; 1043; GFX1010-SDAG-LABEL: test_writelane_m0_sreg_i32: 1044; GFX1010-SDAG: ; %bb.0: 1045; GFX1010-SDAG-NEXT: s_clause 0x1 1046; GFX1010-SDAG-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0 1047; GFX1010-SDAG-NEXT: s_load_dword s2, s[8:9], 0x8 1048; GFX1010-SDAG-NEXT: v_mov_b32_e32 v1, 0 1049; GFX1010-SDAG-NEXT: ;;#ASMSTART 1050; GFX1010-SDAG-NEXT: s_mov_b32 m0, -1 1051; GFX1010-SDAG-NEXT: ;;#ASMEND 1052; GFX1010-SDAG-NEXT: s_waitcnt lgkmcnt(0) 1053; GFX1010-SDAG-NEXT: s_load_dword s3, s[0:1], 0x0 1054; GFX1010-SDAG-NEXT: s_waitcnt lgkmcnt(0) 1055; GFX1010-SDAG-NEXT: v_mov_b32_e32 v0, s3 1056; GFX1010-SDAG-NEXT: v_writelane_b32 v0, m0, s2 1057; GFX1010-SDAG-NEXT: global_store_dword v1, v0, s[0:1] 1058; GFX1010-SDAG-NEXT: s_endpgm 1059; 1060; GFX1100-SDAG-LABEL: test_writelane_m0_sreg_i32: 1061; GFX1100-SDAG: ; %bb.0: 1062; GFX1100-SDAG-NEXT: s_clause 0x1 1063; GFX1100-SDAG-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 1064; GFX1100-SDAG-NEXT: s_load_b32 s2, s[4:5], 0x8 1065; GFX1100-SDAG-NEXT: v_mov_b32_e32 v1, 0 1066; GFX1100-SDAG-NEXT: ;;#ASMSTART 1067; GFX1100-SDAG-NEXT: s_mov_b32 m0, -1 1068; GFX1100-SDAG-NEXT: ;;#ASMEND 1069; GFX1100-SDAG-NEXT: s_waitcnt lgkmcnt(0) 1070; GFX1100-SDAG-NEXT: s_load_b32 s3, s[0:1], 0x0 1071; GFX1100-SDAG-NEXT: s_waitcnt lgkmcnt(0) 1072; GFX1100-SDAG-NEXT: v_mov_b32_e32 v0, s3 1073; GFX1100-SDAG-NEXT: v_writelane_b32 v0, m0, s2 1074; GFX1100-SDAG-NEXT: global_store_b32 v1, v0, s[0:1] 1075; GFX1100-SDAG-NEXT: s_endpgm 1076; 1077; GFX802-GISEL-LABEL: test_writelane_m0_sreg_i32: 
1078; GFX802-GISEL: ; %bb.0: 1079; GFX802-GISEL-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0 1080; GFX802-GISEL-NEXT: s_load_dword s2, s[8:9], 0x8 1081; GFX802-GISEL-NEXT: ;;#ASMSTART 1082; GFX802-GISEL-NEXT: s_mov_b32 m0, -1 1083; GFX802-GISEL-NEXT: ;;#ASMEND 1084; GFX802-GISEL-NEXT: s_mov_b32 s4, m0 1085; GFX802-GISEL-NEXT: s_waitcnt lgkmcnt(0) 1086; GFX802-GISEL-NEXT: s_load_dword s3, s[0:1], 0x0 1087; GFX802-GISEL-NEXT: s_mov_b32 m0, s2 1088; GFX802-GISEL-NEXT: v_mov_b32_e32 v0, s0 1089; GFX802-GISEL-NEXT: v_mov_b32_e32 v1, s1 1090; GFX802-GISEL-NEXT: s_waitcnt lgkmcnt(0) 1091; GFX802-GISEL-NEXT: v_mov_b32_e32 v2, s3 1092; GFX802-GISEL-NEXT: v_writelane_b32 v2, s4, m0 1093; GFX802-GISEL-NEXT: flat_store_dword v[0:1], v2 1094; GFX802-GISEL-NEXT: s_endpgm 1095; 1096; GFX1010-GISEL-LABEL: test_writelane_m0_sreg_i32: 1097; GFX1010-GISEL: ; %bb.0: 1098; GFX1010-GISEL-NEXT: s_clause 0x1 1099; GFX1010-GISEL-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0 1100; GFX1010-GISEL-NEXT: s_load_dword s2, s[8:9], 0x8 1101; GFX1010-GISEL-NEXT: ;;#ASMSTART 1102; GFX1010-GISEL-NEXT: s_mov_b32 m0, -1 1103; GFX1010-GISEL-NEXT: ;;#ASMEND 1104; GFX1010-GISEL-NEXT: v_mov_b32_e32 v1, 0 1105; GFX1010-GISEL-NEXT: s_waitcnt lgkmcnt(0) 1106; GFX1010-GISEL-NEXT: s_load_dword s3, s[0:1], 0x0 1107; GFX1010-GISEL-NEXT: s_waitcnt lgkmcnt(0) 1108; GFX1010-GISEL-NEXT: v_mov_b32_e32 v0, s3 1109; GFX1010-GISEL-NEXT: v_writelane_b32 v0, m0, s2 1110; GFX1010-GISEL-NEXT: global_store_dword v1, v0, s[0:1] 1111; GFX1010-GISEL-NEXT: s_endpgm 1112; 1113; GFX1100-GISEL-LABEL: test_writelane_m0_sreg_i32: 1114; GFX1100-GISEL: ; %bb.0: 1115; GFX1100-GISEL-NEXT: s_clause 0x1 1116; GFX1100-GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 1117; GFX1100-GISEL-NEXT: s_load_b32 s2, s[4:5], 0x8 1118; GFX1100-GISEL-NEXT: ;;#ASMSTART 1119; GFX1100-GISEL-NEXT: s_mov_b32 m0, -1 1120; GFX1100-GISEL-NEXT: ;;#ASMEND 1121; GFX1100-GISEL-NEXT: v_mov_b32_e32 v1, 0 1122; GFX1100-GISEL-NEXT: s_waitcnt lgkmcnt(0) 1123; GFX1100-GISEL-NEXT: 
s_load_b32 s3, s[0:1], 0x0 1124; GFX1100-GISEL-NEXT: s_waitcnt lgkmcnt(0) 1125; GFX1100-GISEL-NEXT: v_mov_b32_e32 v0, s3 1126; GFX1100-GISEL-NEXT: v_writelane_b32 v0, m0, s2 1127; GFX1100-GISEL-NEXT: global_store_b32 v1, v0, s[0:1] 1128; GFX1100-GISEL-NEXT: s_endpgm 1129 %oldval = load i32, ptr addrspace(1) %out 1130 %m0 = call i32 asm "s_mov_b32 m0, -1", "={m0}"() 1131 %writelane = call i32 @llvm.amdgcn.writelane.i32(i32 %m0, i32 %src1, i32 %oldval) 1132 store i32 %writelane, ptr addrspace(1) %out, align 4 1133 ret void 1134} 1135 1136define amdgpu_kernel void @test_writelane_imm_i32(ptr addrspace(1) %out, i32 %src0) #1 { 1137; GFX802-SDAG-LABEL: test_writelane_imm_i32: 1138; GFX802-SDAG: ; %bb.0: 1139; GFX802-SDAG-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0 1140; GFX802-SDAG-NEXT: s_load_dword s2, s[8:9], 0x8 1141; GFX802-SDAG-NEXT: s_waitcnt lgkmcnt(0) 1142; GFX802-SDAG-NEXT: s_load_dword s3, s[0:1], 0x0 1143; GFX802-SDAG-NEXT: v_mov_b32_e32 v0, s0 1144; GFX802-SDAG-NEXT: v_mov_b32_e32 v1, s1 1145; GFX802-SDAG-NEXT: s_waitcnt lgkmcnt(0) 1146; GFX802-SDAG-NEXT: v_mov_b32_e32 v2, s3 1147; GFX802-SDAG-NEXT: v_writelane_b32 v2, s2, 32 1148; GFX802-SDAG-NEXT: flat_store_dword v[0:1], v2 1149; GFX802-SDAG-NEXT: s_endpgm 1150; 1151; GFX1010-SDAG-LABEL: test_writelane_imm_i32: 1152; GFX1010-SDAG: ; %bb.0: 1153; GFX1010-SDAG-NEXT: s_clause 0x1 1154; GFX1010-SDAG-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0 1155; GFX1010-SDAG-NEXT: s_load_dword s2, s[8:9], 0x8 1156; GFX1010-SDAG-NEXT: v_mov_b32_e32 v1, 0 1157; GFX1010-SDAG-NEXT: s_waitcnt lgkmcnt(0) 1158; GFX1010-SDAG-NEXT: s_load_dword s3, s[0:1], 0x0 1159; GFX1010-SDAG-NEXT: s_waitcnt lgkmcnt(0) 1160; GFX1010-SDAG-NEXT: v_mov_b32_e32 v0, s3 1161; GFX1010-SDAG-NEXT: v_writelane_b32 v0, s2, 32 1162; GFX1010-SDAG-NEXT: global_store_dword v1, v0, s[0:1] 1163; GFX1010-SDAG-NEXT: s_endpgm 1164; 1165; GFX1100-SDAG-LABEL: test_writelane_imm_i32: 1166; GFX1100-SDAG: ; %bb.0: 1167; GFX1100-SDAG-NEXT: s_clause 0x1 1168; 
GFX1100-SDAG-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 1169; GFX1100-SDAG-NEXT: s_load_b32 s2, s[4:5], 0x8 1170; GFX1100-SDAG-NEXT: v_mov_b32_e32 v1, 0 1171; GFX1100-SDAG-NEXT: s_waitcnt lgkmcnt(0) 1172; GFX1100-SDAG-NEXT: s_load_b32 s3, s[0:1], 0x0 1173; GFX1100-SDAG-NEXT: s_waitcnt lgkmcnt(0) 1174; GFX1100-SDAG-NEXT: v_mov_b32_e32 v0, s3 1175; GFX1100-SDAG-NEXT: v_writelane_b32 v0, s2, 32 1176; GFX1100-SDAG-NEXT: global_store_b32 v1, v0, s[0:1] 1177; GFX1100-SDAG-NEXT: s_endpgm 1178; 1179; GFX802-GISEL-LABEL: test_writelane_imm_i32: 1180; GFX802-GISEL: ; %bb.0: 1181; GFX802-GISEL-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0 1182; GFX802-GISEL-NEXT: s_load_dword s2, s[8:9], 0x8 1183; GFX802-GISEL-NEXT: s_waitcnt lgkmcnt(0) 1184; GFX802-GISEL-NEXT: s_load_dword s3, s[0:1], 0x0 1185; GFX802-GISEL-NEXT: v_mov_b32_e32 v0, s0 1186; GFX802-GISEL-NEXT: v_mov_b32_e32 v1, s1 1187; GFX802-GISEL-NEXT: s_waitcnt lgkmcnt(0) 1188; GFX802-GISEL-NEXT: v_mov_b32_e32 v2, s3 1189; GFX802-GISEL-NEXT: v_writelane_b32 v2, s2, 32 1190; GFX802-GISEL-NEXT: flat_store_dword v[0:1], v2 1191; GFX802-GISEL-NEXT: s_endpgm 1192; 1193; GFX1010-GISEL-LABEL: test_writelane_imm_i32: 1194; GFX1010-GISEL: ; %bb.0: 1195; GFX1010-GISEL-NEXT: s_clause 0x1 1196; GFX1010-GISEL-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0 1197; GFX1010-GISEL-NEXT: s_load_dword s2, s[8:9], 0x8 1198; GFX1010-GISEL-NEXT: v_mov_b32_e32 v1, 0 1199; GFX1010-GISEL-NEXT: s_waitcnt lgkmcnt(0) 1200; GFX1010-GISEL-NEXT: s_load_dword s3, s[0:1], 0x0 1201; GFX1010-GISEL-NEXT: s_waitcnt lgkmcnt(0) 1202; GFX1010-GISEL-NEXT: v_mov_b32_e32 v0, s3 1203; GFX1010-GISEL-NEXT: v_writelane_b32 v0, s2, 32 1204; GFX1010-GISEL-NEXT: global_store_dword v1, v0, s[0:1] 1205; GFX1010-GISEL-NEXT: s_endpgm 1206; 1207; GFX1100-GISEL-LABEL: test_writelane_imm_i32: 1208; GFX1100-GISEL: ; %bb.0: 1209; GFX1100-GISEL-NEXT: s_clause 0x1 1210; GFX1100-GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 1211; GFX1100-GISEL-NEXT: s_load_b32 s2, s[4:5], 0x8 1212; GFX1100-GISEL-NEXT: 
v_mov_b32_e32 v1, 0 1213; GFX1100-GISEL-NEXT: s_waitcnt lgkmcnt(0) 1214; GFX1100-GISEL-NEXT: s_load_b32 s3, s[0:1], 0x0 1215; GFX1100-GISEL-NEXT: s_waitcnt lgkmcnt(0) 1216; GFX1100-GISEL-NEXT: v_mov_b32_e32 v0, s3 1217; GFX1100-GISEL-NEXT: v_writelane_b32 v0, s2, 32 1218; GFX1100-GISEL-NEXT: global_store_b32 v1, v0, s[0:1] 1219; GFX1100-GISEL-NEXT: s_endpgm 1220 %oldval = load i32, ptr addrspace(1) %out 1221 %writelane = call i32 @llvm.amdgcn.writelane.i32(i32 %src0, i32 32, i32 %oldval) #0 1222 store i32 %writelane, ptr addrspace(1) %out, align 4 1223 ret void 1224} 1225 1226define amdgpu_kernel void @test_writelane_imm_i64(ptr addrspace(1) %out, i64 %src0) #1 { 1227; GFX802-SDAG-LABEL: test_writelane_imm_i64: 1228; GFX802-SDAG: ; %bb.0: 1229; GFX802-SDAG-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0 1230; GFX802-SDAG-NEXT: s_waitcnt lgkmcnt(0) 1231; GFX802-SDAG-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0 1232; GFX802-SDAG-NEXT: v_mov_b32_e32 v3, s1 1233; GFX802-SDAG-NEXT: v_mov_b32_e32 v2, s0 1234; GFX802-SDAG-NEXT: s_waitcnt lgkmcnt(0) 1235; GFX802-SDAG-NEXT: v_mov_b32_e32 v1, s5 1236; GFX802-SDAG-NEXT: v_mov_b32_e32 v0, s4 1237; GFX802-SDAG-NEXT: v_writelane_b32 v1, s3, 32 1238; GFX802-SDAG-NEXT: v_writelane_b32 v0, s2, 32 1239; GFX802-SDAG-NEXT: flat_store_dwordx2 v[2:3], v[0:1] 1240; GFX802-SDAG-NEXT: s_endpgm 1241; 1242; GFX1010-SDAG-LABEL: test_writelane_imm_i64: 1243; GFX1010-SDAG: ; %bb.0: 1244; GFX1010-SDAG-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0 1245; GFX1010-SDAG-NEXT: v_mov_b32_e32 v2, 0 1246; GFX1010-SDAG-NEXT: s_waitcnt lgkmcnt(0) 1247; GFX1010-SDAG-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0 1248; GFX1010-SDAG-NEXT: s_waitcnt lgkmcnt(0) 1249; GFX1010-SDAG-NEXT: v_mov_b32_e32 v1, s5 1250; GFX1010-SDAG-NEXT: v_mov_b32_e32 v0, s4 1251; GFX1010-SDAG-NEXT: v_writelane_b32 v1, s3, 32 1252; GFX1010-SDAG-NEXT: v_writelane_b32 v0, s2, 32 1253; GFX1010-SDAG-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] 1254; GFX1010-SDAG-NEXT: s_endpgm 1255; 1256; GFX1100-SDAG-LABEL: 
test_writelane_imm_i64: 1257; GFX1100-SDAG: ; %bb.0: 1258; GFX1100-SDAG-NEXT: s_load_b128 s[0:3], s[4:5], 0x0 1259; GFX1100-SDAG-NEXT: v_mov_b32_e32 v2, 0 1260; GFX1100-SDAG-NEXT: s_waitcnt lgkmcnt(0) 1261; GFX1100-SDAG-NEXT: s_load_b64 s[4:5], s[0:1], 0x0 1262; GFX1100-SDAG-NEXT: s_waitcnt lgkmcnt(0) 1263; GFX1100-SDAG-NEXT: v_mov_b32_e32 v1, s5 1264; GFX1100-SDAG-NEXT: v_mov_b32_e32 v0, s4 1265; GFX1100-SDAG-NEXT: v_writelane_b32 v1, s3, 32 1266; GFX1100-SDAG-NEXT: v_writelane_b32 v0, s2, 32 1267; GFX1100-SDAG-NEXT: global_store_b64 v2, v[0:1], s[0:1] 1268; GFX1100-SDAG-NEXT: s_endpgm 1269; 1270; GFX802-GISEL-LABEL: test_writelane_imm_i64: 1271; GFX802-GISEL: ; %bb.0: 1272; GFX802-GISEL-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0 1273; GFX802-GISEL-NEXT: s_waitcnt lgkmcnt(0) 1274; GFX802-GISEL-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0 1275; GFX802-GISEL-NEXT: v_mov_b32_e32 v3, s1 1276; GFX802-GISEL-NEXT: v_mov_b32_e32 v2, s0 1277; GFX802-GISEL-NEXT: s_waitcnt lgkmcnt(0) 1278; GFX802-GISEL-NEXT: v_mov_b32_e32 v0, s4 1279; GFX802-GISEL-NEXT: v_mov_b32_e32 v1, s5 1280; GFX802-GISEL-NEXT: v_writelane_b32 v0, s2, 32 1281; GFX802-GISEL-NEXT: v_writelane_b32 v1, s3, 32 1282; GFX802-GISEL-NEXT: flat_store_dwordx2 v[2:3], v[0:1] 1283; GFX802-GISEL-NEXT: s_endpgm 1284; 1285; GFX1010-GISEL-LABEL: test_writelane_imm_i64: 1286; GFX1010-GISEL: ; %bb.0: 1287; GFX1010-GISEL-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0 1288; GFX1010-GISEL-NEXT: v_mov_b32_e32 v2, 0 1289; GFX1010-GISEL-NEXT: s_waitcnt lgkmcnt(0) 1290; GFX1010-GISEL-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0 1291; GFX1010-GISEL-NEXT: s_waitcnt lgkmcnt(0) 1292; GFX1010-GISEL-NEXT: v_mov_b32_e32 v0, s4 1293; GFX1010-GISEL-NEXT: v_mov_b32_e32 v1, s5 1294; GFX1010-GISEL-NEXT: v_writelane_b32 v0, s2, 32 1295; GFX1010-GISEL-NEXT: v_writelane_b32 v1, s3, 32 1296; GFX1010-GISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] 1297; GFX1010-GISEL-NEXT: s_endpgm 1298; 1299; GFX1100-GISEL-LABEL: test_writelane_imm_i64: 1300; GFX1100-GISEL: 
; %bb.0: 1301; GFX1100-GISEL-NEXT: s_load_b128 s[0:3], s[4:5], 0x0 1302; GFX1100-GISEL-NEXT: v_mov_b32_e32 v2, 0 1303; GFX1100-GISEL-NEXT: s_waitcnt lgkmcnt(0) 1304; GFX1100-GISEL-NEXT: s_load_b64 s[4:5], s[0:1], 0x0 1305; GFX1100-GISEL-NEXT: s_waitcnt lgkmcnt(0) 1306; GFX1100-GISEL-NEXT: v_mov_b32_e32 v0, s4 1307; GFX1100-GISEL-NEXT: v_mov_b32_e32 v1, s5 1308; GFX1100-GISEL-NEXT: v_writelane_b32 v0, s2, 32 1309; GFX1100-GISEL-NEXT: v_writelane_b32 v1, s3, 32 1310; GFX1100-GISEL-NEXT: global_store_b64 v2, v[0:1], s[0:1] 1311; GFX1100-GISEL-NEXT: s_endpgm 1312 %oldval = load i64, ptr addrspace(1) %out 1313 %writelane = call i64 @llvm.amdgcn.writelane.i64(i64 %src0, i32 32, i64 %oldval) #0 1314 store i64 %writelane, ptr addrspace(1) %out, align 4 1315 ret void 1316} 1317 1318define amdgpu_kernel void @test_writelane_imm_f64(ptr addrspace(1) %out, double %src0) #1 { 1319; GFX802-SDAG-LABEL: test_writelane_imm_f64: 1320; GFX802-SDAG: ; %bb.0: 1321; GFX802-SDAG-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0 1322; GFX802-SDAG-NEXT: s_waitcnt lgkmcnt(0) 1323; GFX802-SDAG-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0 1324; GFX802-SDAG-NEXT: v_mov_b32_e32 v3, s1 1325; GFX802-SDAG-NEXT: v_mov_b32_e32 v2, s0 1326; GFX802-SDAG-NEXT: s_waitcnt lgkmcnt(0) 1327; GFX802-SDAG-NEXT: v_mov_b32_e32 v1, s5 1328; GFX802-SDAG-NEXT: v_mov_b32_e32 v0, s4 1329; GFX802-SDAG-NEXT: v_writelane_b32 v1, s3, 32 1330; GFX802-SDAG-NEXT: v_writelane_b32 v0, s2, 32 1331; GFX802-SDAG-NEXT: flat_store_dwordx2 v[2:3], v[0:1] 1332; GFX802-SDAG-NEXT: s_endpgm 1333; 1334; GFX1010-SDAG-LABEL: test_writelane_imm_f64: 1335; GFX1010-SDAG: ; %bb.0: 1336; GFX1010-SDAG-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0 1337; GFX1010-SDAG-NEXT: v_mov_b32_e32 v2, 0 1338; GFX1010-SDAG-NEXT: s_waitcnt lgkmcnt(0) 1339; GFX1010-SDAG-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0 1340; GFX1010-SDAG-NEXT: s_waitcnt lgkmcnt(0) 1341; GFX1010-SDAG-NEXT: v_mov_b32_e32 v1, s5 1342; GFX1010-SDAG-NEXT: v_mov_b32_e32 v0, s4 1343; GFX1010-SDAG-NEXT: 
v_writelane_b32 v1, s3, 32 1344; GFX1010-SDAG-NEXT: v_writelane_b32 v0, s2, 32 1345; GFX1010-SDAG-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] 1346; GFX1010-SDAG-NEXT: s_endpgm 1347; 1348; GFX1100-SDAG-LABEL: test_writelane_imm_f64: 1349; GFX1100-SDAG: ; %bb.0: 1350; GFX1100-SDAG-NEXT: s_load_b128 s[0:3], s[4:5], 0x0 1351; GFX1100-SDAG-NEXT: v_mov_b32_e32 v2, 0 1352; GFX1100-SDAG-NEXT: s_waitcnt lgkmcnt(0) 1353; GFX1100-SDAG-NEXT: s_load_b64 s[4:5], s[0:1], 0x0 1354; GFX1100-SDAG-NEXT: s_waitcnt lgkmcnt(0) 1355; GFX1100-SDAG-NEXT: v_mov_b32_e32 v1, s5 1356; GFX1100-SDAG-NEXT: v_mov_b32_e32 v0, s4 1357; GFX1100-SDAG-NEXT: v_writelane_b32 v1, s3, 32 1358; GFX1100-SDAG-NEXT: v_writelane_b32 v0, s2, 32 1359; GFX1100-SDAG-NEXT: global_store_b64 v2, v[0:1], s[0:1] 1360; GFX1100-SDAG-NEXT: s_endpgm 1361; 1362; GFX802-GISEL-LABEL: test_writelane_imm_f64: 1363; GFX802-GISEL: ; %bb.0: 1364; GFX802-GISEL-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0 1365; GFX802-GISEL-NEXT: s_waitcnt lgkmcnt(0) 1366; GFX802-GISEL-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0 1367; GFX802-GISEL-NEXT: v_mov_b32_e32 v3, s1 1368; GFX802-GISEL-NEXT: v_mov_b32_e32 v2, s0 1369; GFX802-GISEL-NEXT: s_waitcnt lgkmcnt(0) 1370; GFX802-GISEL-NEXT: v_mov_b32_e32 v0, s4 1371; GFX802-GISEL-NEXT: v_mov_b32_e32 v1, s5 1372; GFX802-GISEL-NEXT: v_writelane_b32 v0, s2, 32 1373; GFX802-GISEL-NEXT: v_writelane_b32 v1, s3, 32 1374; GFX802-GISEL-NEXT: flat_store_dwordx2 v[2:3], v[0:1] 1375; GFX802-GISEL-NEXT: s_endpgm 1376; 1377; GFX1010-GISEL-LABEL: test_writelane_imm_f64: 1378; GFX1010-GISEL: ; %bb.0: 1379; GFX1010-GISEL-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0 1380; GFX1010-GISEL-NEXT: v_mov_b32_e32 v2, 0 1381; GFX1010-GISEL-NEXT: s_waitcnt lgkmcnt(0) 1382; GFX1010-GISEL-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0 1383; GFX1010-GISEL-NEXT: s_waitcnt lgkmcnt(0) 1384; GFX1010-GISEL-NEXT: v_mov_b32_e32 v0, s4 1385; GFX1010-GISEL-NEXT: v_mov_b32_e32 v1, s5 1386; GFX1010-GISEL-NEXT: v_writelane_b32 v0, s2, 32 1387; 
GFX1010-GISEL-NEXT: v_writelane_b32 v1, s3, 32 1388; GFX1010-GISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] 1389; GFX1010-GISEL-NEXT: s_endpgm 1390; 1391; GFX1100-GISEL-LABEL: test_writelane_imm_f64: 1392; GFX1100-GISEL: ; %bb.0: 1393; GFX1100-GISEL-NEXT: s_load_b128 s[0:3], s[4:5], 0x0 1394; GFX1100-GISEL-NEXT: v_mov_b32_e32 v2, 0 1395; GFX1100-GISEL-NEXT: s_waitcnt lgkmcnt(0) 1396; GFX1100-GISEL-NEXT: s_load_b64 s[4:5], s[0:1], 0x0 1397; GFX1100-GISEL-NEXT: s_waitcnt lgkmcnt(0) 1398; GFX1100-GISEL-NEXT: v_mov_b32_e32 v0, s4 1399; GFX1100-GISEL-NEXT: v_mov_b32_e32 v1, s5 1400; GFX1100-GISEL-NEXT: v_writelane_b32 v0, s2, 32 1401; GFX1100-GISEL-NEXT: v_writelane_b32 v1, s3, 32 1402; GFX1100-GISEL-NEXT: global_store_b64 v2, v[0:1], s[0:1] 1403; GFX1100-GISEL-NEXT: s_endpgm 1404 %oldval = load double, ptr addrspace(1) %out 1405 %writelane = call double @llvm.amdgcn.writelane.f64(double %src0, i32 32, double %oldval) #0 1406 store double %writelane, ptr addrspace(1) %out, align 4 1407 ret void 1408} 1409 1410define amdgpu_kernel void @test_writelane_sreg_oldval_i32(i32 inreg %oldval, ptr addrspace(1) %out, i32 %src0, i32 %src1) #1 { 1411; GFX802-SDAG-LABEL: test_writelane_sreg_oldval_i32: 1412; GFX802-SDAG: ; %bb.0: 1413; GFX802-SDAG-NEXT: s_load_dword s4, s[8:9], 0x0 1414; GFX802-SDAG-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x8 1415; GFX802-SDAG-NEXT: s_waitcnt lgkmcnt(0) 1416; GFX802-SDAG-NEXT: v_mov_b32_e32 v2, s4 1417; GFX802-SDAG-NEXT: s_mov_b32 m0, s3 1418; GFX802-SDAG-NEXT: v_mov_b32_e32 v0, s0 1419; GFX802-SDAG-NEXT: v_writelane_b32 v2, s2, m0 1420; GFX802-SDAG-NEXT: v_mov_b32_e32 v1, s1 1421; GFX802-SDAG-NEXT: flat_store_dword v[0:1], v2 1422; GFX802-SDAG-NEXT: s_endpgm 1423; 1424; GFX1010-SDAG-LABEL: test_writelane_sreg_oldval_i32: 1425; GFX1010-SDAG: ; %bb.0: 1426; GFX1010-SDAG-NEXT: s_clause 0x1 1427; GFX1010-SDAG-NEXT: s_load_dword s4, s[8:9], 0x0 1428; GFX1010-SDAG-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x8 1429; GFX1010-SDAG-NEXT: v_mov_b32_e32 v1, 0 
1430; GFX1010-SDAG-NEXT: s_waitcnt lgkmcnt(0) 1431; GFX1010-SDAG-NEXT: v_mov_b32_e32 v0, s4 1432; GFX1010-SDAG-NEXT: v_writelane_b32 v0, s2, s3 1433; GFX1010-SDAG-NEXT: global_store_dword v1, v0, s[0:1] 1434; GFX1010-SDAG-NEXT: s_endpgm 1435; 1436; GFX1100-SDAG-LABEL: test_writelane_sreg_oldval_i32: 1437; GFX1100-SDAG: ; %bb.0: 1438; GFX1100-SDAG-NEXT: s_clause 0x1 1439; GFX1100-SDAG-NEXT: s_load_b32 s6, s[4:5], 0x0 1440; GFX1100-SDAG-NEXT: s_load_b128 s[0:3], s[4:5], 0x8 1441; GFX1100-SDAG-NEXT: v_mov_b32_e32 v1, 0 1442; GFX1100-SDAG-NEXT: s_waitcnt lgkmcnt(0) 1443; GFX1100-SDAG-NEXT: v_mov_b32_e32 v0, s6 1444; GFX1100-SDAG-NEXT: v_writelane_b32 v0, s2, s3 1445; GFX1100-SDAG-NEXT: global_store_b32 v1, v0, s[0:1] 1446; GFX1100-SDAG-NEXT: s_endpgm 1447; 1448; GFX802-GISEL-LABEL: test_writelane_sreg_oldval_i32: 1449; GFX802-GISEL: ; %bb.0: 1450; GFX802-GISEL-NEXT: s_load_dword s4, s[8:9], 0x0 1451; GFX802-GISEL-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x8 1452; GFX802-GISEL-NEXT: s_waitcnt lgkmcnt(0) 1453; GFX802-GISEL-NEXT: v_mov_b32_e32 v2, s4 1454; GFX802-GISEL-NEXT: s_mov_b32 m0, s3 1455; GFX802-GISEL-NEXT: v_mov_b32_e32 v0, s0 1456; GFX802-GISEL-NEXT: v_writelane_b32 v2, s2, m0 1457; GFX802-GISEL-NEXT: v_mov_b32_e32 v1, s1 1458; GFX802-GISEL-NEXT: flat_store_dword v[0:1], v2 1459; GFX802-GISEL-NEXT: s_endpgm 1460; 1461; GFX1010-GISEL-LABEL: test_writelane_sreg_oldval_i32: 1462; GFX1010-GISEL: ; %bb.0: 1463; GFX1010-GISEL-NEXT: s_clause 0x1 1464; GFX1010-GISEL-NEXT: s_load_dword s4, s[8:9], 0x0 1465; GFX1010-GISEL-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x8 1466; GFX1010-GISEL-NEXT: v_mov_b32_e32 v1, 0 1467; GFX1010-GISEL-NEXT: s_waitcnt lgkmcnt(0) 1468; GFX1010-GISEL-NEXT: v_mov_b32_e32 v0, s4 1469; GFX1010-GISEL-NEXT: v_writelane_b32 v0, s2, s3 1470; GFX1010-GISEL-NEXT: global_store_dword v1, v0, s[0:1] 1471; GFX1010-GISEL-NEXT: s_endpgm 1472; 1473; GFX1100-GISEL-LABEL: test_writelane_sreg_oldval_i32: 1474; GFX1100-GISEL: ; %bb.0: 1475; GFX1100-GISEL-NEXT: s_clause 
0x1 1476; GFX1100-GISEL-NEXT: s_load_b32 s6, s[4:5], 0x0 1477; GFX1100-GISEL-NEXT: s_load_b128 s[0:3], s[4:5], 0x8 1478; GFX1100-GISEL-NEXT: v_mov_b32_e32 v1, 0 1479; GFX1100-GISEL-NEXT: s_waitcnt lgkmcnt(0) 1480; GFX1100-GISEL-NEXT: v_mov_b32_e32 v0, s6 1481; GFX1100-GISEL-NEXT: v_writelane_b32 v0, s2, s3 1482; GFX1100-GISEL-NEXT: global_store_b32 v1, v0, s[0:1] 1483; GFX1100-GISEL-NEXT: s_endpgm 1484 %writelane = call i32 @llvm.amdgcn.writelane.i32(i32 %src0, i32 %src1, i32 %oldval) 1485 store i32 %writelane, ptr addrspace(1) %out, align 4 1486 ret void 1487} 1488 1489define amdgpu_kernel void @test_writelane_sreg_oldval_i64(i64 inreg %oldval, ptr addrspace(1) %out, i64 %src0, i32 %src1) #1 { 1490; GFX802-SDAG-LABEL: test_writelane_sreg_oldval_i64: 1491; GFX802-SDAG: ; %bb.0: 1492; GFX802-SDAG-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0 1493; GFX802-SDAG-NEXT: s_load_dword s6, s[8:9], 0x18 1494; GFX802-SDAG-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x10 1495; GFX802-SDAG-NEXT: s_waitcnt lgkmcnt(0) 1496; GFX802-SDAG-NEXT: v_mov_b32_e32 v3, s1 1497; GFX802-SDAG-NEXT: s_mov_b32 m0, s6 1498; GFX802-SDAG-NEXT: v_mov_b32_e32 v2, s0 1499; GFX802-SDAG-NEXT: v_mov_b32_e32 v0, s2 1500; GFX802-SDAG-NEXT: v_mov_b32_e32 v1, s3 1501; GFX802-SDAG-NEXT: v_writelane_b32 v3, s5, m0 1502; GFX802-SDAG-NEXT: v_writelane_b32 v2, s4, m0 1503; GFX802-SDAG-NEXT: flat_store_dwordx2 v[0:1], v[2:3] 1504; GFX802-SDAG-NEXT: s_endpgm 1505; 1506; GFX1010-SDAG-LABEL: test_writelane_sreg_oldval_i64: 1507; GFX1010-SDAG: ; %bb.0: 1508; GFX1010-SDAG-NEXT: s_clause 0x2 1509; GFX1010-SDAG-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0 1510; GFX1010-SDAG-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x10 1511; GFX1010-SDAG-NEXT: s_load_dword s6, s[8:9], 0x18 1512; GFX1010-SDAG-NEXT: v_mov_b32_e32 v2, 0 1513; GFX1010-SDAG-NEXT: s_waitcnt lgkmcnt(0) 1514; GFX1010-SDAG-NEXT: v_mov_b32_e32 v1, s1 1515; GFX1010-SDAG-NEXT: v_mov_b32_e32 v0, s0 1516; GFX1010-SDAG-NEXT: v_writelane_b32 v1, s5, s6 1517; GFX1010-SDAG-NEXT: 
v_writelane_b32 v0, s4, s6 1518; GFX1010-SDAG-NEXT: global_store_dwordx2 v2, v[0:1], s[2:3] 1519; GFX1010-SDAG-NEXT: s_endpgm 1520; 1521; GFX1100-SDAG-LABEL: test_writelane_sreg_oldval_i64: 1522; GFX1100-SDAG: ; %bb.0: 1523; GFX1100-SDAG-NEXT: s_clause 0x2 1524; GFX1100-SDAG-NEXT: s_load_b128 s[0:3], s[4:5], 0x0 1525; GFX1100-SDAG-NEXT: s_load_b64 s[6:7], s[4:5], 0x10 1526; GFX1100-SDAG-NEXT: s_load_b32 s4, s[4:5], 0x18 1527; GFX1100-SDAG-NEXT: v_mov_b32_e32 v2, 0 1528; GFX1100-SDAG-NEXT: s_waitcnt lgkmcnt(0) 1529; GFX1100-SDAG-NEXT: v_mov_b32_e32 v1, s1 1530; GFX1100-SDAG-NEXT: v_mov_b32_e32 v0, s0 1531; GFX1100-SDAG-NEXT: v_writelane_b32 v1, s7, s4 1532; GFX1100-SDAG-NEXT: v_writelane_b32 v0, s6, s4 1533; GFX1100-SDAG-NEXT: global_store_b64 v2, v[0:1], s[2:3] 1534; GFX1100-SDAG-NEXT: s_endpgm 1535; 1536; GFX802-GISEL-LABEL: test_writelane_sreg_oldval_i64: 1537; GFX802-GISEL: ; %bb.0: 1538; GFX802-GISEL-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0 1539; GFX802-GISEL-NEXT: s_load_dword s6, s[8:9], 0x18 1540; GFX802-GISEL-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x10 1541; GFX802-GISEL-NEXT: s_waitcnt lgkmcnt(0) 1542; GFX802-GISEL-NEXT: v_mov_b32_e32 v0, s0 1543; GFX802-GISEL-NEXT: v_mov_b32_e32 v1, s1 1544; GFX802-GISEL-NEXT: s_mov_b32 m0, s6 1545; GFX802-GISEL-NEXT: v_mov_b32_e32 v2, s2 1546; GFX802-GISEL-NEXT: v_writelane_b32 v0, s4, m0 1547; GFX802-GISEL-NEXT: v_writelane_b32 v1, s5, m0 1548; GFX802-GISEL-NEXT: v_mov_b32_e32 v3, s3 1549; GFX802-GISEL-NEXT: flat_store_dwordx2 v[2:3], v[0:1] 1550; GFX802-GISEL-NEXT: s_endpgm 1551; 1552; GFX1010-GISEL-LABEL: test_writelane_sreg_oldval_i64: 1553; GFX1010-GISEL: ; %bb.0: 1554; GFX1010-GISEL-NEXT: s_clause 0x2 1555; GFX1010-GISEL-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0 1556; GFX1010-GISEL-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x10 1557; GFX1010-GISEL-NEXT: s_load_dword s6, s[8:9], 0x18 1558; GFX1010-GISEL-NEXT: v_mov_b32_e32 v2, 0 1559; GFX1010-GISEL-NEXT: s_waitcnt lgkmcnt(0) 1560; GFX1010-GISEL-NEXT: v_mov_b32_e32 v0, s0 
1561; GFX1010-GISEL-NEXT: v_mov_b32_e32 v1, s1 1562; GFX1010-GISEL-NEXT: v_writelane_b32 v0, s4, s6 1563; GFX1010-GISEL-NEXT: v_writelane_b32 v1, s5, s6 1564; GFX1010-GISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[2:3] 1565; GFX1010-GISEL-NEXT: s_endpgm 1566; 1567; GFX1100-GISEL-LABEL: test_writelane_sreg_oldval_i64: 1568; GFX1100-GISEL: ; %bb.0: 1569; GFX1100-GISEL-NEXT: s_clause 0x2 1570; GFX1100-GISEL-NEXT: s_load_b128 s[0:3], s[4:5], 0x0 1571; GFX1100-GISEL-NEXT: s_load_b64 s[6:7], s[4:5], 0x10 1572; GFX1100-GISEL-NEXT: s_load_b32 s4, s[4:5], 0x18 1573; GFX1100-GISEL-NEXT: v_mov_b32_e32 v2, 0 1574; GFX1100-GISEL-NEXT: s_waitcnt lgkmcnt(0) 1575; GFX1100-GISEL-NEXT: v_mov_b32_e32 v0, s0 1576; GFX1100-GISEL-NEXT: v_mov_b32_e32 v1, s1 1577; GFX1100-GISEL-NEXT: v_writelane_b32 v0, s6, s4 1578; GFX1100-GISEL-NEXT: v_writelane_b32 v1, s7, s4 1579; GFX1100-GISEL-NEXT: global_store_b64 v2, v[0:1], s[2:3] 1580; GFX1100-GISEL-NEXT: s_endpgm 1581 %writelane = call i64 @llvm.amdgcn.writelane.i64(i64 %src0, i32 %src1, i64 %oldval) 1582 store i64 %writelane, ptr addrspace(1) %out, align 4 1583 ret void 1584} 1585 1586define amdgpu_kernel void @test_writelane_sreg_oldval_f64(double inreg %oldval, ptr addrspace(1) %out, double %src0, i32 %src1) #1 { 1587; GFX802-SDAG-LABEL: test_writelane_sreg_oldval_f64: 1588; GFX802-SDAG: ; %bb.0: 1589; GFX802-SDAG-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0 1590; GFX802-SDAG-NEXT: s_load_dword s6, s[8:9], 0x18 1591; GFX802-SDAG-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x10 1592; GFX802-SDAG-NEXT: s_waitcnt lgkmcnt(0) 1593; GFX802-SDAG-NEXT: v_mov_b32_e32 v3, s1 1594; GFX802-SDAG-NEXT: s_mov_b32 m0, s6 1595; GFX802-SDAG-NEXT: v_mov_b32_e32 v2, s0 1596; GFX802-SDAG-NEXT: v_mov_b32_e32 v0, s2 1597; GFX802-SDAG-NEXT: v_mov_b32_e32 v1, s3 1598; GFX802-SDAG-NEXT: v_writelane_b32 v3, s5, m0 1599; GFX802-SDAG-NEXT: v_writelane_b32 v2, s4, m0 1600; GFX802-SDAG-NEXT: flat_store_dwordx2 v[0:1], v[2:3] 1601; GFX802-SDAG-NEXT: s_endpgm 1602; 1603; 
GFX1010-SDAG-LABEL: test_writelane_sreg_oldval_f64: 1604; GFX1010-SDAG: ; %bb.0: 1605; GFX1010-SDAG-NEXT: s_clause 0x2 1606; GFX1010-SDAG-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0 1607; GFX1010-SDAG-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x10 1608; GFX1010-SDAG-NEXT: s_load_dword s6, s[8:9], 0x18 1609; GFX1010-SDAG-NEXT: v_mov_b32_e32 v2, 0 1610; GFX1010-SDAG-NEXT: s_waitcnt lgkmcnt(0) 1611; GFX1010-SDAG-NEXT: v_mov_b32_e32 v1, s1 1612; GFX1010-SDAG-NEXT: v_mov_b32_e32 v0, s0 1613; GFX1010-SDAG-NEXT: v_writelane_b32 v1, s5, s6 1614; GFX1010-SDAG-NEXT: v_writelane_b32 v0, s4, s6 1615; GFX1010-SDAG-NEXT: global_store_dwordx2 v2, v[0:1], s[2:3] 1616; GFX1010-SDAG-NEXT: s_endpgm 1617; 1618; GFX1100-SDAG-LABEL: test_writelane_sreg_oldval_f64: 1619; GFX1100-SDAG: ; %bb.0: 1620; GFX1100-SDAG-NEXT: s_clause 0x2 1621; GFX1100-SDAG-NEXT: s_load_b128 s[0:3], s[4:5], 0x0 1622; GFX1100-SDAG-NEXT: s_load_b64 s[6:7], s[4:5], 0x10 1623; GFX1100-SDAG-NEXT: s_load_b32 s4, s[4:5], 0x18 1624; GFX1100-SDAG-NEXT: v_mov_b32_e32 v2, 0 1625; GFX1100-SDAG-NEXT: s_waitcnt lgkmcnt(0) 1626; GFX1100-SDAG-NEXT: v_mov_b32_e32 v1, s1 1627; GFX1100-SDAG-NEXT: v_mov_b32_e32 v0, s0 1628; GFX1100-SDAG-NEXT: v_writelane_b32 v1, s7, s4 1629; GFX1100-SDAG-NEXT: v_writelane_b32 v0, s6, s4 1630; GFX1100-SDAG-NEXT: global_store_b64 v2, v[0:1], s[2:3] 1631; GFX1100-SDAG-NEXT: s_endpgm 1632; 1633; GFX802-GISEL-LABEL: test_writelane_sreg_oldval_f64: 1634; GFX802-GISEL: ; %bb.0: 1635; GFX802-GISEL-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0 1636; GFX802-GISEL-NEXT: s_load_dword s6, s[8:9], 0x18 1637; GFX802-GISEL-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x10 1638; GFX802-GISEL-NEXT: s_waitcnt lgkmcnt(0) 1639; GFX802-GISEL-NEXT: v_mov_b32_e32 v0, s0 1640; GFX802-GISEL-NEXT: v_mov_b32_e32 v1, s1 1641; GFX802-GISEL-NEXT: s_mov_b32 m0, s6 1642; GFX802-GISEL-NEXT: v_mov_b32_e32 v2, s2 1643; GFX802-GISEL-NEXT: v_writelane_b32 v0, s4, m0 1644; GFX802-GISEL-NEXT: v_writelane_b32 v1, s5, m0 1645; GFX802-GISEL-NEXT: v_mov_b32_e32 
v3, s3 1646; GFX802-GISEL-NEXT: flat_store_dwordx2 v[2:3], v[0:1] 1647; GFX802-GISEL-NEXT: s_endpgm 1648; 1649; GFX1010-GISEL-LABEL: test_writelane_sreg_oldval_f64: 1650; GFX1010-GISEL: ; %bb.0: 1651; GFX1010-GISEL-NEXT: s_clause 0x2 1652; GFX1010-GISEL-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0 1653; GFX1010-GISEL-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x10 1654; GFX1010-GISEL-NEXT: s_load_dword s6, s[8:9], 0x18 1655; GFX1010-GISEL-NEXT: v_mov_b32_e32 v2, 0 1656; GFX1010-GISEL-NEXT: s_waitcnt lgkmcnt(0) 1657; GFX1010-GISEL-NEXT: v_mov_b32_e32 v0, s0 1658; GFX1010-GISEL-NEXT: v_mov_b32_e32 v1, s1 1659; GFX1010-GISEL-NEXT: v_writelane_b32 v0, s4, s6 1660; GFX1010-GISEL-NEXT: v_writelane_b32 v1, s5, s6 1661; GFX1010-GISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[2:3] 1662; GFX1010-GISEL-NEXT: s_endpgm 1663; 1664; GFX1100-GISEL-LABEL: test_writelane_sreg_oldval_f64: 1665; GFX1100-GISEL: ; %bb.0: 1666; GFX1100-GISEL-NEXT: s_clause 0x2 1667; GFX1100-GISEL-NEXT: s_load_b128 s[0:3], s[4:5], 0x0 1668; GFX1100-GISEL-NEXT: s_load_b64 s[6:7], s[4:5], 0x10 1669; GFX1100-GISEL-NEXT: s_load_b32 s4, s[4:5], 0x18 1670; GFX1100-GISEL-NEXT: v_mov_b32_e32 v2, 0 1671; GFX1100-GISEL-NEXT: s_waitcnt lgkmcnt(0) 1672; GFX1100-GISEL-NEXT: v_mov_b32_e32 v0, s0 1673; GFX1100-GISEL-NEXT: v_mov_b32_e32 v1, s1 1674; GFX1100-GISEL-NEXT: v_writelane_b32 v0, s6, s4 1675; GFX1100-GISEL-NEXT: v_writelane_b32 v1, s7, s4 1676; GFX1100-GISEL-NEXT: global_store_b64 v2, v[0:1], s[2:3] 1677; GFX1100-GISEL-NEXT: s_endpgm 1678 %writelane = call double @llvm.amdgcn.writelane.f64(double %src0, i32 %src1, double %oldval) 1679 store double %writelane, ptr addrspace(1) %out, align 4 1680 ret void 1681} 1682 1683define amdgpu_kernel void @test_writelane_imm_oldval_i32(ptr addrspace(1) %out, i32 %src0, i32 %src1) #1 { 1684; GFX802-SDAG-LABEL: test_writelane_imm_oldval_i32: 1685; GFX802-SDAG: ; %bb.0: 1686; GFX802-SDAG-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0 1687; GFX802-SDAG-NEXT: v_mov_b32_e32 v2, 42 1688; 
GFX802-SDAG-NEXT: s_waitcnt lgkmcnt(0) 1689; GFX802-SDAG-NEXT: s_mov_b32 m0, s3 1690; GFX802-SDAG-NEXT: v_mov_b32_e32 v0, s0 1691; GFX802-SDAG-NEXT: v_writelane_b32 v2, s2, m0 1692; GFX802-SDAG-NEXT: v_mov_b32_e32 v1, s1 1693; GFX802-SDAG-NEXT: flat_store_dword v[0:1], v2 1694; GFX802-SDAG-NEXT: s_endpgm 1695; 1696; GFX1010-SDAG-LABEL: test_writelane_imm_oldval_i32: 1697; GFX1010-SDAG: ; %bb.0: 1698; GFX1010-SDAG-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0 1699; GFX1010-SDAG-NEXT: v_mov_b32_e32 v0, 42 1700; GFX1010-SDAG-NEXT: v_mov_b32_e32 v1, 0 1701; GFX1010-SDAG-NEXT: s_waitcnt lgkmcnt(0) 1702; GFX1010-SDAG-NEXT: v_writelane_b32 v0, s2, s3 1703; GFX1010-SDAG-NEXT: global_store_dword v1, v0, s[0:1] 1704; GFX1010-SDAG-NEXT: s_endpgm 1705; 1706; GFX1100-SDAG-LABEL: test_writelane_imm_oldval_i32: 1707; GFX1100-SDAG: ; %bb.0: 1708; GFX1100-SDAG-NEXT: s_load_b128 s[0:3], s[4:5], 0x0 1709; GFX1100-SDAG-NEXT: v_mov_b32_e32 v0, 42 1710; GFX1100-SDAG-NEXT: v_mov_b32_e32 v1, 0 1711; GFX1100-SDAG-NEXT: s_waitcnt lgkmcnt(0) 1712; GFX1100-SDAG-NEXT: v_writelane_b32 v0, s2, s3 1713; GFX1100-SDAG-NEXT: global_store_b32 v1, v0, s[0:1] 1714; GFX1100-SDAG-NEXT: s_endpgm 1715; 1716; GFX802-GISEL-LABEL: test_writelane_imm_oldval_i32: 1717; GFX802-GISEL: ; %bb.0: 1718; GFX802-GISEL-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0 1719; GFX802-GISEL-NEXT: v_mov_b32_e32 v2, 42 1720; GFX802-GISEL-NEXT: s_waitcnt lgkmcnt(0) 1721; GFX802-GISEL-NEXT: s_mov_b32 m0, s3 1722; GFX802-GISEL-NEXT: v_mov_b32_e32 v0, s0 1723; GFX802-GISEL-NEXT: v_writelane_b32 v2, s2, m0 1724; GFX802-GISEL-NEXT: v_mov_b32_e32 v1, s1 1725; GFX802-GISEL-NEXT: flat_store_dword v[0:1], v2 1726; GFX802-GISEL-NEXT: s_endpgm 1727; 1728; GFX1010-GISEL-LABEL: test_writelane_imm_oldval_i32: 1729; GFX1010-GISEL: ; %bb.0: 1730; GFX1010-GISEL-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0 1731; GFX1010-GISEL-NEXT: v_mov_b32_e32 v0, 42 1732; GFX1010-GISEL-NEXT: v_mov_b32_e32 v1, 0 1733; GFX1010-GISEL-NEXT: s_waitcnt lgkmcnt(0) 1734; 
GFX1010-GISEL-NEXT: v_writelane_b32 v0, s2, s3 1735; GFX1010-GISEL-NEXT: global_store_dword v1, v0, s[0:1] 1736; GFX1010-GISEL-NEXT: s_endpgm 1737; 1738; GFX1100-GISEL-LABEL: test_writelane_imm_oldval_i32: 1739; GFX1100-GISEL: ; %bb.0: 1740; GFX1100-GISEL-NEXT: s_load_b128 s[0:3], s[4:5], 0x0 1741; GFX1100-GISEL-NEXT: v_mov_b32_e32 v0, 42 1742; GFX1100-GISEL-NEXT: v_mov_b32_e32 v1, 0 1743; GFX1100-GISEL-NEXT: s_waitcnt lgkmcnt(0) 1744; GFX1100-GISEL-NEXT: v_writelane_b32 v0, s2, s3 1745; GFX1100-GISEL-NEXT: global_store_b32 v1, v0, s[0:1] 1746; GFX1100-GISEL-NEXT: s_endpgm 1747 %writelane = call i32 @llvm.amdgcn.writelane.i32(i32 %src0, i32 %src1, i32 42) 1748 store i32 %writelane, ptr addrspace(1) %out, align 4 1749 ret void 1750} 1751 1752define amdgpu_kernel void @test_writelane_imm_oldval_i64(ptr addrspace(1) %out, i64 %src0, i32 %src1) #1 { 1753; GFX802-SDAG-LABEL: test_writelane_imm_oldval_i64: 1754; GFX802-SDAG: ; %bb.0: 1755; GFX802-SDAG-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0 1756; GFX802-SDAG-NEXT: s_load_dword s4, s[8:9], 0x10 1757; GFX802-SDAG-NEXT: v_mov_b32_e32 v1, 0 1758; GFX802-SDAG-NEXT: v_mov_b32_e32 v0, 42 1759; GFX802-SDAG-NEXT: s_waitcnt lgkmcnt(0) 1760; GFX802-SDAG-NEXT: v_mov_b32_e32 v2, s0 1761; GFX802-SDAG-NEXT: s_mov_b32 m0, s4 1762; GFX802-SDAG-NEXT: v_mov_b32_e32 v3, s1 1763; GFX802-SDAG-NEXT: v_writelane_b32 v1, s3, m0 1764; GFX802-SDAG-NEXT: v_writelane_b32 v0, s2, m0 1765; GFX802-SDAG-NEXT: flat_store_dwordx2 v[2:3], v[0:1] 1766; GFX802-SDAG-NEXT: s_endpgm 1767; 1768; GFX1010-SDAG-LABEL: test_writelane_imm_oldval_i64: 1769; GFX1010-SDAG: ; %bb.0: 1770; GFX1010-SDAG-NEXT: s_clause 0x1 1771; GFX1010-SDAG-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0 1772; GFX1010-SDAG-NEXT: s_load_dword s4, s[8:9], 0x10 1773; GFX1010-SDAG-NEXT: v_mov_b32_e32 v1, 0 1774; GFX1010-SDAG-NEXT: v_mov_b32_e32 v0, 42 1775; GFX1010-SDAG-NEXT: v_mov_b32_e32 v2, 0 1776; GFX1010-SDAG-NEXT: s_waitcnt lgkmcnt(0) 1777; GFX1010-SDAG-NEXT: v_writelane_b32 v1, s3, s4 1778; 
GFX1010-SDAG-NEXT: v_writelane_b32 v0, s2, s4 1779; GFX1010-SDAG-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] 1780; GFX1010-SDAG-NEXT: s_endpgm 1781; 1782; GFX1100-SDAG-LABEL: test_writelane_imm_oldval_i64: 1783; GFX1100-SDAG: ; %bb.0: 1784; GFX1100-SDAG-NEXT: s_clause 0x1 1785; GFX1100-SDAG-NEXT: s_load_b128 s[0:3], s[4:5], 0x0 1786; GFX1100-SDAG-NEXT: s_load_b32 s4, s[4:5], 0x10 1787; GFX1100-SDAG-NEXT: v_mov_b32_e32 v1, 0 1788; GFX1100-SDAG-NEXT: v_mov_b32_e32 v0, 42 1789; GFX1100-SDAG-NEXT: v_mov_b32_e32 v2, 0 1790; GFX1100-SDAG-NEXT: s_waitcnt lgkmcnt(0) 1791; GFX1100-SDAG-NEXT: v_writelane_b32 v1, s3, s4 1792; GFX1100-SDAG-NEXT: v_writelane_b32 v0, s2, s4 1793; GFX1100-SDAG-NEXT: global_store_b64 v2, v[0:1], s[0:1] 1794; GFX1100-SDAG-NEXT: s_endpgm 1795; 1796; GFX802-GISEL-LABEL: test_writelane_imm_oldval_i64: 1797; GFX802-GISEL: ; %bb.0: 1798; GFX802-GISEL-NEXT: s_load_dword s4, s[8:9], 0x10 1799; GFX802-GISEL-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0 1800; GFX802-GISEL-NEXT: v_mov_b32_e32 v0, 42 1801; GFX802-GISEL-NEXT: v_mov_b32_e32 v1, 0 1802; GFX802-GISEL-NEXT: s_waitcnt lgkmcnt(0) 1803; GFX802-GISEL-NEXT: s_mov_b32 m0, s4 1804; GFX802-GISEL-NEXT: v_mov_b32_e32 v3, s1 1805; GFX802-GISEL-NEXT: v_writelane_b32 v0, s2, m0 1806; GFX802-GISEL-NEXT: v_writelane_b32 v1, s3, m0 1807; GFX802-GISEL-NEXT: v_mov_b32_e32 v2, s0 1808; GFX802-GISEL-NEXT: flat_store_dwordx2 v[2:3], v[0:1] 1809; GFX802-GISEL-NEXT: s_endpgm 1810; 1811; GFX1010-GISEL-LABEL: test_writelane_imm_oldval_i64: 1812; GFX1010-GISEL: ; %bb.0: 1813; GFX1010-GISEL-NEXT: s_clause 0x1 1814; GFX1010-GISEL-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0 1815; GFX1010-GISEL-NEXT: s_load_dword s4, s[8:9], 0x10 1816; GFX1010-GISEL-NEXT: v_mov_b32_e32 v0, 42 1817; GFX1010-GISEL-NEXT: v_mov_b32_e32 v1, 0 1818; GFX1010-GISEL-NEXT: v_mov_b32_e32 v2, 0 1819; GFX1010-GISEL-NEXT: s_waitcnt lgkmcnt(0) 1820; GFX1010-GISEL-NEXT: v_writelane_b32 v0, s2, s4 1821; GFX1010-GISEL-NEXT: v_writelane_b32 v1, s3, s4 1822; 
GFX1010-GISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] 1823; GFX1010-GISEL-NEXT: s_endpgm 1824; 1825; GFX1100-GISEL-LABEL: test_writelane_imm_oldval_i64: 1826; GFX1100-GISEL: ; %bb.0: 1827; GFX1100-GISEL-NEXT: s_clause 0x1 1828; GFX1100-GISEL-NEXT: s_load_b128 s[0:3], s[4:5], 0x0 1829; GFX1100-GISEL-NEXT: s_load_b32 s4, s[4:5], 0x10 1830; GFX1100-GISEL-NEXT: v_mov_b32_e32 v0, 42 1831; GFX1100-GISEL-NEXT: v_mov_b32_e32 v1, 0 1832; GFX1100-GISEL-NEXT: v_mov_b32_e32 v2, 0 1833; GFX1100-GISEL-NEXT: s_waitcnt lgkmcnt(0) 1834; GFX1100-GISEL-NEXT: v_writelane_b32 v0, s2, s4 1835; GFX1100-GISEL-NEXT: v_writelane_b32 v1, s3, s4 1836; GFX1100-GISEL-NEXT: global_store_b64 v2, v[0:1], s[0:1] 1837; GFX1100-GISEL-NEXT: s_endpgm 1838 %writelane = call i64 @llvm.amdgcn.writelane.i64(i64 %src0, i32 %src1, i64 42) 1839 store i64 %writelane, ptr addrspace(1) %out, align 4 1840 ret void 1841} 1842 1843define amdgpu_kernel void @test_writelane_imm_oldval_f64(ptr addrspace(1) %out, double %src0, i32 %src1) #1 { 1844; GFX802-SDAG-LABEL: test_writelane_imm_oldval_f64: 1845; GFX802-SDAG: ; %bb.0: 1846; GFX802-SDAG-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0 1847; GFX802-SDAG-NEXT: s_load_dword s4, s[8:9], 0x10 1848; GFX802-SDAG-NEXT: v_mov_b32_e32 v1, 0x40450000 1849; GFX802-SDAG-NEXT: v_mov_b32_e32 v0, 0 1850; GFX802-SDAG-NEXT: s_waitcnt lgkmcnt(0) 1851; GFX802-SDAG-NEXT: v_mov_b32_e32 v2, s0 1852; GFX802-SDAG-NEXT: s_mov_b32 m0, s4 1853; GFX802-SDAG-NEXT: v_mov_b32_e32 v3, s1 1854; GFX802-SDAG-NEXT: v_writelane_b32 v1, s3, m0 1855; GFX802-SDAG-NEXT: v_writelane_b32 v0, s2, m0 1856; GFX802-SDAG-NEXT: flat_store_dwordx2 v[2:3], v[0:1] 1857; GFX802-SDAG-NEXT: s_endpgm 1858; 1859; GFX1010-SDAG-LABEL: test_writelane_imm_oldval_f64: 1860; GFX1010-SDAG: ; %bb.0: 1861; GFX1010-SDAG-NEXT: s_clause 0x1 1862; GFX1010-SDAG-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0 1863; GFX1010-SDAG-NEXT: s_load_dword s4, s[8:9], 0x10 1864; GFX1010-SDAG-NEXT: v_mov_b32_e32 v1, 0x40450000 1865; GFX1010-SDAG-NEXT: 
v_mov_b32_e32 v0, 0 1866; GFX1010-SDAG-NEXT: v_mov_b32_e32 v2, 0 1867; GFX1010-SDAG-NEXT: s_waitcnt lgkmcnt(0) 1868; GFX1010-SDAG-NEXT: v_writelane_b32 v1, s3, s4 1869; GFX1010-SDAG-NEXT: v_writelane_b32 v0, s2, s4 1870; GFX1010-SDAG-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] 1871; GFX1010-SDAG-NEXT: s_endpgm 1872; 1873; GFX1100-SDAG-LABEL: test_writelane_imm_oldval_f64: 1874; GFX1100-SDAG: ; %bb.0: 1875; GFX1100-SDAG-NEXT: s_clause 0x1 1876; GFX1100-SDAG-NEXT: s_load_b128 s[0:3], s[4:5], 0x0 1877; GFX1100-SDAG-NEXT: s_load_b32 s4, s[4:5], 0x10 1878; GFX1100-SDAG-NEXT: v_mov_b32_e32 v1, 0x40450000 1879; GFX1100-SDAG-NEXT: v_mov_b32_e32 v0, 0 1880; GFX1100-SDAG-NEXT: v_mov_b32_e32 v2, 0 1881; GFX1100-SDAG-NEXT: s_waitcnt lgkmcnt(0) 1882; GFX1100-SDAG-NEXT: v_writelane_b32 v1, s3, s4 1883; GFX1100-SDAG-NEXT: v_writelane_b32 v0, s2, s4 1884; GFX1100-SDAG-NEXT: global_store_b64 v2, v[0:1], s[0:1] 1885; GFX1100-SDAG-NEXT: s_endpgm 1886; 1887; GFX802-GISEL-LABEL: test_writelane_imm_oldval_f64: 1888; GFX802-GISEL: ; %bb.0: 1889; GFX802-GISEL-NEXT: s_load_dword s4, s[8:9], 0x10 1890; GFX802-GISEL-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0 1891; GFX802-GISEL-NEXT: v_mov_b32_e32 v0, 0 1892; GFX802-GISEL-NEXT: v_mov_b32_e32 v1, 0x40450000 1893; GFX802-GISEL-NEXT: s_waitcnt lgkmcnt(0) 1894; GFX802-GISEL-NEXT: s_mov_b32 m0, s4 1895; GFX802-GISEL-NEXT: v_mov_b32_e32 v3, s1 1896; GFX802-GISEL-NEXT: v_writelane_b32 v0, s2, m0 1897; GFX802-GISEL-NEXT: v_writelane_b32 v1, s3, m0 1898; GFX802-GISEL-NEXT: v_mov_b32_e32 v2, s0 1899; GFX802-GISEL-NEXT: flat_store_dwordx2 v[2:3], v[0:1] 1900; GFX802-GISEL-NEXT: s_endpgm 1901; 1902; GFX1010-GISEL-LABEL: test_writelane_imm_oldval_f64: 1903; GFX1010-GISEL: ; %bb.0: 1904; GFX1010-GISEL-NEXT: s_clause 0x1 1905; GFX1010-GISEL-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0 1906; GFX1010-GISEL-NEXT: s_load_dword s4, s[8:9], 0x10 1907; GFX1010-GISEL-NEXT: v_mov_b32_e32 v0, 0 1908; GFX1010-GISEL-NEXT: v_mov_b32_e32 v1, 0x40450000 1909; 
GFX1010-GISEL-NEXT: v_mov_b32_e32 v2, 0 1910; GFX1010-GISEL-NEXT: s_waitcnt lgkmcnt(0) 1911; GFX1010-GISEL-NEXT: v_writelane_b32 v0, s2, s4 1912; GFX1010-GISEL-NEXT: v_writelane_b32 v1, s3, s4 1913; GFX1010-GISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] 1914; GFX1010-GISEL-NEXT: s_endpgm 1915; 1916; GFX1100-GISEL-LABEL: test_writelane_imm_oldval_f64: 1917; GFX1100-GISEL: ; %bb.0: 1918; GFX1100-GISEL-NEXT: s_clause 0x1 1919; GFX1100-GISEL-NEXT: s_load_b128 s[0:3], s[4:5], 0x0 1920; GFX1100-GISEL-NEXT: s_load_b32 s4, s[4:5], 0x10 1921; GFX1100-GISEL-NEXT: v_mov_b32_e32 v0, 0 1922; GFX1100-GISEL-NEXT: v_mov_b32_e32 v1, 0x40450000 1923; GFX1100-GISEL-NEXT: v_mov_b32_e32 v2, 0 1924; GFX1100-GISEL-NEXT: s_waitcnt lgkmcnt(0) 1925; GFX1100-GISEL-NEXT: v_writelane_b32 v0, s2, s4 1926; GFX1100-GISEL-NEXT: v_writelane_b32 v1, s3, s4 1927; GFX1100-GISEL-NEXT: global_store_b64 v2, v[0:1], s[0:1] 1928; GFX1100-GISEL-NEXT: s_endpgm 1929 %writelane = call double @llvm.amdgcn.writelane.f64(double %src0, i32 %src1, double 42.0) 1930 store double %writelane, ptr addrspace(1) %out, align 4 1931 ret void 1932} 1933 1934define void @test_writelane_half(ptr addrspace(1) %out, half %src, i32 %src1) { 1935; GFX802-SDAG-LABEL: test_writelane_half: 1936; GFX802-SDAG: ; %bb.0: 1937; GFX802-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1938; GFX802-SDAG-NEXT: flat_load_ushort v4, v[0:1] 1939; GFX802-SDAG-NEXT: v_readfirstlane_b32 m0, v3 1940; GFX802-SDAG-NEXT: v_readfirstlane_b32 s4, v2 1941; GFX802-SDAG-NEXT: s_waitcnt vmcnt(0) 1942; GFX802-SDAG-NEXT: s_nop 1 1943; GFX802-SDAG-NEXT: v_writelane_b32 v4, s4, m0 1944; GFX802-SDAG-NEXT: flat_store_short v[0:1], v4 1945; GFX802-SDAG-NEXT: s_waitcnt vmcnt(0) 1946; GFX802-SDAG-NEXT: s_setpc_b64 s[30:31] 1947; 1948; GFX1010-SDAG-LABEL: test_writelane_half: 1949; GFX1010-SDAG: ; %bb.0: 1950; GFX1010-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1951; GFX1010-SDAG-NEXT: global_load_ushort v4, v[0:1], off 1952; GFX1010-SDAG-NEXT: 
v_readfirstlane_b32 s4, v2 1953; GFX1010-SDAG-NEXT: v_readfirstlane_b32 s5, v3 1954; GFX1010-SDAG-NEXT: s_waitcnt vmcnt(0) 1955; GFX1010-SDAG-NEXT: v_writelane_b32 v4, s4, s5 1956; GFX1010-SDAG-NEXT: global_store_short v[0:1], v4, off 1957; GFX1010-SDAG-NEXT: s_setpc_b64 s[30:31] 1958; 1959; GFX1100-SDAG-LABEL: test_writelane_half: 1960; GFX1100-SDAG: ; %bb.0: 1961; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1962; GFX1100-SDAG-NEXT: global_load_u16 v4, v[0:1], off 1963; GFX1100-SDAG-NEXT: v_readfirstlane_b32 s0, v2 1964; GFX1100-SDAG-NEXT: v_readfirstlane_b32 s1, v3 1965; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) 1966; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) 1967; GFX1100-SDAG-NEXT: v_writelane_b32 v4, s0, s1 1968; GFX1100-SDAG-NEXT: global_store_b16 v[0:1], v4, off 1969; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31] 1970; 1971; GFX802-GISEL-LABEL: test_writelane_half: 1972; GFX802-GISEL: ; %bb.0: 1973; GFX802-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1974; GFX802-GISEL-NEXT: flat_load_ushort v4, v[0:1] 1975; GFX802-GISEL-NEXT: v_readfirstlane_b32 s5, v3 1976; GFX802-GISEL-NEXT: v_readfirstlane_b32 s4, v2 1977; GFX802-GISEL-NEXT: s_mov_b32 m0, s5 1978; GFX802-GISEL-NEXT: s_waitcnt vmcnt(0) 1979; GFX802-GISEL-NEXT: v_writelane_b32 v4, s4, m0 1980; GFX802-GISEL-NEXT: flat_store_short v[0:1], v4 1981; GFX802-GISEL-NEXT: s_waitcnt vmcnt(0) 1982; GFX802-GISEL-NEXT: s_setpc_b64 s[30:31] 1983; 1984; GFX1010-GISEL-LABEL: test_writelane_half: 1985; GFX1010-GISEL: ; %bb.0: 1986; GFX1010-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1987; GFX1010-GISEL-NEXT: global_load_ushort v4, v[0:1], off 1988; GFX1010-GISEL-NEXT: v_readfirstlane_b32 s4, v2 1989; GFX1010-GISEL-NEXT: v_readfirstlane_b32 s5, v3 1990; GFX1010-GISEL-NEXT: s_waitcnt vmcnt(0) 1991; GFX1010-GISEL-NEXT: v_writelane_b32 v4, s4, s5 1992; GFX1010-GISEL-NEXT: global_store_short v[0:1], v4, off 1993; GFX1010-GISEL-NEXT: s_setpc_b64 s[30:31] 1994; 1995; GFX1100-GISEL-LABEL: 
test_writelane_half: 1996; GFX1100-GISEL: ; %bb.0: 1997; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1998; GFX1100-GISEL-NEXT: global_load_u16 v4, v[0:1], off 1999; GFX1100-GISEL-NEXT: v_readfirstlane_b32 s0, v2 2000; GFX1100-GISEL-NEXT: v_readfirstlane_b32 s1, v3 2001; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) 2002; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) 2003; GFX1100-GISEL-NEXT: v_writelane_b32 v4, s0, s1 2004; GFX1100-GISEL-NEXT: global_store_b16 v[0:1], v4, off 2005; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31] 2006 %oldval = load half, ptr addrspace(1) %out 2007 %writelane = call half @llvm.amdgcn.writelane.f16(half %src, i32 %src1, half %oldval) 2008 store half %writelane, ptr addrspace(1) %out, align 4 2009 ret void 2010} 2011 2012define void @test_writelane_float(ptr addrspace(1) %out, float %src, i32 %src1) { 2013; GFX802-SDAG-LABEL: test_writelane_float: 2014; GFX802-SDAG: ; %bb.0: 2015; GFX802-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2016; GFX802-SDAG-NEXT: flat_load_dword v4, v[0:1] 2017; GFX802-SDAG-NEXT: v_readfirstlane_b32 m0, v3 2018; GFX802-SDAG-NEXT: v_readfirstlane_b32 s4, v2 2019; GFX802-SDAG-NEXT: s_waitcnt vmcnt(0) 2020; GFX802-SDAG-NEXT: s_nop 1 2021; GFX802-SDAG-NEXT: v_writelane_b32 v4, s4, m0 2022; GFX802-SDAG-NEXT: flat_store_dword v[0:1], v4 2023; GFX802-SDAG-NEXT: s_waitcnt vmcnt(0) 2024; GFX802-SDAG-NEXT: s_setpc_b64 s[30:31] 2025; 2026; GFX1010-SDAG-LABEL: test_writelane_float: 2027; GFX1010-SDAG: ; %bb.0: 2028; GFX1010-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2029; GFX1010-SDAG-NEXT: global_load_dword v4, v[0:1], off 2030; GFX1010-SDAG-NEXT: v_readfirstlane_b32 s4, v2 2031; GFX1010-SDAG-NEXT: v_readfirstlane_b32 s5, v3 2032; GFX1010-SDAG-NEXT: s_waitcnt vmcnt(0) 2033; GFX1010-SDAG-NEXT: v_writelane_b32 v4, s4, s5 2034; GFX1010-SDAG-NEXT: global_store_dword v[0:1], v4, off 2035; GFX1010-SDAG-NEXT: s_setpc_b64 s[30:31] 2036; 2037; GFX1100-SDAG-LABEL: test_writelane_float: 2038; GFX1100-SDAG: ; 
%bb.0: 2039; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2040; GFX1100-SDAG-NEXT: global_load_b32 v4, v[0:1], off 2041; GFX1100-SDAG-NEXT: v_readfirstlane_b32 s0, v2 2042; GFX1100-SDAG-NEXT: v_readfirstlane_b32 s1, v3 2043; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) 2044; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) 2045; GFX1100-SDAG-NEXT: v_writelane_b32 v4, s0, s1 2046; GFX1100-SDAG-NEXT: global_store_b32 v[0:1], v4, off 2047; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31] 2048; 2049; GFX802-GISEL-LABEL: test_writelane_float: 2050; GFX802-GISEL: ; %bb.0: 2051; GFX802-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2052; GFX802-GISEL-NEXT: flat_load_dword v4, v[0:1] 2053; GFX802-GISEL-NEXT: v_readfirstlane_b32 s5, v3 2054; GFX802-GISEL-NEXT: v_readfirstlane_b32 s4, v2 2055; GFX802-GISEL-NEXT: s_mov_b32 m0, s5 2056; GFX802-GISEL-NEXT: s_waitcnt vmcnt(0) 2057; GFX802-GISEL-NEXT: v_writelane_b32 v4, s4, m0 2058; GFX802-GISEL-NEXT: flat_store_dword v[0:1], v4 2059; GFX802-GISEL-NEXT: s_waitcnt vmcnt(0) 2060; GFX802-GISEL-NEXT: s_setpc_b64 s[30:31] 2061; 2062; GFX1010-GISEL-LABEL: test_writelane_float: 2063; GFX1010-GISEL: ; %bb.0: 2064; GFX1010-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2065; GFX1010-GISEL-NEXT: global_load_dword v4, v[0:1], off 2066; GFX1010-GISEL-NEXT: v_readfirstlane_b32 s4, v2 2067; GFX1010-GISEL-NEXT: v_readfirstlane_b32 s5, v3 2068; GFX1010-GISEL-NEXT: s_waitcnt vmcnt(0) 2069; GFX1010-GISEL-NEXT: v_writelane_b32 v4, s4, s5 2070; GFX1010-GISEL-NEXT: global_store_dword v[0:1], v4, off 2071; GFX1010-GISEL-NEXT: s_setpc_b64 s[30:31] 2072; 2073; GFX1100-GISEL-LABEL: test_writelane_float: 2074; GFX1100-GISEL: ; %bb.0: 2075; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2076; GFX1100-GISEL-NEXT: global_load_b32 v4, v[0:1], off 2077; GFX1100-GISEL-NEXT: v_readfirstlane_b32 s0, v2 2078; GFX1100-GISEL-NEXT: v_readfirstlane_b32 s1, v3 2079; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) 2080; GFX1100-GISEL-NEXT: s_delay_alu 
instid0(VALU_DEP_1) 2081; GFX1100-GISEL-NEXT: v_writelane_b32 v4, s0, s1 2082; GFX1100-GISEL-NEXT: global_store_b32 v[0:1], v4, off 2083; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31] 2084 %oldval = load float, ptr addrspace(1) %out 2085 %writelane = call float @llvm.amdgcn.writelane.f32(float %src, i32 %src1, float %oldval) 2086 store float %writelane, ptr addrspace(1) %out, align 4 2087 ret void 2088} 2089 2090define void @test_writelane_bfloat(ptr addrspace(1) %out, bfloat %src, i32 %src1) { 2091; GFX802-SDAG-LABEL: test_writelane_bfloat: 2092; GFX802-SDAG: ; %bb.0: 2093; GFX802-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2094; GFX802-SDAG-NEXT: flat_load_ushort v4, v[0:1] 2095; GFX802-SDAG-NEXT: v_readfirstlane_b32 m0, v3 2096; GFX802-SDAG-NEXT: v_readfirstlane_b32 s4, v2 2097; GFX802-SDAG-NEXT: s_waitcnt vmcnt(0) 2098; GFX802-SDAG-NEXT: s_nop 1 2099; GFX802-SDAG-NEXT: v_writelane_b32 v4, s4, m0 2100; GFX802-SDAG-NEXT: flat_store_short v[0:1], v4 2101; GFX802-SDAG-NEXT: s_waitcnt vmcnt(0) 2102; GFX802-SDAG-NEXT: s_setpc_b64 s[30:31] 2103; 2104; GFX1010-SDAG-LABEL: test_writelane_bfloat: 2105; GFX1010-SDAG: ; %bb.0: 2106; GFX1010-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2107; GFX1010-SDAG-NEXT: global_load_ushort v4, v[0:1], off 2108; GFX1010-SDAG-NEXT: v_readfirstlane_b32 s4, v2 2109; GFX1010-SDAG-NEXT: v_readfirstlane_b32 s5, v3 2110; GFX1010-SDAG-NEXT: s_waitcnt vmcnt(0) 2111; GFX1010-SDAG-NEXT: v_writelane_b32 v4, s4, s5 2112; GFX1010-SDAG-NEXT: global_store_short v[0:1], v4, off 2113; GFX1010-SDAG-NEXT: s_setpc_b64 s[30:31] 2114; 2115; GFX1100-SDAG-LABEL: test_writelane_bfloat: 2116; GFX1100-SDAG: ; %bb.0: 2117; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2118; GFX1100-SDAG-NEXT: global_load_u16 v4, v[0:1], off 2119; GFX1100-SDAG-NEXT: v_readfirstlane_b32 s0, v2 2120; GFX1100-SDAG-NEXT: v_readfirstlane_b32 s1, v3 2121; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) 2122; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) 2123; 
GFX1100-SDAG-NEXT: v_writelane_b32 v4, s0, s1 2124; GFX1100-SDAG-NEXT: global_store_b16 v[0:1], v4, off 2125; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31] 2126; 2127; GFX802-GISEL-LABEL: test_writelane_bfloat: 2128; GFX802-GISEL: ; %bb.0: 2129; GFX802-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2130; GFX802-GISEL-NEXT: flat_load_ushort v4, v[0:1] 2131; GFX802-GISEL-NEXT: v_readfirstlane_b32 m0, v3 2132; GFX802-GISEL-NEXT: v_readfirstlane_b32 s4, v2 2133; GFX802-GISEL-NEXT: s_waitcnt vmcnt(0) 2134; GFX802-GISEL-NEXT: s_nop 1 2135; GFX802-GISEL-NEXT: v_writelane_b32 v4, s4, m0 2136; GFX802-GISEL-NEXT: flat_store_short v[0:1], v4 2137; GFX802-GISEL-NEXT: s_waitcnt vmcnt(0) 2138; GFX802-GISEL-NEXT: s_setpc_b64 s[30:31] 2139; 2140; GFX1010-GISEL-LABEL: test_writelane_bfloat: 2141; GFX1010-GISEL: ; %bb.0: 2142; GFX1010-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2143; GFX1010-GISEL-NEXT: global_load_ushort v4, v[0:1], off 2144; GFX1010-GISEL-NEXT: v_readfirstlane_b32 s4, v2 2145; GFX1010-GISEL-NEXT: v_readfirstlane_b32 s5, v3 2146; GFX1010-GISEL-NEXT: s_waitcnt vmcnt(0) 2147; GFX1010-GISEL-NEXT: v_writelane_b32 v4, s4, s5 2148; GFX1010-GISEL-NEXT: global_store_short v[0:1], v4, off 2149; GFX1010-GISEL-NEXT: s_setpc_b64 s[30:31] 2150; 2151; GFX1100-GISEL-LABEL: test_writelane_bfloat: 2152; GFX1100-GISEL: ; %bb.0: 2153; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2154; GFX1100-GISEL-NEXT: global_load_u16 v4, v[0:1], off 2155; GFX1100-GISEL-NEXT: v_readfirstlane_b32 s0, v2 2156; GFX1100-GISEL-NEXT: v_readfirstlane_b32 s1, v3 2157; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) 2158; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) 2159; GFX1100-GISEL-NEXT: v_writelane_b32 v4, s0, s1 2160; GFX1100-GISEL-NEXT: global_store_b16 v[0:1], v4, off 2161; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31] 2162 %oldval = load bfloat, ptr addrspace(1) %out 2163 %writelane = call bfloat @llvm.amdgcn.writelane.bf16(bfloat %src, i32 %src1, bfloat %oldval) 2164 store bfloat 
%writelane, ptr addrspace(1) %out, align 4
  ret void
}

; i16 case. The value loaded from %out is passed as the third operand of
; llvm.amdgcn.writelane.i16, with %src and %src1 as the first and second
; operands; the call result is stored back to %out. The expected code for every
; RUN configuration contains a single v_writelane_b32.
define void @test_writelane_i16(ptr addrspace(1) %out, i16 %src, i32 %src1) {
; GFX802-SDAG-LABEL: test_writelane_i16:
; GFX802-SDAG: ; %bb.0:
; GFX802-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX802-SDAG-NEXT: flat_load_ushort v4, v[0:1]
; GFX802-SDAG-NEXT: v_readfirstlane_b32 m0, v3
; GFX802-SDAG-NEXT: v_readfirstlane_b32 s4, v2
; GFX802-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX802-SDAG-NEXT: s_nop 1
; GFX802-SDAG-NEXT: v_writelane_b32 v4, s4, m0
; GFX802-SDAG-NEXT: flat_store_short v[0:1], v4
; GFX802-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX802-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1010-SDAG-LABEL: test_writelane_i16:
; GFX1010-SDAG: ; %bb.0:
; GFX1010-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1010-SDAG-NEXT: global_load_ushort v4, v[0:1], off
; GFX1010-SDAG-NEXT: v_readfirstlane_b32 s4, v2
; GFX1010-SDAG-NEXT: v_readfirstlane_b32 s5, v3
; GFX1010-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX1010-SDAG-NEXT: v_writelane_b32 v4, s4, s5
; GFX1010-SDAG-NEXT: global_store_short v[0:1], v4, off
; GFX1010-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-SDAG-LABEL: test_writelane_i16:
; GFX1100-SDAG: ; %bb.0:
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-SDAG-NEXT: global_load_u16 v4, v[0:1], off
; GFX1100-SDAG-NEXT: v_readfirstlane_b32 s0, v2
; GFX1100-SDAG-NEXT: v_readfirstlane_b32 s1, v3
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1100-SDAG-NEXT: v_writelane_b32 v4, s0, s1
; GFX1100-SDAG-NEXT: global_store_b16 v[0:1], v4, off
; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX802-GISEL-LABEL: test_writelane_i16:
; GFX802-GISEL: ; %bb.0:
; GFX802-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX802-GISEL-NEXT: flat_load_ushort v4, v[0:1]
; GFX802-GISEL-NEXT: v_readfirstlane_b32 s5, v3
; GFX802-GISEL-NEXT: v_readfirstlane_b32 s4, v2
; GFX802-GISEL-NEXT: s_mov_b32 m0, s5
; GFX802-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX802-GISEL-NEXT: v_writelane_b32 v4, s4, m0
; GFX802-GISEL-NEXT: flat_store_short v[0:1], v4
; GFX802-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX802-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1010-GISEL-LABEL: test_writelane_i16:
; GFX1010-GISEL: ; %bb.0:
; GFX1010-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1010-GISEL-NEXT: global_load_ushort v4, v[0:1], off
; GFX1010-GISEL-NEXT: v_readfirstlane_b32 s4, v2
; GFX1010-GISEL-NEXT: v_readfirstlane_b32 s5, v3
; GFX1010-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX1010-GISEL-NEXT: v_writelane_b32 v4, s4, s5
; GFX1010-GISEL-NEXT: global_store_short v[0:1], v4, off
; GFX1010-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-GISEL-LABEL: test_writelane_i16:
; GFX1100-GISEL: ; %bb.0:
; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-GISEL-NEXT: global_load_u16 v4, v[0:1], off
; GFX1100-GISEL-NEXT: v_readfirstlane_b32 s0, v2
; GFX1100-GISEL-NEXT: v_readfirstlane_b32 s1, v3
; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1100-GISEL-NEXT: v_writelane_b32 v4, s0, s1
; GFX1100-GISEL-NEXT: global_store_b16 v[0:1], v4, off
; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31]
  %oldval = load i16, ptr addrspace(1) %out
  %writelane = call i16 @llvm.amdgcn.writelane.i16(i16 %src, i32 %src1, i16 %oldval)
  store i16 %writelane, ptr addrspace(1) %out, align 4
  ret void
}

; <2 x half> case. Same load / writelane / store shape as the i16 test, using
; llvm.amdgcn.writelane.v2f16. The expected code moves the value with dword-sized
; loads and stores and a single v_writelane_b32 in every RUN configuration.
define void @test_writelane_v2f16(ptr addrspace(1) %out, <2 x half> %src, i32 %src1) {
; GFX802-SDAG-LABEL: test_writelane_v2f16:
; GFX802-SDAG: ; %bb.0:
; GFX802-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX802-SDAG-NEXT: flat_load_dword v4, v[0:1]
; GFX802-SDAG-NEXT: v_readfirstlane_b32 m0, v3
; GFX802-SDAG-NEXT: v_readfirstlane_b32 s4, v2
; GFX802-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX802-SDAG-NEXT: s_nop 1
; GFX802-SDAG-NEXT: v_writelane_b32 v4, s4, m0
; GFX802-SDAG-NEXT: flat_store_dword v[0:1], v4
; GFX802-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX802-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1010-SDAG-LABEL: test_writelane_v2f16:
; GFX1010-SDAG: ; %bb.0:
; GFX1010-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1010-SDAG-NEXT: global_load_dword v4, v[0:1], off
; GFX1010-SDAG-NEXT: v_readfirstlane_b32 s4, v2
; GFX1010-SDAG-NEXT: v_readfirstlane_b32 s5, v3
; GFX1010-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX1010-SDAG-NEXT: v_writelane_b32 v4, s4, s5
; GFX1010-SDAG-NEXT: global_store_dword v[0:1], v4, off
; GFX1010-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-SDAG-LABEL: test_writelane_v2f16:
; GFX1100-SDAG: ; %bb.0:
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-SDAG-NEXT: global_load_b32 v4, v[0:1], off
; GFX1100-SDAG-NEXT: v_readfirstlane_b32 s0, v2
; GFX1100-SDAG-NEXT: v_readfirstlane_b32 s1, v3
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1100-SDAG-NEXT: v_writelane_b32 v4, s0, s1
; GFX1100-SDAG-NEXT: global_store_b32 v[0:1], v4, off
; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX802-GISEL-LABEL: test_writelane_v2f16:
; GFX802-GISEL: ; %bb.0:
; GFX802-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX802-GISEL-NEXT: flat_load_dword v4, v[0:1]
; GFX802-GISEL-NEXT: v_readfirstlane_b32 s5, v3
; GFX802-GISEL-NEXT: v_readfirstlane_b32 s4, v2
; GFX802-GISEL-NEXT: s_mov_b32 m0, s5
; GFX802-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX802-GISEL-NEXT: v_writelane_b32 v4, s4, m0
; GFX802-GISEL-NEXT: flat_store_dword v[0:1], v4
; GFX802-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX802-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1010-GISEL-LABEL: test_writelane_v2f16:
; GFX1010-GISEL: ; %bb.0:
; GFX1010-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1010-GISEL-NEXT: global_load_dword v4, v[0:1], off
; GFX1010-GISEL-NEXT: v_readfirstlane_b32 s4, v2
; GFX1010-GISEL-NEXT: v_readfirstlane_b32 s5, v3
; GFX1010-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX1010-GISEL-NEXT: v_writelane_b32 v4, s4, s5
; GFX1010-GISEL-NEXT: global_store_dword v[0:1], v4, off
; GFX1010-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-GISEL-LABEL: test_writelane_v2f16:
; GFX1100-GISEL: ; %bb.0:
; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-GISEL-NEXT: global_load_b32 v4, v[0:1], off
; GFX1100-GISEL-NEXT: v_readfirstlane_b32 s0, v2
; GFX1100-GISEL-NEXT: v_readfirstlane_b32 s1, v3
; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1100-GISEL-NEXT: v_writelane_b32 v4, s0, s1
; GFX1100-GISEL-NEXT: global_store_b32 v[0:1], v4, off
; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31]
  %oldval = load <2 x half>, ptr addrspace(1) %out
  %writelane = call <2 x half> @llvm.amdgcn.writelane.v2f16(<2 x half> %src, i32 %src1, <2 x half> %oldval)
  store <2 x half> %writelane, ptr addrspace(1) %out, align 4
  ret void
}

; <2 x float> case. Same load / writelane / store shape, using
; llvm.amdgcn.writelane.v2f32. The expected code contains two v_writelane_b32
; instructions that share one lane-select register (m0 on gfx802).
; NOTE(review): the function is named test_readlane_v2f32 although it calls the
; writelane intrinsic like its neighbours; the name looks like a leftover.
; Renaming it would also require regenerating the label checks below.
define void @test_readlane_v2f32(ptr addrspace(1) %out, <2 x float> %src, i32 %src1) {
; GFX802-SDAG-LABEL: test_readlane_v2f32:
; GFX802-SDAG: ; %bb.0:
; GFX802-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX802-SDAG-NEXT: flat_load_dwordx2 v[5:6], v[0:1]
; GFX802-SDAG-NEXT: v_readfirstlane_b32 m0, v4
; GFX802-SDAG-NEXT: v_readfirstlane_b32 s4, v3
; GFX802-SDAG-NEXT: v_readfirstlane_b32 s5, v2
; GFX802-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX802-SDAG-NEXT: s_nop 0
; GFX802-SDAG-NEXT: v_writelane_b32 v6, s4, m0
; GFX802-SDAG-NEXT: v_writelane_b32 v5, s5, m0
; GFX802-SDAG-NEXT: flat_store_dwordx2 v[0:1], v[5:6]
; GFX802-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX802-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1010-SDAG-LABEL: test_readlane_v2f32:
; GFX1010-SDAG: ; %bb.0:
; GFX1010-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1010-SDAG-NEXT: global_load_dwordx2 v[5:6], v[0:1], off
; GFX1010-SDAG-NEXT: v_readfirstlane_b32 s4, v3
; GFX1010-SDAG-NEXT: v_readfirstlane_b32 s5, v4
; GFX1010-SDAG-NEXT: v_readfirstlane_b32 s6, v2
; GFX1010-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX1010-SDAG-NEXT: v_writelane_b32 v6, s4, s5
; GFX1010-SDAG-NEXT: v_writelane_b32 v5, s6, s5
; GFX1010-SDAG-NEXT: global_store_dwordx2 v[0:1], v[5:6], off
; GFX1010-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-SDAG-LABEL: test_readlane_v2f32:
; GFX1100-SDAG: ; %bb.0:
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-SDAG-NEXT: global_load_b64 v[5:6], v[0:1], off
; GFX1100-SDAG-NEXT: v_readfirstlane_b32 s0, v3
; GFX1100-SDAG-NEXT: v_readfirstlane_b32 s1, v4
; GFX1100-SDAG-NEXT: v_readfirstlane_b32 s2, v2
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX1100-SDAG-NEXT: v_writelane_b32 v6, s0, s1
; GFX1100-SDAG-NEXT: v_writelane_b32 v5, s2, s1
; GFX1100-SDAG-NEXT: global_store_b64 v[0:1], v[5:6], off
; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX802-GISEL-LABEL: test_readlane_v2f32:
; GFX802-GISEL: ; %bb.0:
; GFX802-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX802-GISEL-NEXT: flat_load_dwordx2 v[5:6], v[0:1]
; GFX802-GISEL-NEXT: v_readfirstlane_b32 s5, v4
; GFX802-GISEL-NEXT: v_readfirstlane_b32 s4, v2
; GFX802-GISEL-NEXT: v_readfirstlane_b32 s6, v3
; GFX802-GISEL-NEXT: s_mov_b32 m0, s5
; GFX802-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX802-GISEL-NEXT: v_writelane_b32 v5, s4, m0
; GFX802-GISEL-NEXT: v_writelane_b32 v6, s6, m0
; GFX802-GISEL-NEXT: flat_store_dwordx2 v[0:1], v[5:6]
; GFX802-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX802-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1010-GISEL-LABEL: test_readlane_v2f32:
; GFX1010-GISEL: ; %bb.0:
; GFX1010-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1010-GISEL-NEXT: global_load_dwordx2 v[5:6], v[0:1], off
; GFX1010-GISEL-NEXT: v_readfirstlane_b32 s4, v2
; GFX1010-GISEL-NEXT: v_readfirstlane_b32 s5, v4
; GFX1010-GISEL-NEXT: v_readfirstlane_b32 s6, v3
; GFX1010-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX1010-GISEL-NEXT: v_writelane_b32 v5, s4, s5
; GFX1010-GISEL-NEXT: v_writelane_b32 v6, s6, s5
; GFX1010-GISEL-NEXT: global_store_dwordx2 v[0:1], v[5:6], off
; GFX1010-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-GISEL-LABEL: test_readlane_v2f32:
; GFX1100-GISEL: ; %bb.0:
; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-GISEL-NEXT: global_load_b64 v[5:6], v[0:1], off
; GFX1100-GISEL-NEXT: v_readfirstlane_b32 s0, v2
; GFX1100-GISEL-NEXT: v_readfirstlane_b32 s1, v4
; GFX1100-GISEL-NEXT: v_readfirstlane_b32 s2, v3
; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX1100-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX1100-GISEL-NEXT: v_writelane_b32 v5, s0, s1
; GFX1100-GISEL-NEXT: v_writelane_b32 v6, s2, s1
; GFX1100-GISEL-NEXT: global_store_b64 v[0:1], v[5:6], off
; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31]
  %oldval = load <2 x float>, ptr addrspace(1) %out
  %writelane = call <2 x float> @llvm.amdgcn.writelane.v2f32(<2 x float> %src, i32 %src1, <2 x float> %oldval)
  store <2 x float> %writelane, ptr addrspace(1) %out, align 4
  ret void
}

; <7 x i32> case. Same load / writelane / store shape, using
; llvm.amdgcn.writelane.v7i32. The expected code contains seven v_writelane_b32
; instructions and moves the vector with two memory accesses each way (a
; four-dword access plus a second access at byte offset 16).
define void @test_writelane_v7i32(ptr addrspace(1) %out, <7 x i32> %src, i32 %src1) {
; GFX802-SDAG-LABEL: test_writelane_v7i32:
; GFX802-SDAG: ; %bb.0:
; GFX802-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX802-SDAG-NEXT: v_add_u32_e32 v17, vcc, 16, v0
; GFX802-SDAG-NEXT: flat_load_dwordx4 v[10:13], v[0:1]
; GFX802-SDAG-NEXT: v_addc_u32_e32 v18, vcc, 0, v1, vcc
; GFX802-SDAG-NEXT: flat_load_dwordx3 v[14:16], v[17:18]
; GFX802-SDAG-NEXT: v_readfirstlane_b32 m0, v9
; GFX802-SDAG-NEXT: v_readfirstlane_b32 s7, v5
; GFX802-SDAG-NEXT: v_readfirstlane_b32 s8, v4
; GFX802-SDAG-NEXT: v_readfirstlane_b32 s9, v3
; GFX802-SDAG-NEXT: v_readfirstlane_b32 s10, v2
; GFX802-SDAG-NEXT: v_readfirstlane_b32 s4, v8
; GFX802-SDAG-NEXT: v_readfirstlane_b32 s5, v7
; GFX802-SDAG-NEXT: v_readfirstlane_b32 s6, v6
; GFX802-SDAG-NEXT: s_waitcnt vmcnt(1)
; GFX802-SDAG-NEXT: v_writelane_b32 v13, s7, m0
; GFX802-SDAG-NEXT: v_writelane_b32 v12, s8, m0
; GFX802-SDAG-NEXT: v_writelane_b32 v11, s9, m0
; GFX802-SDAG-NEXT: v_writelane_b32 v10, s10, m0
; GFX802-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX802-SDAG-NEXT: v_writelane_b32 v16, s4, m0
; GFX802-SDAG-NEXT: v_writelane_b32 v15, s5, m0
; GFX802-SDAG-NEXT: v_writelane_b32 v14, s6, m0
; GFX802-SDAG-NEXT: flat_store_dwordx4 v[0:1], v[10:13]
; GFX802-SDAG-NEXT: flat_store_dwordx3 v[17:18], v[14:16]
; GFX802-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX802-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1010-SDAG-LABEL: test_writelane_v7i32:
; GFX1010-SDAG: ; %bb.0:
; GFX1010-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1010-SDAG-NEXT: s_clause 0x1
; GFX1010-SDAG-NEXT: global_load_dwordx3 v[14:16], v[0:1], off offset:16
; GFX1010-SDAG-NEXT: global_load_dwordx4 v[10:13], v[0:1], off
; GFX1010-SDAG-NEXT: v_readfirstlane_b32 s5, v9
; GFX1010-SDAG-NEXT: v_readfirstlane_b32 s8, v5
; GFX1010-SDAG-NEXT: v_readfirstlane_b32 s9, v4
; GFX1010-SDAG-NEXT: v_readfirstlane_b32 s10, v3
; GFX1010-SDAG-NEXT: v_readfirstlane_b32 s11, v2
; GFX1010-SDAG-NEXT: v_readfirstlane_b32 s4, v8
; GFX1010-SDAG-NEXT: v_readfirstlane_b32 s6, v7
; GFX1010-SDAG-NEXT: v_readfirstlane_b32 s7, v6
; GFX1010-SDAG-NEXT: s_waitcnt vmcnt(1)
; GFX1010-SDAG-NEXT: v_writelane_b32 v16, s4, s5
; GFX1010-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX1010-SDAG-NEXT: v_writelane_b32 v13, s8, s5
; GFX1010-SDAG-NEXT: v_writelane_b32 v12, s9, s5
; GFX1010-SDAG-NEXT: v_writelane_b32 v11, s10, s5
; GFX1010-SDAG-NEXT: v_writelane_b32 v10, s11, s5
; GFX1010-SDAG-NEXT: v_writelane_b32 v15, s6, s5
; GFX1010-SDAG-NEXT: v_writelane_b32 v14, s7, s5
; GFX1010-SDAG-NEXT: global_store_dwordx4 v[0:1], v[10:13], off
; GFX1010-SDAG-NEXT: global_store_dwordx3 v[0:1], v[14:16], off offset:16
; GFX1010-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-SDAG-LABEL: test_writelane_v7i32:
; GFX1100-SDAG: ; %bb.0:
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-SDAG-NEXT: s_clause 0x1
; GFX1100-SDAG-NEXT: global_load_b96 v[14:16], v[0:1], off offset:16
; GFX1100-SDAG-NEXT: global_load_b128 v[10:13], v[0:1], off
; GFX1100-SDAG-NEXT: v_readfirstlane_b32 s1, v9
; GFX1100-SDAG-NEXT: v_readfirstlane_b32 s4, v5
; GFX1100-SDAG-NEXT: v_readfirstlane_b32 s5, v4
; GFX1100-SDAG-NEXT: v_readfirstlane_b32 s6, v3
; GFX1100-SDAG-NEXT: v_readfirstlane_b32 s7, v2
; GFX1100-SDAG-NEXT: v_readfirstlane_b32 s0, v8
; GFX1100-SDAG-NEXT: v_readfirstlane_b32 s2, v7
; GFX1100-SDAG-NEXT: v_readfirstlane_b32 s3, v6
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(1)
; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_3)
; GFX1100-SDAG-NEXT: v_writelane_b32 v16, s0, s1
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX1100-SDAG-NEXT: v_writelane_b32 v13, s4, s1
; GFX1100-SDAG-NEXT: v_writelane_b32 v12, s5, s1
; GFX1100-SDAG-NEXT: v_writelane_b32 v11, s6, s1
; GFX1100-SDAG-NEXT: v_writelane_b32 v10, s7, s1
; GFX1100-SDAG-NEXT: v_writelane_b32 v15, s2, s1
; GFX1100-SDAG-NEXT: v_writelane_b32 v14, s3, s1
; GFX1100-SDAG-NEXT: s_clause 0x1
; GFX1100-SDAG-NEXT: global_store_b128 v[0:1], v[10:13], off
; GFX1100-SDAG-NEXT: global_store_b96 v[0:1], v[14:16], off offset:16
; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX802-GISEL-LABEL: test_writelane_v7i32:
; GFX802-GISEL: ; %bb.0:
; GFX802-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX802-GISEL-NEXT: v_add_u32_e32 v18, vcc, 16, v0
; GFX802-GISEL-NEXT: flat_load_dwordx4 v[10:13], v[0:1]
; GFX802-GISEL-NEXT: v_addc_u32_e32 v19, vcc, 0, v1, vcc
; GFX802-GISEL-NEXT: flat_load_dwordx4 v[14:17], v[18:19]
; GFX802-GISEL-NEXT: v_readfirstlane_b32 s5, v9
; GFX802-GISEL-NEXT: v_readfirstlane_b32 s4, v2
; GFX802-GISEL-NEXT: v_readfirstlane_b32 s6, v3
; GFX802-GISEL-NEXT: v_readfirstlane_b32 s7, v4
; GFX802-GISEL-NEXT: v_readfirstlane_b32 s8, v5
; GFX802-GISEL-NEXT: s_mov_b32 m0, s5
; GFX802-GISEL-NEXT: v_readfirstlane_b32 s9, v6
; GFX802-GISEL-NEXT: v_readfirstlane_b32 s10, v7
; GFX802-GISEL-NEXT: v_readfirstlane_b32 s11, v8
; GFX802-GISEL-NEXT: s_waitcnt vmcnt(1)
; GFX802-GISEL-NEXT: v_writelane_b32 v10, s4, m0
; GFX802-GISEL-NEXT: v_writelane_b32 v11, s6, m0
; GFX802-GISEL-NEXT: v_writelane_b32 v12, s7, m0
; GFX802-GISEL-NEXT: v_writelane_b32 v13, s8, m0
; GFX802-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX802-GISEL-NEXT: v_writelane_b32 v14, s9, m0
; GFX802-GISEL-NEXT: v_writelane_b32 v15, s10, m0
; GFX802-GISEL-NEXT: v_writelane_b32 v16, s11, m0
; GFX802-GISEL-NEXT: flat_store_dwordx4 v[0:1], v[10:13]
; GFX802-GISEL-NEXT: flat_store_dwordx3 v[18:19], v[14:16]
; GFX802-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX802-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1010-GISEL-LABEL: test_writelane_v7i32:
; GFX1010-GISEL: ; %bb.0:
; GFX1010-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1010-GISEL-NEXT: s_clause 0x1
; GFX1010-GISEL-NEXT: global_load_dwordx4 v[10:13], v[0:1], off
; GFX1010-GISEL-NEXT: global_load_dwordx4 v[14:17], v[0:1], off offset:16
; GFX1010-GISEL-NEXT: v_readfirstlane_b32 s4, v2
; GFX1010-GISEL-NEXT: v_readfirstlane_b32 s5, v9
; GFX1010-GISEL-NEXT: v_readfirstlane_b32 s6, v3
; GFX1010-GISEL-NEXT: v_readfirstlane_b32 s7, v4
; GFX1010-GISEL-NEXT: v_readfirstlane_b32 s8, v5
; GFX1010-GISEL-NEXT: v_readfirstlane_b32 s9, v6
; GFX1010-GISEL-NEXT: v_readfirstlane_b32 s10, v7
; GFX1010-GISEL-NEXT: v_readfirstlane_b32 s11, v8
; GFX1010-GISEL-NEXT: s_waitcnt vmcnt(1)
; GFX1010-GISEL-NEXT: v_writelane_b32 v10, s4, s5
; GFX1010-GISEL-NEXT: v_writelane_b32 v11, s6, s5
; GFX1010-GISEL-NEXT: v_writelane_b32 v12, s7, s5
; GFX1010-GISEL-NEXT: v_writelane_b32 v13, s8, s5
; GFX1010-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX1010-GISEL-NEXT: v_writelane_b32 v14, s9, s5
; GFX1010-GISEL-NEXT: v_writelane_b32 v15, s10, s5
; GFX1010-GISEL-NEXT: v_writelane_b32 v16, s11, s5
; GFX1010-GISEL-NEXT: global_store_dwordx4 v[0:1], v[10:13], off
; GFX1010-GISEL-NEXT: global_store_dwordx3 v[0:1], v[14:16], off offset:16
; GFX1010-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-GISEL-LABEL: test_writelane_v7i32:
; GFX1100-GISEL: ; %bb.0:
; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-GISEL-NEXT: s_clause 0x1
; GFX1100-GISEL-NEXT: global_load_b128 v[10:13], v[0:1], off
; GFX1100-GISEL-NEXT: global_load_b128 v[14:17], v[0:1], off offset:16
; GFX1100-GISEL-NEXT: v_readfirstlane_b32 s0, v2
; GFX1100-GISEL-NEXT: v_readfirstlane_b32 s1, v9
; GFX1100-GISEL-NEXT: v_readfirstlane_b32 s2, v3
; GFX1100-GISEL-NEXT: v_readfirstlane_b32 s3, v4
; GFX1100-GISEL-NEXT: v_readfirstlane_b32 s4, v5
; GFX1100-GISEL-NEXT: v_readfirstlane_b32 s5, v6
; GFX1100-GISEL-NEXT: v_readfirstlane_b32 s6, v7
; GFX1100-GISEL-NEXT: v_readfirstlane_b32 s7, v8
; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(1)
; GFX1100-GISEL-NEXT: v_writelane_b32 v10, s0, s1
; GFX1100-GISEL-NEXT: v_writelane_b32 v11, s2, s1
; GFX1100-GISEL-NEXT: v_writelane_b32 v12, s3, s1
; GFX1100-GISEL-NEXT: v_writelane_b32 v13, s4, s1
; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX1100-GISEL-NEXT: v_writelane_b32 v14, s5, s1
; GFX1100-GISEL-NEXT: v_writelane_b32 v15, s6, s1
; GFX1100-GISEL-NEXT: v_writelane_b32 v16, s7, s1
; GFX1100-GISEL-NEXT: s_clause 0x1
; GFX1100-GISEL-NEXT: global_store_b128 v[0:1], v[10:13], off
; GFX1100-GISEL-NEXT: global_store_b96 v[0:1], v[14:16], off offset:16
; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31]
  %oldval = load <7 x i32>, ptr addrspace(1) %out
  %writelane = call <7 x i32> @llvm.amdgcn.writelane.v7i32(<7 x i32> %src, i32 %src1, <7 x i32> %oldval)
  store <7 x i32> %writelane, ptr addrspace(1) %out, align 4
  ret void
}

; <8 x i16> case. Same load / writelane / store shape, using
; llvm.amdgcn.writelane.v8i16. The expected code contains four v_writelane_b32
; instructions operating on the four dwords v[7:10] of the loaded vector.
define void @test_writelane_v8i16(ptr addrspace(1) %out, <8 x i16> %src, i32 %src1) {
; GFX802-SDAG-LABEL: test_writelane_v8i16:
; GFX802-SDAG: ; %bb.0:
; GFX802-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX802-SDAG-NEXT: flat_load_dwordx4 v[7:10], v[0:1]
; GFX802-SDAG-NEXT: v_readfirstlane_b32 m0, v6
; GFX802-SDAG-NEXT: v_readfirstlane_b32 s4, v5
; GFX802-SDAG-NEXT: v_readfirstlane_b32 s5, v4
; GFX802-SDAG-NEXT: v_readfirstlane_b32 s6, v3
; GFX802-SDAG-NEXT: v_readfirstlane_b32 s7, v2
; GFX802-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX802-SDAG-NEXT: v_writelane_b32 v10, s4, m0
; GFX802-SDAG-NEXT: v_writelane_b32 v9, s5, m0
; GFX802-SDAG-NEXT: v_writelane_b32 v8, s6, m0
; GFX802-SDAG-NEXT: v_writelane_b32 v7, s7, m0
; GFX802-SDAG-NEXT: flat_store_dwordx4 v[0:1], v[7:10]
; GFX802-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX802-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1010-SDAG-LABEL: test_writelane_v8i16:
; GFX1010-SDAG: ; %bb.0:
; GFX1010-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1010-SDAG-NEXT: global_load_dwordx4 v[7:10], v[0:1], off
; GFX1010-SDAG-NEXT: v_readfirstlane_b32 s4, v5
; GFX1010-SDAG-NEXT: v_readfirstlane_b32 s5, v6
; GFX1010-SDAG-NEXT: v_readfirstlane_b32 s6, v4
; GFX1010-SDAG-NEXT: v_readfirstlane_b32 s7, v3
; GFX1010-SDAG-NEXT: v_readfirstlane_b32 s8, v2
; GFX1010-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX1010-SDAG-NEXT: v_writelane_b32 v10, s4, s5
; GFX1010-SDAG-NEXT: v_writelane_b32 v9, s6, s5
; GFX1010-SDAG-NEXT: v_writelane_b32 v8, s7, s5
; GFX1010-SDAG-NEXT: v_writelane_b32 v7, s8, s5
; GFX1010-SDAG-NEXT: global_store_dwordx4 v[0:1], v[7:10], off
; GFX1010-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-SDAG-LABEL: test_writelane_v8i16:
; GFX1100-SDAG: ; %bb.0:
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-SDAG-NEXT: global_load_b128 v[7:10], v[0:1], off
; GFX1100-SDAG-NEXT: v_readfirstlane_b32 s0, v5
; GFX1100-SDAG-NEXT: v_readfirstlane_b32 s1, v6
; GFX1100-SDAG-NEXT: v_readfirstlane_b32 s2, v4
; GFX1100-SDAG-NEXT: v_readfirstlane_b32 s3, v3
; GFX1100-SDAG-NEXT: v_readfirstlane_b32 s4, v2
; GFX1100-SDAG-NEXT: s_waitcnt vmcnt(0)
; GFX1100-SDAG-NEXT: v_writelane_b32 v10, s0, s1
; GFX1100-SDAG-NEXT: v_writelane_b32 v9, s2, s1
; GFX1100-SDAG-NEXT: v_writelane_b32 v8, s3, s1
; GFX1100-SDAG-NEXT: v_writelane_b32 v7, s4, s1
; GFX1100-SDAG-NEXT: global_store_b128 v[0:1], v[7:10], off
; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX802-GISEL-LABEL: test_writelane_v8i16:
; GFX802-GISEL: ; %bb.0:
; GFX802-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX802-GISEL-NEXT: flat_load_dwordx4 v[7:10], v[0:1]
; GFX802-GISEL-NEXT: v_readfirstlane_b32 s5, v6
; GFX802-GISEL-NEXT: v_readfirstlane_b32 s4, v2
; GFX802-GISEL-NEXT: v_readfirstlane_b32 s6, v3
; GFX802-GISEL-NEXT: v_readfirstlane_b32 s7, v4
; GFX802-GISEL-NEXT: v_readfirstlane_b32 s8, v5
; GFX802-GISEL-NEXT: s_mov_b32 m0, s5
; GFX802-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX802-GISEL-NEXT: v_writelane_b32 v7, s4, m0
; GFX802-GISEL-NEXT: v_writelane_b32 v8, s6, m0
; GFX802-GISEL-NEXT: v_writelane_b32 v9, s7, m0
; GFX802-GISEL-NEXT: v_writelane_b32 v10, s8, m0
; GFX802-GISEL-NEXT: flat_store_dwordx4 v[0:1], v[7:10]
; GFX802-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX802-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1010-GISEL-LABEL: test_writelane_v8i16:
; GFX1010-GISEL: ; %bb.0:
; GFX1010-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1010-GISEL-NEXT: global_load_dwordx4 v[7:10], v[0:1], off
; GFX1010-GISEL-NEXT: v_readfirstlane_b32 s4, v2
; GFX1010-GISEL-NEXT: v_readfirstlane_b32 s5, v6
; GFX1010-GISEL-NEXT: v_readfirstlane_b32 s6, v3
; GFX1010-GISEL-NEXT: v_readfirstlane_b32 s7, v4
; GFX1010-GISEL-NEXT: v_readfirstlane_b32 s8, v5
; GFX1010-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX1010-GISEL-NEXT: v_writelane_b32 v7, s4, s5
; GFX1010-GISEL-NEXT: v_writelane_b32 v8, s6, s5
; GFX1010-GISEL-NEXT: v_writelane_b32 v9, s7, s5
; GFX1010-GISEL-NEXT: v_writelane_b32 v10, s8, s5
; GFX1010-GISEL-NEXT: global_store_dwordx4 v[0:1], v[7:10], off
; GFX1010-GISEL-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-GISEL-LABEL: test_writelane_v8i16:
; GFX1100-GISEL: ; %bb.0:
; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1100-GISEL-NEXT: global_load_b128 v[7:10], v[0:1], off
; GFX1100-GISEL-NEXT: v_readfirstlane_b32 s0, v2
; GFX1100-GISEL-NEXT: v_readfirstlane_b32 s1, v6
; GFX1100-GISEL-NEXT: v_readfirstlane_b32 s2, v3
; GFX1100-GISEL-NEXT: v_readfirstlane_b32 s3, v4
; GFX1100-GISEL-NEXT: v_readfirstlane_b32 s4, v5
; GFX1100-GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX1100-GISEL-NEXT: v_writelane_b32 v7, s0, s1
; GFX1100-GISEL-NEXT: v_writelane_b32 v8, s2, s1
; GFX1100-GISEL-NEXT: v_writelane_b32 v9, s3, s1
; GFX1100-GISEL-NEXT: v_writelane_b32 v10, s4, s1
; GFX1100-GISEL-NEXT: global_store_b128 v[0:1], v[7:10], off
; GFX1100-GISEL-NEXT: s_setpc_b64 s[30:31]
  %oldval = load <8 x i16>, ptr addrspace(1) %out
  %writelane = call <8 x i16> @llvm.amdgcn.writelane.v8i16(<8 x i16> %src, i32 %src1, <8 x i16> %oldval)
  store <8 x i16> %writelane, ptr addrspace(1) %out, align 4
  ret void
}

; Not called by any of the functions directly above; presumably used by tests
; earlier in the file.
declare i32 @llvm.amdgcn.workitem.id.x() #2

; Attribute groups referenced by the declarations and definitions in this file:
; #0 is attached to the writelane declarations at the top of the file, #1 to the
; amdgpu_kernel tests, and #2 to the workitem.id.x declaration above.
attributes #0 = { nounwind readnone convergent }
attributes #1 = { nounwind }
attributes #2 = { nounwind readnone }