; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefix=CHECK-SDAG -enable-var-scope %s
; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=fiji -verify-machineinstrs -global-isel -global-isel-abort=2 < %s | FileCheck -check-prefix=CHECK-GISEL -enable-var-scope %s

; Code generation tests for the llvm.amdgcn.readfirstlane intrinsic on
; amdgcn--amdhsa / fiji. Every function is compiled twice: once with the
; default llc invocation and once with -global-isel -global-isel-abort=2, and
; each invocation has its own set of expected-assembly lines. Below,
; "SelectionDAG" refers to the first invocation's expectations and "GlobalISel"
; to the second's.

; readfirstlane of an i1 function argument, result stored to %out.
; Both selectors expect v_readfirstlane_b32 on v2 followed by s_and_b32 with 1
; before the byte store.
define void @test_readfirstlane_i1(ptr addrspace(1) %out, i1 %src) {
; CHECK-SDAG-LABEL: test_readfirstlane_i1:
; CHECK-SDAG: ; %bb.0:
; CHECK-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-SDAG-NEXT: v_readfirstlane_b32 s4, v2
; CHECK-SDAG-NEXT: s_and_b32 s4, s4, 1
; CHECK-SDAG-NEXT: v_mov_b32_e32 v2, s4
; CHECK-SDAG-NEXT: flat_store_byte v[0:1], v2
; CHECK-SDAG-NEXT: s_waitcnt vmcnt(0)
; CHECK-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; CHECK-GISEL-LABEL: test_readfirstlane_i1:
; CHECK-GISEL: ; %bb.0:
; CHECK-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-GISEL-NEXT: v_readfirstlane_b32 s4, v2
; CHECK-GISEL-NEXT: s_and_b32 s4, s4, 1
; CHECK-GISEL-NEXT: v_mov_b32_e32 v2, s4
; CHECK-GISEL-NEXT: flat_store_byte v[0:1], v2
; CHECK-GISEL-NEXT: s_waitcnt vmcnt(0)
; CHECK-GISEL-NEXT: s_setpc_b64 s[30:31]
  %readfirstlane = call i1 @llvm.amdgcn.readfirstlane.i1(i1 %src)
  store i1 %readfirstlane, ptr addrspace(1) %out, align 4
  ret void
}

; Same as above but %src carries the `inreg` attribute. The expected output
; contains no v_readfirstlane_b32: s16 is masked with 1 directly.
define void @test_readfirstlane_i1_inreg(ptr addrspace(1) %out, i1 inreg %src) {
; CHECK-SDAG-LABEL: test_readfirstlane_i1_inreg:
; CHECK-SDAG: ; %bb.0:
; CHECK-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-SDAG-NEXT: s_and_b32 s4, s16, 1
; CHECK-SDAG-NEXT: v_mov_b32_e32 v2, s4
; CHECK-SDAG-NEXT: flat_store_byte v[0:1], v2
; CHECK-SDAG-NEXT: s_waitcnt vmcnt(0)
; CHECK-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; CHECK-GISEL-LABEL: test_readfirstlane_i1_inreg:
; CHECK-GISEL: ; %bb.0:
; CHECK-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-GISEL-NEXT: s_and_b32 s4, s16, 1
; CHECK-GISEL-NEXT: v_mov_b32_e32 v2, s4
; CHECK-GISEL-NEXT: flat_store_byte v[0:1], v2
; CHECK-GISEL-NEXT: s_waitcnt vmcnt(0)
; CHECK-GISEL-NEXT: s_setpc_b64 s[30:31]
  %readfirstlane = call i1 @llvm.amdgcn.readfirstlane.i1(i1 %src)
  store i1 %readfirstlane, ptr addrspace(1) %out, align 4
  ret void
}

; readfirstlane of an `icmp ugt %src, 42` result, used as the condition of a
; select between %src and %src1. The two selectors rebuild the condition
; differently after the readfirstlane: SelectionDAG expects
; s_bitcmp1_b32 + s_cselect_b64, GlobalISel expects s_and_b32 + v_cmp_ne_u32_e64.
define void @test_readfirstlane_i1_select(ptr addrspace(1) %out, i32 %src, i32 %src1) {
; CHECK-SDAG-LABEL: test_readfirstlane_i1_select:
; CHECK-SDAG: ; %bb.0:
; CHECK-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-SDAG-NEXT: v_cmp_lt_u32_e32 vcc, 42, v2
; CHECK-SDAG-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc
; CHECK-SDAG-NEXT: v_readfirstlane_b32 s4, v4
; CHECK-SDAG-NEXT: s_bitcmp1_b32 s4, 0
; CHECK-SDAG-NEXT: s_cselect_b64 vcc, -1, 0
; CHECK-SDAG-NEXT: v_cndmask_b32_e32 v2, v3, v2, vcc
; CHECK-SDAG-NEXT: flat_store_dword v[0:1], v2
; CHECK-SDAG-NEXT: s_waitcnt vmcnt(0)
; CHECK-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; CHECK-GISEL-LABEL: test_readfirstlane_i1_select:
; CHECK-GISEL: ; %bb.0:
; CHECK-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-GISEL-NEXT: v_cmp_lt_u32_e32 vcc, 42, v2
; CHECK-GISEL-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc
; CHECK-GISEL-NEXT: v_readfirstlane_b32 s4, v4
; CHECK-GISEL-NEXT: s_and_b32 s4, 1, s4
; CHECK-GISEL-NEXT: v_cmp_ne_u32_e64 vcc, 0, s4
; CHECK-GISEL-NEXT: v_cndmask_b32_e32 v2, v3, v2, vcc
; CHECK-GISEL-NEXT: flat_store_dword v[0:1], v2
; CHECK-GISEL-NEXT: s_waitcnt vmcnt(0)
; CHECK-GISEL-NEXT: s_setpc_b64 s[30:31]
  %cmp = icmp ugt i32 %src, 42
  %readfirstlane = call i1 @llvm.amdgcn.readfirstlane.i1(i1 %cmp)
  %sel = select i1 %readfirstlane, i32 %src, i32 %src1
  store i32 %sel, ptr addrspace(1) %out, align 4
  ret void
}

; readfirstlane of an i1 loaded from %in, result stored to %out. Expected
; output: flat_load_ubyte, v_readfirstlane_b32, s_and_b32 with 1, byte store.
define void @test_readfirstlane_i1_load(ptr addrspace(1) %out, ptr addrspace(1) %in) {
; CHECK-SDAG-LABEL: test_readfirstlane_i1_load:
; CHECK-SDAG: ; %bb.0:
; CHECK-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-SDAG-NEXT: flat_load_ubyte v2, v[2:3]
; CHECK-SDAG-NEXT: s_waitcnt vmcnt(0)
; CHECK-SDAG-NEXT: v_readfirstlane_b32 s4, v2
; CHECK-SDAG-NEXT: s_and_b32 s4, s4, 1
; CHECK-SDAG-NEXT: v_mov_b32_e32 v2, s4
; CHECK-SDAG-NEXT: flat_store_byte v[0:1], v2
; CHECK-SDAG-NEXT: s_waitcnt vmcnt(0)
; CHECK-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; CHECK-GISEL-LABEL: test_readfirstlane_i1_load:
; CHECK-GISEL: ; %bb.0:
; CHECK-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-GISEL-NEXT: flat_load_ubyte v2, v[2:3]
; CHECK-GISEL-NEXT: s_waitcnt vmcnt(0)
; CHECK-GISEL-NEXT: v_readfirstlane_b32 s4, v2
; CHECK-GISEL-NEXT: s_and_b32 s4, s4, 1
; CHECK-GISEL-NEXT: v_mov_b32_e32 v2, s4
; CHECK-GISEL-NEXT: flat_store_byte v[0:1], v2
; CHECK-GISEL-NEXT: s_waitcnt vmcnt(0)
; CHECK-GISEL-NEXT: s_setpc_b64 s[30:31]
  %load = load i1, ptr addrspace(1) %in
  %readfirstlane = call i1 @llvm.amdgcn.readfirstlane.i1(i1 %load)
  store i1 %readfirstlane, ptr addrspace(1) %out, align 4
  ret void
}

; Baseline 32-bit case: readfirstlane of an i32 argument, stored to %out.
; A single v_readfirstlane_b32 is expected from both selectors.
define void @test_readfirstlane_i32(ptr addrspace(1) %out, i32 %src) {
; CHECK-SDAG-LABEL: test_readfirstlane_i32:
; CHECK-SDAG: ; %bb.0:
; CHECK-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-SDAG-NEXT: v_readfirstlane_b32 s4, v2
; CHECK-SDAG-NEXT: v_mov_b32_e32 v2, s4
; CHECK-SDAG-NEXT: flat_store_dword v[0:1], v2
; CHECK-SDAG-NEXT: s_waitcnt vmcnt(0)
; CHECK-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; CHECK-GISEL-LABEL: test_readfirstlane_i32:
; CHECK-GISEL: ; %bb.0:
; CHECK-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-GISEL-NEXT: v_readfirstlane_b32 s4, v2
; CHECK-GISEL-NEXT: v_mov_b32_e32 v2, s4
; CHECK-GISEL-NEXT: flat_store_dword v[0:1], v2
; CHECK-GISEL-NEXT: s_waitcnt vmcnt(0)
; CHECK-GISEL-NEXT: s_setpc_b64 s[30:31]
  %readfirstlane = call i32 @llvm.amdgcn.readfirstlane.i32(i32 %src)
  store i32 %readfirstlane, ptr addrspace(1) %out, align 4
  ret void
}

; 64-bit integer case: the expected output uses two v_readfirstlane_b32
; instructions, one per 32-bit half (v2 and v3). The selectors differ only in
; which half is read first and which SGPR receives it.
define void @test_readfirstlane_i64(ptr addrspace(1) %out, i64 %src) {
; CHECK-SDAG-LABEL: test_readfirstlane_i64:
; CHECK-SDAG: ; %bb.0:
; CHECK-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-SDAG-NEXT: v_readfirstlane_b32 s4, v3
; CHECK-SDAG-NEXT: v_readfirstlane_b32 s5, v2
; CHECK-SDAG-NEXT: v_mov_b32_e32 v2, s5
; CHECK-SDAG-NEXT: v_mov_b32_e32 v3, s4
; CHECK-SDAG-NEXT: flat_store_dwordx2 v[0:1], v[2:3]
; CHECK-SDAG-NEXT: s_waitcnt vmcnt(0)
; CHECK-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; CHECK-GISEL-LABEL: test_readfirstlane_i64:
; CHECK-GISEL: ; %bb.0:
; CHECK-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-GISEL-NEXT: v_readfirstlane_b32 s4, v2
; CHECK-GISEL-NEXT: v_readfirstlane_b32 s5, v3
; CHECK-GISEL-NEXT: v_mov_b32_e32 v2, s4
; CHECK-GISEL-NEXT: v_mov_b32_e32 v3, s5
; CHECK-GISEL-NEXT: flat_store_dwordx2 v[0:1], v[2:3]
; CHECK-GISEL-NEXT: s_waitcnt vmcnt(0)
; CHECK-GISEL-NEXT: s_setpc_b64 s[30:31]
  %readfirstlane = call i64 @llvm.amdgcn.readfirstlane.i64(i64 %src)
  store i64 %readfirstlane, ptr addrspace(1) %out, align 4
  ret void
}

; 64-bit floating-point case. The expected assembly is the same as for
; test_readfirstlane_i64: two v_readfirstlane_b32, one per 32-bit half.
define void @test_readfirstlane_f64(ptr addrspace(1) %out, double %src) {
; CHECK-SDAG-LABEL: test_readfirstlane_f64:
; CHECK-SDAG: ; %bb.0:
; CHECK-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-SDAG-NEXT: v_readfirstlane_b32 s4, v3
; CHECK-SDAG-NEXT: v_readfirstlane_b32 s5, v2
; CHECK-SDAG-NEXT: v_mov_b32_e32 v2, s5
; CHECK-SDAG-NEXT: v_mov_b32_e32 v3, s4
; CHECK-SDAG-NEXT: flat_store_dwordx2 v[0:1], v[2:3]
; CHECK-SDAG-NEXT: s_waitcnt vmcnt(0)
; CHECK-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; CHECK-GISEL-LABEL: test_readfirstlane_f64:
; CHECK-GISEL: ; %bb.0:
; CHECK-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-GISEL-NEXT: v_readfirstlane_b32 s4, v2
; CHECK-GISEL-NEXT: v_readfirstlane_b32 s5, v3
; CHECK-GISEL-NEXT: v_mov_b32_e32 v2, s4
; CHECK-GISEL-NEXT: v_mov_b32_e32 v3, s5
; CHECK-GISEL-NEXT: flat_store_dwordx2 v[0:1], v[2:3]
; CHECK-GISEL-NEXT: s_waitcnt vmcnt(0)
; CHECK-GISEL-NEXT: s_setpc_b64 s[30:31]
  %readfirstlane = call double @llvm.amdgcn.readfirstlane.f64(double %src)
  store double %readfirstlane, ptr addrspace(1) %out, align 4
  ret void
}

; readfirstlane of the constant i32 32, consumed by inline asm through an "s"
; constraint. The expected output has no v_readfirstlane_b32: the constant is
; moved into s0 with s_mov_b32.
define amdgpu_kernel void @test_readfirstlane_imm_i32(ptr addrspace(1) %out) {
; CHECK-SDAG-LABEL: test_readfirstlane_imm_i32:
; CHECK-SDAG: ; %bb.0:
; CHECK-SDAG-NEXT: s_mov_b32 s0, 32
; CHECK-SDAG-NEXT: ;;#ASMSTART
; CHECK-SDAG-NEXT: ; use s0
; CHECK-SDAG-NEXT: ;;#ASMEND
; CHECK-SDAG-NEXT: s_endpgm
;
; CHECK-GISEL-LABEL: test_readfirstlane_imm_i32:
; CHECK-GISEL: ; %bb.0:
; CHECK-GISEL-NEXT: s_mov_b32 s0, 32
; CHECK-GISEL-NEXT: ;;#ASMSTART
; CHECK-GISEL-NEXT: ; use s0
; CHECK-GISEL-NEXT: ;;#ASMEND
; CHECK-GISEL-NEXT: s_endpgm
  %readfirstlane = call i32 @llvm.amdgcn.readfirstlane.i32(i32 32)
  call void asm sideeffect "; use $0", "s"(i32 %readfirstlane)
  ret void
}

; readfirstlane of the constant i64 32, consumed by inline asm. Expected
; output: a single s_mov_b64 into s[0:1], no v_readfirstlane_b32.
define amdgpu_kernel void @test_readfirstlane_imm_i64(ptr addrspace(1) %out) {
; CHECK-SDAG-LABEL: test_readfirstlane_imm_i64:
; CHECK-SDAG: ; %bb.0:
; CHECK-SDAG-NEXT: s_mov_b64 s[0:1], 32
; CHECK-SDAG-NEXT: ;;#ASMSTART
; CHECK-SDAG-NEXT: ; use s[0:1]
; CHECK-SDAG-NEXT: ;;#ASMEND
; CHECK-SDAG-NEXT: s_endpgm
;
; CHECK-GISEL-LABEL: test_readfirstlane_imm_i64:
; CHECK-GISEL: ; %bb.0:
; CHECK-GISEL-NEXT: s_mov_b64 s[0:1], 32
; CHECK-GISEL-NEXT: ;;#ASMSTART
; CHECK-GISEL-NEXT: ; use s[0:1]
; CHECK-GISEL-NEXT: ;;#ASMEND
; CHECK-GISEL-NEXT: s_endpgm
  %readfirstlane = call i64 @llvm.amdgcn.readfirstlane.i64(i64 32)
  call void asm sideeffect "; use $0", "s"(i64 %readfirstlane)
  ret void
}

; readfirstlane of the constant double 32.0, consumed by inline asm. Expected
; output: two s_mov_b32 (low dword 0, high dword 0x40400000, i.e. the IEEE-754
; double encoding of 32.0), no v_readfirstlane_b32.
define amdgpu_kernel void @test_readfirstlane_imm_f64(ptr addrspace(1) %out) {
; CHECK-SDAG-LABEL: test_readfirstlane_imm_f64:
; CHECK-SDAG: ; %bb.0:
; CHECK-SDAG-NEXT: s_mov_b32 s0, 0
; CHECK-SDAG-NEXT: s_mov_b32 s1, 0x40400000
; CHECK-SDAG-NEXT: ;;#ASMSTART
; CHECK-SDAG-NEXT: ; use s[0:1]
; CHECK-SDAG-NEXT: ;;#ASMEND
; CHECK-SDAG-NEXT: s_endpgm
;
; CHECK-GISEL-LABEL: test_readfirstlane_imm_f64:
; CHECK-GISEL: ; %bb.0:
; CHECK-GISEL-NEXT: s_mov_b32 s0, 0
; CHECK-GISEL-NEXT: s_mov_b32 s1, 0x40400000
; CHECK-GISEL-NEXT: ;;#ASMSTART
; CHECK-GISEL-NEXT: ; use s[0:1]
; CHECK-GISEL-NEXT: ;;#ASMEND
; CHECK-GISEL-NEXT: s_endpgm
  %readfirstlane = call double @llvm.amdgcn.readfirstlane.f64(double 32.0)
  call void asm sideeffect "; use $0", "s"(double %readfirstlane)
  ret void
}

; readfirstlane of the constant i32 32, stored to %out. The expected output
; materializes 32 directly in a VGPR (v_mov_b32_e32 v2, 32) with no
; v_readfirstlane_b32.
define amdgpu_kernel void @test_readfirstlane_imm_fold_i32(ptr addrspace(1) %out) {
; CHECK-SDAG-LABEL: test_readfirstlane_imm_fold_i32:
; CHECK-SDAG: ; %bb.0:
; CHECK-SDAG-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0
; CHECK-SDAG-NEXT: v_mov_b32_e32 v2, 32
; CHECK-SDAG-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-SDAG-NEXT: v_mov_b32_e32 v0, s0
; CHECK-SDAG-NEXT: v_mov_b32_e32 v1, s1
; CHECK-SDAG-NEXT: flat_store_dword v[0:1], v2
; CHECK-SDAG-NEXT: s_endpgm
;
; CHECK-GISEL-LABEL: test_readfirstlane_imm_fold_i32:
; CHECK-GISEL: ; %bb.0:
; CHECK-GISEL-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0
; CHECK-GISEL-NEXT: v_mov_b32_e32 v2, 32
; CHECK-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-GISEL-NEXT: v_mov_b32_e32 v0, s0
; CHECK-GISEL-NEXT: v_mov_b32_e32 v1, s1
; CHECK-GISEL-NEXT: flat_store_dword v[0:1], v2
; CHECK-GISEL-NEXT: s_endpgm
  %readfirstlane = call i32 @llvm.amdgcn.readfirstlane.i32(i32 32)
  store i32 %readfirstlane, ptr addrspace(1) %out, align 4
  ret void
}

; readfirstlane of the constant i64 32, stored to %out. No v_readfirstlane_b32
; is expected. SelectionDAG moves the immediates straight into VGPRs, while
; GlobalISel first materializes the constant with s_mov_b64 and then copies it.
define amdgpu_kernel void @test_readfirstlane_imm_fold_i64(ptr addrspace(1) %out) {
; CHECK-SDAG-LABEL: test_readfirstlane_imm_fold_i64:
; CHECK-SDAG: ; %bb.0:
; CHECK-SDAG-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0
; CHECK-SDAG-NEXT: v_mov_b32_e32 v0, 32
; CHECK-SDAG-NEXT: v_mov_b32_e32 v1, 0
; CHECK-SDAG-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-SDAG-NEXT: v_mov_b32_e32 v3, s1
; CHECK-SDAG-NEXT: v_mov_b32_e32 v2, s0
; CHECK-SDAG-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
; CHECK-SDAG-NEXT: s_endpgm
;
; CHECK-GISEL-LABEL: test_readfirstlane_imm_fold_i64:
; CHECK-GISEL: ; %bb.0:
; CHECK-GISEL-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0
; CHECK-GISEL-NEXT: s_mov_b64 s[2:3], 32
; CHECK-GISEL-NEXT: v_mov_b32_e32 v0, s2
; CHECK-GISEL-NEXT: v_mov_b32_e32 v1, s3
; CHECK-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-GISEL-NEXT: v_mov_b32_e32 v3, s1
; CHECK-GISEL-NEXT: v_mov_b32_e32 v2, s0
; CHECK-GISEL-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
; CHECK-GISEL-NEXT: s_endpgm
  %readfirstlane = call i64 @llvm.amdgcn.readfirstlane.i64(i64 32)
  store i64 %readfirstlane, ptr addrspace(1) %out, align 4
  ret void
}

; readfirstlane of the constant double 32.0, stored to %out. No
; v_readfirstlane_b32 is expected. SelectionDAG moves the two dwords straight
; into VGPRs, while GlobalISel goes through s2/s3 first.
define amdgpu_kernel void @test_readfirstlane_imm_fold_f64(ptr addrspace(1) %out) {
; CHECK-SDAG-LABEL: test_readfirstlane_imm_fold_f64:
; CHECK-SDAG: ; %bb.0:
; CHECK-SDAG-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0
; CHECK-SDAG-NEXT: v_mov_b32_e32 v0, 0
; CHECK-SDAG-NEXT: v_mov_b32_e32 v1, 0x40400000
; CHECK-SDAG-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-SDAG-NEXT: v_mov_b32_e32 v3, s1
; CHECK-SDAG-NEXT: v_mov_b32_e32 v2, s0
; CHECK-SDAG-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
; CHECK-SDAG-NEXT: s_endpgm
;
; CHECK-GISEL-LABEL: test_readfirstlane_imm_fold_f64:
; CHECK-GISEL: ; %bb.0:
; CHECK-GISEL-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0
; CHECK-GISEL-NEXT: s_mov_b32 s2, 0
; CHECK-GISEL-NEXT: s_mov_b32 s3, 0x40400000
; CHECK-GISEL-NEXT: v_mov_b32_e32 v0, s2
; CHECK-GISEL-NEXT: v_mov_b32_e32 v1, s3
; CHECK-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-GISEL-NEXT: v_mov_b32_e32 v3, s1
; CHECK-GISEL-NEXT: v_mov_b32_e32 v2, s0
; CHECK-GISEL-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
; CHECK-GISEL-NEXT: s_endpgm
  %readfirstlane = call double @llvm.amdgcn.readfirstlane.f64(double 32.0)
  store double %readfirstlane, ptr addrspace(1) %out, align 4
  ret void
}

; readfirstlane of a value produced in m0 by inline asm ("={m0}" constraint).
; The expected output copies m0 straight to a VGPR with no v_readfirstlane_b32.
; NOTE(review): this call uses the unsuffixed intrinsic name
; @llvm.amdgcn.readfirstlane, unlike the `.i32` spelling used by most other
; tests in this file; presumably intentional -- confirm before normalizing.
define amdgpu_kernel void @test_readfirstlane_m0(ptr addrspace(1) %out) {
; CHECK-SDAG-LABEL: test_readfirstlane_m0:
; CHECK-SDAG: ; %bb.0:
; CHECK-SDAG-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0
; CHECK-SDAG-NEXT: ;;#ASMSTART
; CHECK-SDAG-NEXT: s_mov_b32 m0, -1
; CHECK-SDAG-NEXT: ;;#ASMEND
; CHECK-SDAG-NEXT: v_mov_b32_e32 v2, m0
; CHECK-SDAG-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-SDAG-NEXT: v_mov_b32_e32 v0, s0
; CHECK-SDAG-NEXT: v_mov_b32_e32 v1, s1
; CHECK-SDAG-NEXT: flat_store_dword v[0:1], v2
; CHECK-SDAG-NEXT: s_endpgm
;
; CHECK-GISEL-LABEL: test_readfirstlane_m0:
; CHECK-GISEL: ; %bb.0:
; CHECK-GISEL-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0
; CHECK-GISEL-NEXT: ;;#ASMSTART
; CHECK-GISEL-NEXT: s_mov_b32 m0, -1
; CHECK-GISEL-NEXT: ;;#ASMEND
; CHECK-GISEL-NEXT: v_mov_b32_e32 v2, m0
; CHECK-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-GISEL-NEXT: v_mov_b32_e32 v0, s0
; CHECK-GISEL-NEXT: v_mov_b32_e32 v1, s1
; CHECK-GISEL-NEXT: flat_store_dword v[0:1], v2
; CHECK-GISEL-NEXT: s_endpgm
  %m0 = call i32 asm "s_mov_b32 m0, -1", "={m0}"()
  %readfirstlane = call i32 @llvm.amdgcn.readfirstlane(i32 %m0)
  store i32 %readfirstlane, ptr addrspace(1) %out, align 4
  ret void
}

; readfirstlane of an i32 produced by inline asm with an "=s" output
; constraint. The expected output copies s2 straight to a VGPR with no
; v_readfirstlane_b32.
define amdgpu_kernel void @test_readfirstlane_copy_from_sgpr_i32(ptr addrspace(1) %out) {
; CHECK-SDAG-LABEL: test_readfirstlane_copy_from_sgpr_i32:
; CHECK-SDAG: ; %bb.0:
; CHECK-SDAG-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0
; CHECK-SDAG-NEXT: ;;#ASMSTART
; CHECK-SDAG-NEXT: s_mov_b32 s2, 0
; CHECK-SDAG-NEXT: ;;#ASMEND
; CHECK-SDAG-NEXT: v_mov_b32_e32 v2, s2
; CHECK-SDAG-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-SDAG-NEXT: v_mov_b32_e32 v0, s0
; CHECK-SDAG-NEXT: v_mov_b32_e32 v1, s1
; CHECK-SDAG-NEXT: flat_store_dword v[0:1], v2
; CHECK-SDAG-NEXT: s_endpgm
;
; CHECK-GISEL-LABEL: test_readfirstlane_copy_from_sgpr_i32:
; CHECK-GISEL: ; %bb.0:
; CHECK-GISEL-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0
; CHECK-GISEL-NEXT: ;;#ASMSTART
; CHECK-GISEL-NEXT: s_mov_b32 s2, 0
; CHECK-GISEL-NEXT: ;;#ASMEND
; CHECK-GISEL-NEXT: v_mov_b32_e32 v2, s2
; CHECK-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-GISEL-NEXT: v_mov_b32_e32 v0, s0
; CHECK-GISEL-NEXT: v_mov_b32_e32 v1, s1
; CHECK-GISEL-NEXT: flat_store_dword v[0:1], v2
; CHECK-GISEL-NEXT: s_endpgm
  %sgpr = call i32 asm "s_mov_b32 $0, 0", "=s"()
  %readfirstlane = call i32 @llvm.amdgcn.readfirstlane.i32(i32 %sgpr)
  store i32 %readfirstlane, ptr addrspace(1) %out, align 4
  ret void
}

; 64-bit integer variant of the copy-from-SGPR test: the inline asm result in
; s[2:3] is copied to VGPRs with no v_readfirstlane_b32.
define amdgpu_kernel void @test_readfirstlane_copy_from_sgpr_i64(ptr addrspace(1) %out) {
; CHECK-SDAG-LABEL: test_readfirstlane_copy_from_sgpr_i64:
; CHECK-SDAG: ; %bb.0:
; CHECK-SDAG-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0
; CHECK-SDAG-NEXT: ;;#ASMSTART
; CHECK-SDAG-NEXT: s_mov_b64 s[2:3], 0
; CHECK-SDAG-NEXT: ;;#ASMEND
; CHECK-SDAG-NEXT: v_mov_b32_e32 v0, s2
; CHECK-SDAG-NEXT: v_mov_b32_e32 v1, s3
; CHECK-SDAG-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-SDAG-NEXT: v_mov_b32_e32 v3, s1
; CHECK-SDAG-NEXT: v_mov_b32_e32 v2, s0
; CHECK-SDAG-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
; CHECK-SDAG-NEXT: s_endpgm
;
; CHECK-GISEL-LABEL: test_readfirstlane_copy_from_sgpr_i64:
; CHECK-GISEL: ; %bb.0:
; CHECK-GISEL-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0
; CHECK-GISEL-NEXT: ;;#ASMSTART
; CHECK-GISEL-NEXT: s_mov_b64 s[2:3], 0
; CHECK-GISEL-NEXT: ;;#ASMEND
; CHECK-GISEL-NEXT: v_mov_b32_e32 v0, s2
; CHECK-GISEL-NEXT: v_mov_b32_e32 v1, s3
; CHECK-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-GISEL-NEXT: v_mov_b32_e32 v3, s1
; CHECK-GISEL-NEXT: v_mov_b32_e32 v2, s0
; CHECK-GISEL-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
; CHECK-GISEL-NEXT: s_endpgm
  %sgpr = call i64 asm "s_mov_b64 $0, 0", "=s"()
  %readfirstlane = call i64 @llvm.amdgcn.readfirstlane.i64(i64 %sgpr)
  store i64 %readfirstlane, ptr addrspace(1) %out, align 4
  ret void
}

; Double variant of the copy-from-SGPR test. The expected assembly is the same
; as for the i64 variant.
define amdgpu_kernel void @test_readfirstlane_copy_from_sgpr_f64(ptr addrspace(1) %out) {
; CHECK-SDAG-LABEL: test_readfirstlane_copy_from_sgpr_f64:
; CHECK-SDAG: ; %bb.0:
; CHECK-SDAG-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0
; CHECK-SDAG-NEXT: ;;#ASMSTART
; CHECK-SDAG-NEXT: s_mov_b64 s[2:3], 0
; CHECK-SDAG-NEXT: ;;#ASMEND
; CHECK-SDAG-NEXT: v_mov_b32_e32 v0, s2
; CHECK-SDAG-NEXT: v_mov_b32_e32 v1, s3
; CHECK-SDAG-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-SDAG-NEXT: v_mov_b32_e32 v3, s1
; CHECK-SDAG-NEXT: v_mov_b32_e32 v2, s0
; CHECK-SDAG-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
; CHECK-SDAG-NEXT: s_endpgm
;
; CHECK-GISEL-LABEL: test_readfirstlane_copy_from_sgpr_f64:
; CHECK-GISEL: ; %bb.0:
; CHECK-GISEL-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0
; CHECK-GISEL-NEXT: ;;#ASMSTART
; CHECK-GISEL-NEXT: s_mov_b64 s[2:3], 0
; CHECK-GISEL-NEXT: ;;#ASMEND
; CHECK-GISEL-NEXT: v_mov_b32_e32 v0, s2
; CHECK-GISEL-NEXT: v_mov_b32_e32 v1, s3
; CHECK-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-GISEL-NEXT: v_mov_b32_e32 v3, s1
; CHECK-GISEL-NEXT: v_mov_b32_e32 v2, s0
; CHECK-GISEL-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
; CHECK-GISEL-NEXT: s_endpgm
  %sgpr = call double asm "s_mov_b64 $0, 0", "=s"()
  %readfirstlane = call double @llvm.amdgcn.readfirstlane.f64(double %sgpr)
  store double %readfirstlane, ptr addrspace(1) %out, align 4
  ret void
}

; readfirstlane of the integer value of an addrspace(5) alloca (a frame
; index), consumed by inline asm. The expected output materializes it as
; `s_mov_b32 s4, 0` with no v_readfirstlane_b32.
; NOTE(review): like test_readfirstlane_m0, this call uses the unsuffixed
; intrinsic name @llvm.amdgcn.readfirstlane -- confirm before normalizing.
define amdgpu_kernel void @test_readfirstlane_fi(ptr addrspace(1) %out) {
; CHECK-SDAG-LABEL: test_readfirstlane_fi:
; CHECK-SDAG: ; %bb.0:
; CHECK-SDAG-NEXT: s_add_u32 s0, s0, s17
; CHECK-SDAG-NEXT: s_addc_u32 s1, s1, 0
; CHECK-SDAG-NEXT: s_mov_b32 s4, 0
; CHECK-SDAG-NEXT: ;;#ASMSTART
; CHECK-SDAG-NEXT: ; use s4
; CHECK-SDAG-NEXT: ;;#ASMEND
; CHECK-SDAG-NEXT: s_endpgm
;
; CHECK-GISEL-LABEL: test_readfirstlane_fi:
; CHECK-GISEL: ; %bb.0:
; CHECK-GISEL-NEXT: s_add_u32 s0, s0, s17
; CHECK-GISEL-NEXT: s_addc_u32 s1, s1, 0
; CHECK-GISEL-NEXT: s_mov_b32 s4, 0
; CHECK-GISEL-NEXT: ;;#ASMSTART
; CHECK-GISEL-NEXT: ; use s4
; CHECK-GISEL-NEXT: ;;#ASMEND
; CHECK-GISEL-NEXT: s_endpgm
  %alloca = alloca i32, addrspace(5)
  %int = ptrtoint ptr addrspace(5) %alloca to i32
  %readfirstlane = call i32 @llvm.amdgcn.readfirstlane(i32 %int)
  call void asm sideeffect "; use $0", "s"(i32 %readfirstlane)
  ret void
}

; readfirstlane of a half argument, consumed by inline asm through an "s"
; constraint. A single v_readfirstlane_b32 into s4 is expected.
define void @test_readfirstlane_half(ptr addrspace(1) %out, half %src) {
; CHECK-SDAG-LABEL: test_readfirstlane_half:
; CHECK-SDAG: ; %bb.0:
; CHECK-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-SDAG-NEXT: v_readfirstlane_b32 s4, v2
; CHECK-SDAG-NEXT: ;;#ASMSTART
; CHECK-SDAG-NEXT: ; use s4
; CHECK-SDAG-NEXT: ;;#ASMEND
; CHECK-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; CHECK-GISEL-LABEL: test_readfirstlane_half:
; CHECK-GISEL: ; %bb.0:
; CHECK-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-GISEL-NEXT: v_readfirstlane_b32 s4, v2
; CHECK-GISEL-NEXT: ;;#ASMSTART
; CHECK-GISEL-NEXT: ; use s4
; CHECK-GISEL-NEXT: ;;#ASMEND
; CHECK-GISEL-NEXT: s_setpc_b64 s[30:31]
  %x = call half @llvm.amdgcn.readfirstlane.f16(half %src)
  call void asm sideeffect "; use $0", "s"(half %x)
  ret void
}

; readfirstlane of a float argument, consumed by inline asm. A single
; v_readfirstlane_b32 into s4 is expected.
define void @test_readfirstlane_float(ptr addrspace(1) %out, float %src) {
; CHECK-SDAG-LABEL: test_readfirstlane_float:
; CHECK-SDAG: ; %bb.0:
; CHECK-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-SDAG-NEXT: v_readfirstlane_b32 s4, v2
; CHECK-SDAG-NEXT: ;;#ASMSTART
; CHECK-SDAG-NEXT: ; use s4
; CHECK-SDAG-NEXT: ;;#ASMEND
; CHECK-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; CHECK-GISEL-LABEL: test_readfirstlane_float:
; CHECK-GISEL: ; %bb.0:
; CHECK-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-GISEL-NEXT: v_readfirstlane_b32 s4, v2
; CHECK-GISEL-NEXT: ;;#ASMSTART
; CHECK-GISEL-NEXT: ; use s4
; CHECK-GISEL-NEXT: ;;#ASMEND
; CHECK-GISEL-NEXT: s_setpc_b64 s[30:31]
  %x = call float @llvm.amdgcn.readfirstlane.f32(float %src)
  call void asm sideeffect "; use $0", "s"(float %x)
  ret void
}

; readfirstlane of a bfloat argument, consumed by inline asm. A single
; v_readfirstlane_b32 into s4 is expected.
define void @test_readfirstlane_bfloat(ptr addrspace(1) %out, bfloat %src) {
; CHECK-SDAG-LABEL: test_readfirstlane_bfloat:
; CHECK-SDAG: ; %bb.0:
; CHECK-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-SDAG-NEXT: v_readfirstlane_b32 s4, v2
; CHECK-SDAG-NEXT: ;;#ASMSTART
; CHECK-SDAG-NEXT: ; use s4
; CHECK-SDAG-NEXT: ;;#ASMEND
; CHECK-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; CHECK-GISEL-LABEL: test_readfirstlane_bfloat:
; CHECK-GISEL: ; %bb.0:
; CHECK-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-GISEL-NEXT: v_readfirstlane_b32 s4, v2
; CHECK-GISEL-NEXT: ;;#ASMSTART
; CHECK-GISEL-NEXT: ; use s4
; CHECK-GISEL-NEXT: ;;#ASMEND
; CHECK-GISEL-NEXT: s_setpc_b64 s[30:31]
  %x = call bfloat @llvm.amdgcn.readfirstlane.bf16(bfloat %src)
  call void asm sideeffect "; use $0", "s"(bfloat %x)
  ret void
}

; readfirstlane of an i16 argument, consumed by inline asm. The selectors
; differ: SelectionDAG expects an extra `s_and_b32 s4, s4, 0xffff` after the
; v_readfirstlane_b32, GlobalISel expects no masking.
define void @test_readfirstlane_i16(ptr addrspace(1) %out, i16 %src) {
; CHECK-SDAG-LABEL: test_readfirstlane_i16:
; CHECK-SDAG: ; %bb.0:
; CHECK-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-SDAG-NEXT: v_readfirstlane_b32 s4, v2
; CHECK-SDAG-NEXT: s_and_b32 s4, s4, 0xffff
; CHECK-SDAG-NEXT: ;;#ASMSTART
; CHECK-SDAG-NEXT: ; use s4
; CHECK-SDAG-NEXT: ;;#ASMEND
; CHECK-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; CHECK-GISEL-LABEL: test_readfirstlane_i16:
; CHECK-GISEL: ; %bb.0:
; CHECK-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-GISEL-NEXT: v_readfirstlane_b32 s4, v2
; CHECK-GISEL-NEXT: ;;#ASMSTART
; CHECK-GISEL-NEXT: ; use s4
; CHECK-GISEL-NEXT: ;;#ASMEND
; CHECK-GISEL-NEXT: s_setpc_b64 s[30:31]
  %x = call i16 @llvm.amdgcn.readfirstlane.i16(i16 %src)
  call void asm sideeffect "; use $0", "s"(i16 %x)
  ret void
}

; readfirstlane of a <2 x half> argument, consumed by inline asm. A single
; v_readfirstlane_b32 is expected for the whole vector.
define void @test_readfirstlane_v2f16(ptr addrspace(1) %out, <2 x half> %src) {
; CHECK-SDAG-LABEL: test_readfirstlane_v2f16:
; CHECK-SDAG: ; %bb.0:
; CHECK-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-SDAG-NEXT: v_readfirstlane_b32 s4, v2
; CHECK-SDAG-NEXT: ;;#ASMSTART
; CHECK-SDAG-NEXT: ; use s4
; CHECK-SDAG-NEXT: ;;#ASMEND
; CHECK-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; CHECK-GISEL-LABEL: test_readfirstlane_v2f16:
; CHECK-GISEL: ; %bb.0:
; CHECK-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-GISEL-NEXT: v_readfirstlane_b32 s4, v2
; CHECK-GISEL-NEXT: ;;#ASMSTART
; CHECK-GISEL-NEXT: ; use s4
; CHECK-GISEL-NEXT: ;;#ASMEND
; CHECK-GISEL-NEXT: s_setpc_b64 s[30:31]
  %x = call <2 x half> @llvm.amdgcn.readfirstlane.v2f16(<2 x half> %src)
  call void asm sideeffect "; use $0", "s"(<2 x half> %x)
  ret void
}

; readfirstlane of a <2 x float> argument, consumed by inline asm as s[4:5].
; Two v_readfirstlane_b32 are expected; SelectionDAG emits them in descending
; register order, GlobalISel in ascending order.
define void @test_readfirstlane_v2f32(ptr addrspace(1) %out, <2 x float> %src) {
; CHECK-SDAG-LABEL: test_readfirstlane_v2f32:
; CHECK-SDAG: ; %bb.0:
; CHECK-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-SDAG-NEXT: v_readfirstlane_b32 s5, v3
; CHECK-SDAG-NEXT: v_readfirstlane_b32 s4, v2
; CHECK-SDAG-NEXT: ;;#ASMSTART
; CHECK-SDAG-NEXT: ; use s[4:5]
; CHECK-SDAG-NEXT: ;;#ASMEND
; CHECK-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; CHECK-GISEL-LABEL: test_readfirstlane_v2f32:
; CHECK-GISEL: ; %bb.0:
; CHECK-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-GISEL-NEXT: v_readfirstlane_b32 s4, v2
; CHECK-GISEL-NEXT: v_readfirstlane_b32 s5, v3
; CHECK-GISEL-NEXT: ;;#ASMSTART
; CHECK-GISEL-NEXT: ; use s[4:5]
; CHECK-GISEL-NEXT: ;;#ASMEND
; CHECK-GISEL-NEXT: s_setpc_b64 s[30:31]
  %x = call <2 x float> @llvm.amdgcn.readfirstlane.v2f32(<2 x float> %src)
  call void asm sideeffect "; use $0", "s"(<2 x float> %x)
  ret void
}

; readfirstlane of a <7 x i32> argument, consumed by inline asm as s[4:10].
; Seven v_readfirstlane_b32 are expected (v2..v8 into s4..s10); SelectionDAG
; emits them in descending register order, GlobalISel in ascending order.
define void @test_readfirstlane_v7i32(ptr addrspace(1) %out, <7 x i32> %src) {
; CHECK-SDAG-LABEL: test_readfirstlane_v7i32:
; CHECK-SDAG: ; %bb.0:
; CHECK-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-SDAG-NEXT: v_readfirstlane_b32 s10, v8
; CHECK-SDAG-NEXT: v_readfirstlane_b32 s9, v7
; CHECK-SDAG-NEXT: v_readfirstlane_b32 s8, v6
; CHECK-SDAG-NEXT: v_readfirstlane_b32 s7, v5
; CHECK-SDAG-NEXT: v_readfirstlane_b32 s6, v4
; CHECK-SDAG-NEXT: v_readfirstlane_b32 s5, v3
; CHECK-SDAG-NEXT: v_readfirstlane_b32 s4, v2
; CHECK-SDAG-NEXT: ;;#ASMSTART
; CHECK-SDAG-NEXT: ; use s[4:10]
; CHECK-SDAG-NEXT: ;;#ASMEND
; CHECK-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; CHECK-GISEL-LABEL: test_readfirstlane_v7i32:
; CHECK-GISEL: ; %bb.0:
; CHECK-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-GISEL-NEXT: v_readfirstlane_b32 s4, v2
; CHECK-GISEL-NEXT: v_readfirstlane_b32 s5, v3
; CHECK-GISEL-NEXT: v_readfirstlane_b32 s6, v4
; CHECK-GISEL-NEXT: v_readfirstlane_b32 s7, v5
; CHECK-GISEL-NEXT: v_readfirstlane_b32 s8, v6
; CHECK-GISEL-NEXT: v_readfirstlane_b32 s9, v7
; CHECK-GISEL-NEXT: v_readfirstlane_b32 s10, v8
; CHECK-GISEL-NEXT: ;;#ASMSTART
; CHECK-GISEL-NEXT: ; use s[4:10]
; CHECK-GISEL-NEXT: ;;#ASMEND
; CHECK-GISEL-NEXT: s_setpc_b64 s[30:31]
  %x = call <7 x i32> @llvm.amdgcn.readfirstlane.v7i32(<7 x i32> %src)
  call void asm sideeffect "; use $0", "s"(<7 x i32> %x)
  ret void
}

; readfirstlane of an <8 x i16> argument, consumed by inline asm as s[4:7].
; Four v_readfirstlane_b32 are expected (v2..v5 into s4..s7); SelectionDAG
; emits them in descending register order, GlobalISel in ascending order.
define void @test_readfirstlane_v8i16(ptr addrspace(1) %out, <8 x i16> %src) {
; CHECK-SDAG-LABEL: test_readfirstlane_v8i16:
; CHECK-SDAG: ; %bb.0:
; CHECK-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-SDAG-NEXT: v_readfirstlane_b32 s7, v5
; CHECK-SDAG-NEXT: v_readfirstlane_b32 s6, v4
; CHECK-SDAG-NEXT: v_readfirstlane_b32 s5, v3
; CHECK-SDAG-NEXT: v_readfirstlane_b32 s4, v2
; CHECK-SDAG-NEXT: ;;#ASMSTART
; CHECK-SDAG-NEXT: ; use s[4:7]
; CHECK-SDAG-NEXT: ;;#ASMEND
; CHECK-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; CHECK-GISEL-LABEL: test_readfirstlane_v8i16:
; CHECK-GISEL: ; %bb.0:
; CHECK-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-GISEL-NEXT: v_readfirstlane_b32 s4, v2
; CHECK-GISEL-NEXT: v_readfirstlane_b32 s5, v3
; CHECK-GISEL-NEXT: v_readfirstlane_b32 s6, v4
; CHECK-GISEL-NEXT: v_readfirstlane_b32 s7, v5
; CHECK-GISEL-NEXT: ;;#ASMSTART
; CHECK-GISEL-NEXT: ; use s[4:7]
; CHECK-GISEL-NEXT: ;;#ASMEND
; CHECK-GISEL-NEXT: s_setpc_b64 s[30:31]
  %x = call <8 x i16> @llvm.amdgcn.readfirstlane.v8i16(<8 x i16> %src)
  call void asm sideeffect "; use $0", "s"(<8 x i16> %x)
  ret void
}