; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc -mtriple=amdgcn -mcpu=gfx1150 -amdgpu-enable-vopd=0 < %s | FileCheck -check-prefix=GCN %s

; This file pins the s_setprio / s_waitcnt_expcnt / s_nop instructions that llc
; is expected to place around `exp` instructions when targeting gfx1150, for
; several calling conventions and control-flow shapes.
;
; Reading the llvm.amdgcn.exp.f32 calls against the expected assembly below:
;   * 1st argument: export target (0 prints as mrt0, 12 as pos0, 32 and 33 as
;     invalid_target_32 / invalid_target_33).
;   * 2nd argument: enable mask; a cleared bit prints the matching source as
;     `off` (mask 1 keeps only the first source, mask 2 only the second,
;     mask 15 keeps all four).
;   * 7th argument (i1): when true the export prints with a trailing `done`.
;   * 8th argument (i1): has no visible effect on the expected assembly here.

; Pixel shader with two consecutive exports, all sources disabled (mask 0).
; Expected: s_setprio 2 on entry, both exports back to back, then a single
; s_setprio 0 followed by two s_nop immediately before s_endpgm.
define amdgpu_ps void @test_export_zeroes_f32() #0 {
; GCN-LABEL: test_export_zeroes_f32:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_setprio 2
; GCN-NEXT:    v_mov_b32_e32 v0, 0
; GCN-NEXT:    exp mrt0 off, off, off, off
; GCN-NEXT:    exp mrt0 off, off, off, off done
; GCN-NEXT:    s_setprio 0
; GCN-NEXT:    s_nop 0
; GCN-NEXT:    s_nop 0
; GCN-NEXT:    s_endpgm
  call void @llvm.amdgcn.exp.f32(i32 0, i32 0, float 0.0, float 0.0, float 0.0, float 0.0, i1 false, i1 false)
  call void @llvm.amdgcn.exp.f32(i32 0, i32 0, float 0.0, float 0.0, float 0.0, float 0.0, i1 true, i1 false)
  ret void
}

; Pixel shader with a single `done` export using enable mask 1: only the first
; source (1.0, materialised in v3) is exported. Same entry/exit priority
; sequence as test_export_zeroes_f32.
define amdgpu_ps void @test_export_en_src0_f32() #0 {
; GCN-LABEL: test_export_en_src0_f32:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_setprio 2
; GCN-NEXT:    v_mov_b32_e32 v0, 4.0
; GCN-NEXT:    v_mov_b32_e32 v1, 0.5
; GCN-NEXT:    v_mov_b32_e32 v2, 2.0
; GCN-NEXT:    v_mov_b32_e32 v3, 1.0
; GCN-NEXT:    exp mrt0 v3, off, off, off done
; GCN-NEXT:    s_setprio 0
; GCN-NEXT:    s_nop 0
; GCN-NEXT:    s_nop 0
; GCN-NEXT:    s_endpgm
  call void @llvm.amdgcn.exp.f32(i32 0, i32 1, float 1.0, float 2.0, float 0.5, float 4.0, i1 true, i1 false)
  ret void
}

; Same single `done` export (enable mask 2, second source only) under the
; amdgpu_gs calling convention. The expected priority sequence matches the
; amdgpu_ps cases above.
define amdgpu_gs void @test_export_gs() #0 {
; GCN-LABEL: test_export_gs:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_setprio 2
; GCN-NEXT:    v_mov_b32_e32 v0, 4.0
; GCN-NEXT:    v_mov_b32_e32 v1, 0.5
; GCN-NEXT:    v_mov_b32_e32 v2, 2.0
; GCN-NEXT:    v_mov_b32_e32 v3, 1.0
; GCN-NEXT:    exp mrt0 off, v2, off, off done
; GCN-NEXT:    s_setprio 0
; GCN-NEXT:    s_nop 0
; GCN-NEXT:    s_nop 0
; GCN-NEXT:    s_endpgm
  call void @llvm.amdgcn.exp.f32(i32 0, i32 2, float 1.0, float 2.0, float 0.5, float 4.0, i1 true, i1 false)
  ret void
}

; Same export under the amdgpu_hs calling convention; expected assembly is
; identical to test_export_gs.
define amdgpu_hs void @test_export_hs() #0 {
; GCN-LABEL: test_export_hs:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_setprio 2
; GCN-NEXT:    v_mov_b32_e32 v0, 4.0
; GCN-NEXT:    v_mov_b32_e32 v1, 0.5
; GCN-NEXT:    v_mov_b32_e32 v2, 2.0
; GCN-NEXT:    v_mov_b32_e32 v3, 1.0
; GCN-NEXT:    exp mrt0 off, v2, off, off done
; GCN-NEXT:    s_setprio 0
; GCN-NEXT:    s_nop 0
; GCN-NEXT:    s_nop 0
; GCN-NEXT:    s_endpgm
  call void @llvm.amdgcn.exp.f32(i32 0, i32 2, float 1.0, float 2.0, float 0.5, float 4.0, i1 true, i1 false)
  ret void
}

; Callable amdgpu_gfx function containing an export. Unlike the shader entry
; points above, no s_setprio 2 is expected on entry. After the export the
; expected sequence is s_setprio 0, s_waitcnt_expcnt null, two s_nop, and then
; s_setprio 2 before returning through s_setpc_b64.
; This function is also the callee used by test_export_in_callee and
; test_export_in_callee_prio.
define amdgpu_gfx void @test_export_gfx(float %v) #0 {
; GCN-LABEL: test_export_gfx:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_mov_b32_e32 v1, 4.0
; GCN-NEXT:    v_mov_b32_e32 v2, 0.5
; GCN-NEXT:    v_mov_b32_e32 v3, 2.0
; GCN-NEXT:    exp mrt0 off, v3, off, off done
; GCN-NEXT:    s_setprio 0
; GCN-NEXT:    s_waitcnt_expcnt null, 0x0
; GCN-NEXT:    s_nop 0
; GCN-NEXT:    s_nop 0
; GCN-NEXT:    s_setprio 2
; GCN-NEXT:    s_waitcnt expcnt(0)
; GCN-NEXT:    s_setpc_b64 s[30:31]
  call void @llvm.amdgcn.exp.f32(i32 0, i32 2, float %v, float 2.0, float 0.5, float 4.0, i1 true, i1 false)
  ret void
}

; Export under the amdgpu_cs calling convention: the expected assembly has no
; s_setprio or s_nop instructions around the export.
define amdgpu_cs void @test_export_cs() #0 {
; GCN-LABEL: test_export_cs:
; GCN:       ; %bb.0:
; GCN-NEXT:    v_mov_b32_e32 v0, 4.0
; GCN-NEXT:    v_mov_b32_e32 v1, 0.5
; GCN-NEXT:    v_mov_b32_e32 v2, 2.0
; GCN-NEXT:    v_mov_b32_e32 v3, 1.0
; GCN-NEXT:    exp mrt0 off, v2, off, off done
; GCN-NEXT:    s_endpgm
  call void @llvm.amdgcn.exp.f32(i32 0, i32 2, float 1.0, float 2.0, float 0.5, float 4.0, i1 true, i1 false)
  ret void
}

; Export under the amdgpu_kernel calling convention: as with amdgpu_cs, no
; priority changes are expected around the export.
define amdgpu_kernel void @test_export_kernel() #0 {
; GCN-LABEL: test_export_kernel:
; GCN:       ; %bb.0:
; GCN-NEXT:    v_mov_b32_e32 v0, 4.0
; GCN-NEXT:    v_mov_b32_e32 v1, 0.5
; GCN-NEXT:    v_mov_b32_e32 v2, 2.0
; GCN-NEXT:    v_mov_b32_e32 v3, 1.0
; GCN-NEXT:    exp mrt0 off, v2, off, off done
; GCN-NEXT:    s_endpgm
  call void @llvm.amdgcn.exp.f32(i32 0, i32 2, float 1.0, float 2.0, float 0.5, float 4.0, i1 true, i1 false)
  ret void
}

; amdgpu_gfx function without any export: only the entry s_waitcnt and the
; return are expected, with no s_setprio.
define amdgpu_gfx void @test_no_export_gfx(float %v) #0 {
; GCN-LABEL: test_no_export_gfx:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    s_setpc_b64 s[30:31]
  ret void
}

; Pixel shader without any export: the expected assembly is just s_endpgm,
; i.e. no s_setprio 2 is emitted on entry when the shader does not export.
define amdgpu_ps void @test_no_export_ps(float %v) #0 {
; GCN-LABEL: test_no_export_ps:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_endpgm
  ret void
}

; Pixel shader whose only export sits in a conditionally executed block (%exp)
; that falls through to %end. Expected: s_setprio 2 on entry; inside %exp, after
; the export: s_setprio 0, s_waitcnt_expcnt null, two s_nop, and s_setprio 2
; again before the join label. No extra sequence is expected before s_endpgm.
define amdgpu_ps void @test_if_export_f32(i32 %flag, float %x, float %y, float %z, float %w) #0 {
; GCN-LABEL: test_if_export_f32:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_setprio 2
; GCN-NEXT:    s_mov_b32 s0, exec_lo
; GCN-NEXT:    v_cmpx_ne_u32_e32 0, v0
; GCN-NEXT:    s_cbranch_execz .LBB9_2
; GCN-NEXT:  ; %bb.1: ; %exp
; GCN-NEXT:    exp mrt0 v1, v2, v3, v4
; GCN-NEXT:    s_setprio 0
; GCN-NEXT:    s_waitcnt_expcnt null, 0x0
; GCN-NEXT:    s_nop 0
; GCN-NEXT:    s_nop 0
; GCN-NEXT:    s_setprio 2
; GCN-NEXT:  .LBB9_2: ; %end
; GCN-NEXT:    s_endpgm
  %cc = icmp eq i32 %flag, 0
  br i1 %cc, label %end, label %exp

exp:
  call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %x, float %y, float %z, float %w, i1 false, i1 false)
  br label %end

end:
  ret void
}

; Variant of test_if_export_f32 with the 8th intrinsic argument set to true.
; The expected assembly is the same as for test_if_export_f32 apart from the
; basic-block label number.
define amdgpu_ps void @test_if_export_vm_f32(i32 %flag, float %x, float %y, float %z, float %w) #0 {
; GCN-LABEL: test_if_export_vm_f32:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_setprio 2
; GCN-NEXT:    s_mov_b32 s0, exec_lo
; GCN-NEXT:    v_cmpx_ne_u32_e32 0, v0
; GCN-NEXT:    s_cbranch_execz .LBB10_2
; GCN-NEXT:  ; %bb.1: ; %exp
; GCN-NEXT:    exp mrt0 v1, v2, v3, v4
; GCN-NEXT:    s_setprio 0
; GCN-NEXT:    s_waitcnt_expcnt null, 0x0
; GCN-NEXT:    s_nop 0
; GCN-NEXT:    s_nop 0
; GCN-NEXT:    s_setprio 2
; GCN-NEXT:  .LBB10_2: ; %end
; GCN-NEXT:    s_endpgm
  %cc = icmp eq i32 %flag, 0
  br i1 %cc, label %end, label %exp

exp:
  call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %x, float %y, float %z, float %w, i1 false, i1 true)
  br label %end

end:
  ret void
}
; Pixel shader whose only export sits in a conditionally executed block (%exp)
; and has the 7th intrinsic argument set, so it prints with `done`. Expected:
; s_setprio 2 on entry; inside %exp, after the export: s_setprio 0,
; s_waitcnt_expcnt null, two s_nop, and s_setprio 2 before the join label.
define amdgpu_ps void @test_if_export_done_f32(i32 %flag, float %x, float %y, float %z, float %w) #0 {
; GCN-LABEL: test_if_export_done_f32:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_setprio 2
; GCN-NEXT:    s_mov_b32 s0, exec_lo
; GCN-NEXT:    v_cmpx_ne_u32_e32 0, v0
; GCN-NEXT:    s_cbranch_execz .LBB11_2
; GCN-NEXT:  ; %bb.1: ; %exp
; GCN-NEXT:    exp mrt0 v1, v2, v3, v4 done
; GCN-NEXT:    s_setprio 0
; GCN-NEXT:    s_waitcnt_expcnt null, 0x0
; GCN-NEXT:    s_nop 0
; GCN-NEXT:    s_nop 0
; GCN-NEXT:    s_setprio 2
; GCN-NEXT:  .LBB11_2: ; %end
; GCN-NEXT:    s_endpgm
  %cc = icmp eq i32 %flag, 0
  br i1 %cc, label %end, label %exp

exp:
  call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %x, float %y, float %z, float %w, i1 true, i1 false)
  br label %end

end:
  ret void
}

; Variant of test_if_export_done_f32 with both trailing i1 arguments set to
; true. The expected assembly is the same as for test_if_export_done_f32 apart
; from the basic-block label number.
define amdgpu_ps void @test_if_export_vm_done_f32(i32 %flag, float %x, float %y, float %z, float %w) #0 {
; GCN-LABEL: test_if_export_vm_done_f32:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_setprio 2
; GCN-NEXT:    s_mov_b32 s0, exec_lo
; GCN-NEXT:    v_cmpx_ne_u32_e32 0, v0
; GCN-NEXT:    s_cbranch_execz .LBB12_2
; GCN-NEXT:  ; %bb.1: ; %exp
; GCN-NEXT:    exp mrt0 v1, v2, v3, v4 done
; GCN-NEXT:    s_setprio 0
; GCN-NEXT:    s_waitcnt_expcnt null, 0x0
; GCN-NEXT:    s_nop 0
; GCN-NEXT:    s_nop 0
; GCN-NEXT:    s_setprio 2
; GCN-NEXT:  .LBB12_2: ; %end
; GCN-NEXT:    s_endpgm
  %cc = icmp eq i32 %flag, 0
  br i1 %cc, label %end, label %exp

exp:
  call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %x, float %y, float %z, float %w, i1 true, i1 true)
  br label %end

end:
  ret void
}

; Pixel shader where the IR issues two exports (targets 32 and 33), then a
; buffer load, then a `done` export to target 12 that consumes the loaded value.
; In the expected assembly the target-12 export (pos0) is emitted first, after
; the s_waitcnt vmcnt(0) for the load, followed by the target-32/33 exports.
; Only one s_setprio 0 + two s_nop sequence is expected, after the last export
; and directly before s_endpgm.
define amdgpu_ps void @test_export_pos_before_param_across_load(i32 %idx) #0 {
; GCN-LABEL: test_export_pos_before_param_across_load:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_setprio 2
; GCN-NEXT:    buffer_load_b32 v0, v0, s[0:3], 0 offen
; GCN-NEXT:    v_mov_b32_e32 v1, 0
; GCN-NEXT:    v_mov_b32_e32 v2, 1.0
; GCN-NEXT:    v_mov_b32_e32 v3, 0.5
; GCN-NEXT:    s_waitcnt vmcnt(0)
; GCN-NEXT:    exp pos0 v1, v1, v1, v0 done
; GCN-NEXT:    exp invalid_target_32 v2, v2, v2, v2
; GCN-NEXT:    exp invalid_target_33 v2, v2, v2, v3
; GCN-NEXT:    s_setprio 0
; GCN-NEXT:    s_nop 0
; GCN-NEXT:    s_nop 0
; GCN-NEXT:    s_endpgm
  call void @llvm.amdgcn.exp.f32(i32 32, i32 15, float 1.0, float 1.0, float 1.0, float 1.0, i1 false, i1 false)
  call void @llvm.amdgcn.exp.f32(i32 33, i32 15, float 1.0, float 1.0, float 1.0, float 0.5, i1 false, i1 false)
  %load = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) undef, i32 %idx, i32 0, i32 0)
  call void @llvm.amdgcn.exp.f32(i32 12, i32 15, float 0.0, float 0.0, float 0.0, float %load, i1 true, i1 false)
  ret void
}

; Pixel shader that stores to one of two private (addrspace(5)) allocas, issues
; a `done` export to target 12, reloads from %data0, and then exports the
; loaded value to targets 32 and 33.
; In the expected assembly the scratch store and scratch load both precede the
; pos0 export. That export is followed by the full sequence (s_setprio 0,
; s_waitcnt_expcnt null, two s_nop, s_setprio 2) because more code follows; the
; last two exports are followed by the shorter s_setprio 0 + two s_nop sequence
; before s_endpgm.
define amdgpu_ps void @test_export_across_store_load(i32 %idx, float %v) #0 {
; GCN-LABEL: test_export_across_store_load:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_setprio 2
; GCN-NEXT:    v_mov_b32_e32 v2, 0
; GCN-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 1, v0
; GCN-NEXT:    s_delay_alu instid0(VALU_DEP_2)
; GCN-NEXT:    v_cndmask_b32_e32 v0, 16, v2, vcc_lo
; GCN-NEXT:    v_mov_b32_e32 v2, 0
; GCN-NEXT:    scratch_store_b32 v0, v1, off
; GCN-NEXT:    scratch_load_b32 v0, off, off
; GCN-NEXT:    v_mov_b32_e32 v1, 1.0
; GCN-NEXT:    exp pos0 v2, v2, v2, v1 done
; GCN-NEXT:    s_setprio 0
; GCN-NEXT:    s_waitcnt_expcnt null, 0x0
; GCN-NEXT:    s_nop 0
; GCN-NEXT:    s_nop 0
; GCN-NEXT:    s_setprio 2
; GCN-NEXT:    s_waitcnt vmcnt(0)
; GCN-NEXT:    exp invalid_target_32 v0, v2, v1, v2
; GCN-NEXT:    exp invalid_target_33 v0, v2, v1, v2
; GCN-NEXT:    s_setprio 0
; GCN-NEXT:    s_nop 0
; GCN-NEXT:    s_nop 0
; GCN-NEXT:    s_endpgm
  %data0 = alloca <4 x float>, align 8, addrspace(5)
  %data1 = alloca <4 x float>, align 8, addrspace(5)
  %cmp = icmp eq i32 %idx, 1
  %data = select i1 %cmp, ptr addrspace(5) %data0, ptr addrspace(5) %data1
  store float %v, ptr addrspace(5) %data, align 8
  call void @llvm.amdgcn.exp.f32(i32 12, i32 15, float 0.0, float 0.0, float 0.0, float 1.0, i1 true, i1 false)
  %load0 = load float, ptr addrspace(5) %data0, align 8
  call void @llvm.amdgcn.exp.f32(i32 32, i32 15, float %load0, float 0.0, float 1.0, float 0.0, i1 false, i1 false)
  call void @llvm.amdgcn.exp.f32(i32 33, i32 15, float %load0, float 0.0, float 1.0, float 0.0, i1 false, i1 false)
  ret void
}

; Pixel shader that contains no export itself but calls @test_export_gfx, which
; does. Expected: s_setprio 2 on entry and no s_setprio 0 / s_nop sequence in
; this function before s_endpgm.
define amdgpu_ps void @test_export_in_callee(float %v) #0 {
; GCN-LABEL: test_export_in_callee:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_setprio 2
; GCN-NEXT:    s_getpc_b64 s[0:1]
; GCN-NEXT:    s_add_u32 s0, s0, test_export_gfx@gotpcrel32@lo+4
; GCN-NEXT:    s_addc_u32 s1, s1, test_export_gfx@gotpcrel32@hi+12
; GCN-NEXT:    v_add_f32_e32 v0, 1.0, v0
; GCN-NEXT:    s_load_b64 s[0:1], s[0:1], 0x0
; GCN-NEXT:    s_mov_b32 s32, 0
; GCN-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NEXT:    s_swappc_b64 s[30:31], s[0:1]
; GCN-NEXT:    s_endpgm
  %x = fadd float %v, 1.0
  call void @test_export_gfx(float %x)
  ret void
}

; Same as test_export_in_callee, but the IR explicitly calls
; llvm.amdgcn.s.setprio(i16 0) before the call to @test_export_gfx.
; NOTE(review): the IR asks for priority 0, yet the expected assembly shows
; s_setprio 2 at that position (after the v_add, before the address
; computation). Presumably the requested priority is deliberately raised so the
; callee's export runs at priority 2 - confirm against the pass that inserts
; these instructions.
define amdgpu_ps void @test_export_in_callee_prio(float %v) #0 {
; GCN-LABEL: test_export_in_callee_prio:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_setprio 2
; GCN-NEXT:    s_mov_b32 s32, 0
; GCN-NEXT:    v_add_f32_e32 v0, 1.0, v0
; GCN-NEXT:    s_setprio 2
; GCN-NEXT:    s_getpc_b64 s[0:1]
; GCN-NEXT:    s_add_u32 s0, s0, test_export_gfx@gotpcrel32@lo+4
; GCN-NEXT:    s_addc_u32 s1, s1, test_export_gfx@gotpcrel32@hi+12
; GCN-NEXT:    s_load_b64 s[0:1], s[0:1], 0x0
; GCN-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NEXT:    s_swappc_b64 s[30:31], s[0:1]
; GCN-NEXT:    s_endpgm
  %x = fadd float %v, 1.0
  call void @llvm.amdgcn.s.setprio(i16 0)
  call void @test_export_gfx(float %x)
  ret void
}

; Intrinsic declarations used by the tests above.
; NOTE(review): @llvm.amdgcn.exp.i32 is declared but not called by any function
; visible in this file.
declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #1
declare void @llvm.amdgcn.exp.i32(i32, i32, i32, i32, i32, i32, i1, i1) #1
declare float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8), i32, i32, i32) #2
declare void @llvm.amdgcn.s.setprio(i16)

; Attribute groups: #0 is attached to every test function, #1 to the export
; intrinsics, and #2 to the raw buffer load intrinsic.
attributes #0 = { nounwind }
attributes #1 = { nounwind inaccessiblememonly }
attributes #2 = { nounwind readnone }