; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck %s -check-prefixes=GFX11,GFX11-SDAG
; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck %s -check-prefixes=GFX11,GFX11-GISEL
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck %s -check-prefixes=GFX12,GFX12-SDAG
; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck %s -check-prefixes=GFX12,GFX12-GISEL

; Codegen tests for the llvm.amdgcn.exp.row.{i32,f32} intrinsics, run through
; both SelectionDAG and GlobalISel on gfx1100 and gfx1200.
;
; As reflected in the expected output below:
;   * operand 0 selects the export target (12 -> pos0, 13 -> pos1),
;   * operand 1 is a per-component enable mask (0 -> all "off", 1 -> first
;     component only, 7 -> first three components),
;   * the i1 operand controls the "done" modifier,
;   * the final i32 operand is the value placed in m0 before the export.
; gfx1100 prints the instruction as "exp", gfx1200 as "export".

declare void @llvm.amdgcn.exp.row.i32(i32, i32, i32, i32, i32, i32, i1, i32)
declare void @llvm.amdgcn.exp.row.f32(i32, i32, float, float, float, float, i1, i32)
declare i32 @llvm.amdgcn.workitem.id.x()

; i32 variant, enable mask 0, all sources undef: every component is "off" and
; no source register is materialized.
define amdgpu_kernel void @undef_i32() #0 {
; GFX11-LABEL: undef_i32:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_mov_b32 m0, 0
; GFX11-NEXT:    exp pos0 off, off, off, off row_en
; GFX11-NEXT:    exp pos1 off, off, off, off done row_en
; GFX11-NEXT:    s_endpgm
;
; GFX12-LABEL: undef_i32:
; GFX12:       ; %bb.0:
; GFX12-NEXT:    s_mov_b32 m0, 0
; GFX12-NEXT:    export pos0 off, off, off, off row_en
; GFX12-NEXT:    export pos1 off, off, off, off done row_en
; GFX12-NEXT:    s_endpgm
  call void @llvm.amdgcn.exp.row.i32(i32 12, i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i1 false, i32 0)
  call void @llvm.amdgcn.exp.row.i32(i32 13, i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i1 true, i32 0)
  ret void
}

; f32 variant of undef_i32; the expected output is the same.
define amdgpu_kernel void @undef_f32() #0 {
; GFX11-LABEL: undef_f32:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_mov_b32 m0, 0
; GFX11-NEXT:    exp pos0 off, off, off, off row_en
; GFX11-NEXT:    exp pos1 off, off, off, off done row_en
; GFX11-NEXT:    s_endpgm
;
; GFX12-LABEL: undef_f32:
; GFX12:       ; %bb.0:
; GFX12-NEXT:    s_mov_b32 m0, 0
; GFX12-NEXT:    export pos0 off, off, off, off row_en
; GFX12-NEXT:    export pos1 off, off, off, off done row_en
; GFX12-NEXT:    s_endpgm
  call void @llvm.amdgcn.exp.row.f32(i32 12, i32 0, float undef, float undef, float undef, float undef, i1 false, i32 0)
  call void @llvm.amdgcn.exp.row.f32(i32 13, i32 0, float undef, float undef, float undef, float undef, i1 true, i32 0)
  ret void
}

; Enable mask 7 with three constant-zero sources: a single v0 = 0 is shared by
; the three enabled components and by both exports; the fourth stays "off".
define amdgpu_kernel void @zero_i32() #0 {
; GFX11-LABEL: zero_i32:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    v_mov_b32_e32 v0, 0
; GFX11-NEXT:    s_mov_b32 m0, 0
; GFX11-NEXT:    exp pos0 v0, v0, v0, off row_en
; GFX11-NEXT:    exp pos1 v0, v0, v0, off done row_en
; GFX11-NEXT:    s_endpgm
;
; GFX12-LABEL: zero_i32:
; GFX12:       ; %bb.0:
; GFX12-NEXT:    v_mov_b32_e32 v0, 0
; GFX12-NEXT:    s_mov_b32 m0, 0
; GFX12-NEXT:    export pos0 v0, v0, v0, off row_en
; GFX12-NEXT:    export pos1 v0, v0, v0, off done row_en
; GFX12-NEXT:    s_endpgm
  call void @llvm.amdgcn.exp.row.i32(i32 12, i32 7, i32 0, i32 0, i32 0, i32 undef, i1 false, i32 0)
  call void @llvm.amdgcn.exp.row.i32(i32 13, i32 7, i32 0, i32 0, i32 0, i32 undef, i1 true, i32 0)
  ret void
}

; f32 counterpart of zero_i32 using the constant 1.0 for the enabled sources.
define amdgpu_kernel void @one_f32() #0 {
; GFX11-LABEL: one_f32:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    v_mov_b32_e32 v0, 1.0
; GFX11-NEXT:    s_mov_b32 m0, 0
; GFX11-NEXT:    exp pos0 v0, v0, v0, off row_en
; GFX11-NEXT:    exp pos1 v0, v0, v0, off done row_en
; GFX11-NEXT:    s_endpgm
;
; GFX12-LABEL: one_f32:
; GFX12:       ; %bb.0:
; GFX12-NEXT:    v_mov_b32_e32 v0, 1.0
; GFX12-NEXT:    s_mov_b32 m0, 0
; GFX12-NEXT:    export pos0 v0, v0, v0, off row_en
; GFX12-NEXT:    export pos1 v0, v0, v0, off done row_en
; GFX12-NEXT:    s_endpgm
  call void @llvm.amdgcn.exp.row.f32(i32 12, i32 7, float 1.0, float 1.0, float 1.0, float undef, i1 false, i32 0)
  call void @llvm.amdgcn.exp.row.f32(i32 13, i32 7, float 1.0, float 1.0, float 1.0, float undef, i1 true, i32 0)
  ret void
}

; Enable mask 1 with the workitem id as the only enabled source; the row
; operand is the constant 0.
define amdgpu_kernel void @id_i32() #0 {
; GFX11-LABEL: id_i32:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
; GFX11-NEXT:    s_mov_b32 m0, 0
; GFX11-NEXT:    exp pos0 v0, off, off, off done row_en
; GFX11-NEXT:    s_endpgm
;
; GFX12-LABEL: id_i32:
; GFX12:       ; %bb.0:
; GFX12-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
; GFX12-NEXT:    s_mov_b32 m0, 0
; GFX12-NEXT:    export pos0 v0, off, off, off done row_en
; GFX12-NEXT:    s_endpgm
  %id = call i32 @llvm.amdgcn.workitem.id.x()
  call void @llvm.amdgcn.exp.row.i32(i32 12, i32 1, i32 %id, i32 undef, i32 undef, i32 undef, i1 true, i32 0)
  ret void
}

; Same as id_i32, but the row operand is a kernel argument: it is loaded into
; s0 and copied to m0 after the wait on the scalar load.
define amdgpu_kernel void @id_arg_i32(i32 %row) #0 {
; GFX11-LABEL: id_arg_i32:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_load_b32 s0, s[4:5], 0x24
; GFX11-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-NEXT:    s_mov_b32 m0, s0
; GFX11-NEXT:    exp pos0 v0, off, off, off done row_en
; GFX11-NEXT:    s_endpgm
;
; GFX12-LABEL: id_arg_i32:
; GFX12:       ; %bb.0:
; GFX12-NEXT:    s_load_b32 s0, s[4:5], 0x24
; GFX12-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
; GFX12-NEXT:    s_wait_kmcnt 0x0
; GFX12-NEXT:    s_mov_b32 m0, s0
; GFX12-NEXT:    export pos0 v0, off, off, off done row_en
; GFX12-NEXT:    s_endpgm
  %id = call i32 @llvm.amdgcn.workitem.id.x()
  call void @llvm.amdgcn.exp.row.i32(i32 12, i32 1, i32 %id, i32 undef, i32 undef, i32 undef, i1 true, i32 %row)
  ret void
}

; Divergent row number just causes a readfirstlane for now.
; The row operand is the workitem id and the exported value is the constant
; 99 (0x63). In the expected output the id is passed through
; v_readfirstlane_b32 before it reaches m0: SelectionDAG reads it into s0 and
; then copies s0 to m0, while GlobalISel writes m0 directly.
define amdgpu_kernel void @id_row_i32() #0 {
; GFX11-SDAG-LABEL: id_row_i32:
; GFX11-SDAG:       ; %bb.0:
; GFX11-SDAG-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GFX11-SDAG-NEXT:    v_readfirstlane_b32 s0, v0
; GFX11-SDAG-NEXT:    v_mov_b32_e32 v0, 0x63
; GFX11-SDAG-NEXT:    s_mov_b32 m0, s0
; GFX11-SDAG-NEXT:    exp pos0 v0, off, off, off done row_en
; GFX11-SDAG-NEXT:    s_endpgm
;
; GFX11-GISEL-LABEL: id_row_i32:
; GFX11-GISEL:       ; %bb.0:
; GFX11-GISEL-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
; GFX11-GISEL-NEXT:    v_mov_b32_e32 v1, 0x63
; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_2)
; GFX11-GISEL-NEXT:    v_readfirstlane_b32 m0, v0
; GFX11-GISEL-NEXT:    exp pos0 v1, off, off, off done row_en
; GFX11-GISEL-NEXT:    s_endpgm
;
; GFX12-SDAG-LABEL: id_row_i32:
; GFX12-SDAG:       ; %bb.0:
; GFX12-SDAG-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
; GFX12-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GFX12-SDAG-NEXT:    v_readfirstlane_b32 s0, v0
; GFX12-SDAG-NEXT:    v_mov_b32_e32 v0, 0x63
; GFX12-SDAG-NEXT:    s_mov_b32 m0, s0
; GFX12-SDAG-NEXT:    export pos0 v0, off, off, off done row_en
; GFX12-SDAG-NEXT:    s_endpgm
;
; GFX12-GISEL-LABEL: id_row_i32:
; GFX12-GISEL:       ; %bb.0:
; GFX12-GISEL-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
; GFX12-GISEL-NEXT:    v_mov_b32_e32 v1, 0x63
; GFX12-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_2)
; GFX12-GISEL-NEXT:    v_readfirstlane_b32 m0, v0
; GFX12-GISEL-NEXT:    export pos0 v1, off, off, off done row_en
; GFX12-GISEL-NEXT:    s_endpgm
  %id = call i32 @llvm.amdgcn.workitem.id.x()
  call void @llvm.amdgcn.exp.row.i32(i32 12, i32 1, i32 99, i32 undef, i32 undef, i32 undef, i1 true, i32 %id)
  ret void
}