; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs %s -o - | FileCheck %s -check-prefix=GFX11

; Codegen test for gfx1100. The assertion blocks record the assembly llc emits
; for each function, including the s_delay_alu instructions it inserts.

declare i32 @llvm.amdgcn.workitem.id.x()

; Leaf callee: returns an all-zero <2 x i64>. Carries attribute group #0
; (noinline optnone, defined at the end of the file).
define <2 x i64> @f1() #0 {
; GFX11-LABEL: f1:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    v_mov_b32_e32 v0, 0
; GFX11-NEXT:    v_mov_b32_e32 v1, 0
; GFX11-NEXT:    v_mov_b32_e32 v2, 0
; GFX11-NEXT:    v_mov_b32_e32 v3, 0
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  ret <2 x i64> zeroinitializer
}

; Non-leaf callee: calls @f1 and discards the result. The recorded output has
; an s_delay_alu directly after the s_swappc_b64 call, ahead of the
; v_readlane_b32 instructions that restore s30/s31 from v4.
define void @f0() {
; GFX11-LABEL: f0:
; GFX11:       ; %bb.0: ; %bb
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_mov_b32 s2, s33
; GFX11-NEXT:    s_mov_b32 s33, s32
; GFX11-NEXT:    s_xor_saveexec_b32 s0, -1
; GFX11-NEXT:    scratch_store_b32 off, v4, s33 ; 4-byte Folded Spill
; GFX11-NEXT:    s_mov_b32 exec_lo, s0
; GFX11-NEXT:    s_add_i32 s32, s32, 16
; GFX11-NEXT:    s_getpc_b64 s[0:1]
; GFX11-NEXT:    s_add_u32 s0, s0, f1@gotpcrel32@lo+4
; GFX11-NEXT:    s_addc_u32 s1, s1, f1@gotpcrel32@hi+12
; GFX11-NEXT:    v_writelane_b32 v4, s30, 0
; GFX11-NEXT:    s_load_b64 s[0:1], s[0:1], 0x0
; GFX11-NEXT:    v_writelane_b32 v4, s31, 1
; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-NEXT:    s_swappc_b64 s[30:31], s[0:1]
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT:    v_readlane_b32 s31, v4, 1
; GFX11-NEXT:    v_readlane_b32 s30, v4, 0
; GFX11-NEXT:    s_mov_b32 s32, s33
; GFX11-NEXT:    s_xor_saveexec_b32 s0, -1
; GFX11-NEXT:    scratch_load_b32 v4, off, s33 ; 4-byte Folded Reload
; GFX11-NEXT:    s_mov_b32 exec_lo, s0
; GFX11-NEXT:    s_mov_b32 s33, s2
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    s_setpc_b64 s[30:31]
bb:
  %i = call <2 x i64> @f1()
  ret void
}

; FIXME: This generates "instid1(/* invalid instid value */)".
; NOTE(review): none of the s_delay_alu assertions recorded for @f2 contain
; that text, so this FIXME may be stale -- confirm before removing it.
; Kernel under test. Control flow, as written in the IR below:
;   bb   -> bb14 when workitem.id.x * %arg is 0, otherwise bb43
;   bb14 -> bb16 when %arg3 is true, otherwise bb15
;   bb16 -> bb17 when %arg5 is true, otherwise bb18
;   bb17 loops on itself while %arg11 is true, then goes to bb43
;   bb18 branches unconditionally back to itself
;   bb15 and bb43 each call @f0 and end in unreachable
define amdgpu_kernel void @f2(i32 %arg, i32 %arg1, i32 %arg2, i1 %arg3, i32 %arg4, i1 %arg5, ptr %arg6, i32 %arg7, i32 %arg8, i32 %arg9, i32 %arg10, i1 %arg11) {
; GFX11-LABEL: f2:
; GFX11:       ; %bb.0: ; %bb
; GFX11-NEXT:    s_mov_b64 s[16:17], s[4:5]
; GFX11-NEXT:    v_mov_b32_e32 v31, v0
; GFX11-NEXT:    s_load_b32 s19, s[16:17], 0x24
; GFX11-NEXT:    s_mov_b32 s12, s13
; GFX11-NEXT:    s_mov_b64 s[10:11], s[6:7]
; GFX11-NEXT:    s_mov_b64 s[6:7], s[2:3]
; GFX11-NEXT:    v_and_b32_e32 v0, 0x3ff, v31
; GFX11-NEXT:    s_mov_b64 s[4:5], s[0:1]
; GFX11-NEXT:    s_mov_b32 s20, 0
; GFX11-NEXT:    s_mov_b32 s0, -1
; GFX11-NEXT:    s_mov_b32 s3, exec_lo
; GFX11-NEXT:    s_mov_b32 s32, 0
; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-NEXT:    v_mul_lo_u32 v0, s19, v0
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT:    v_cmpx_eq_u32_e32 0, v0
; GFX11-NEXT:    s_cbranch_execz .LBB2_13
; GFX11-NEXT:  ; %bb.1: ; %bb14
; GFX11-NEXT:    s_load_b128 s[20:23], s[16:17], 0x2c
; GFX11-NEXT:    s_mov_b32 s18, 0
; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-NEXT:    s_bitcmp1_b32 s21, 0
; GFX11-NEXT:    s_cselect_b32 s24, -1, 0
; GFX11-NEXT:    s_bitcmp0_b32 s21, 0
; GFX11-NEXT:    s_cbranch_scc0 .LBB2_3
; GFX11-NEXT:  ; %bb.2: ; %bb15
; GFX11-NEXT:    s_add_u32 s8, s16, 0x58
; GFX11-NEXT:    s_addc_u32 s9, s17, 0
; GFX11-NEXT:    s_getpc_b64 s[0:1]
; GFX11-NEXT:    s_add_u32 s0, s0, f0@gotpcrel32@lo+4
; GFX11-NEXT:    s_addc_u32 s1, s1, f0@gotpcrel32@hi+12
; GFX11-NEXT:    s_mov_b32 s13, s14
; GFX11-NEXT:    s_load_b64 s[0:1], s[0:1], 0x0
; GFX11-NEXT:    s_mov_b32 s21, s14
; GFX11-NEXT:    s_mov_b32 s14, s15
; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-NEXT:    s_swappc_b64 s[30:31], s[0:1]
; GFX11-NEXT:    s_mov_b32 s14, s21
; GFX11-NEXT:    s_mov_b32 s2, -1
; GFX11-NEXT:    s_cbranch_execz .LBB2_4
; GFX11-NEXT:    s_branch .LBB2_12
; GFX11-NEXT:  .LBB2_3:
; GFX11-NEXT:    s_mov_b32 s2, 0
; GFX11-NEXT:    s_and_not1_b32 vcc_lo, exec_lo, s0
; GFX11-NEXT:    s_cbranch_vccnz .LBB2_12
; GFX11-NEXT:  .LBB2_4: ; %bb16
; GFX11-NEXT:    s_load_b32 s0, s[16:17], 0x54
; GFX11-NEXT:    s_bitcmp1_b32 s23, 0
; GFX11-NEXT:    s_cselect_b32 s9, -1, 0
; GFX11-NEXT:    s_and_b32 s1, s23, 1
; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-NEXT:    s_bitcmp1_b32 s0, 0
; GFX11-NEXT:    s_mov_b32 s0, -1
; GFX11-NEXT:    s_cselect_b32 s8, -1, 0
; GFX11-NEXT:    s_cmp_eq_u32 s1, 0
; GFX11-NEXT:    s_cbranch_scc0 .LBB2_8
; GFX11-NEXT:  ; %bb.5: ; %bb18.preheader
; GFX11-NEXT:    s_load_b128 s[28:31], s[16:17], 0x44
; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-NEXT:    s_mul_hi_u32 s0, s29, s28
; GFX11-NEXT:    s_mul_i32 s1, s29, s28
; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
; GFX11-NEXT:    v_alignbit_b32 v0, s0, s1, 1
; GFX11-NEXT:    s_mov_b32 s1, 0
; GFX11-NEXT:    v_readfirstlane_b32 s0, v0
; GFX11-NEXT:    v_mov_b32_e32 v0, 0
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(SALU_CYCLE_1)
; GFX11-NEXT:    s_or_b32 s0, s0, 1
; GFX11-NEXT:    s_lshr_b32 s0, s0, s30
; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
; GFX11-NEXT:    s_mul_i32 s0, s0, s22
; GFX11-NEXT:    s_mul_i32 s0, s0, s20
; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
; GFX11-NEXT:    s_or_b32 s0, s19, s0
; GFX11-NEXT:    s_lshl_b64 s[20:21], s[0:1], 1
; GFX11-NEXT:    s_mov_b32 s0, s1
; GFX11-NEXT:    global_load_u16 v1, v0, s[20:21]
; GFX11-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s24
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v1
; GFX11-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc_lo
; GFX11-NEXT:    s_mov_b32 vcc_lo, 0
; GFX11-NEXT:    .p2align 6
; GFX11-NEXT:  .LBB2_6: ; %bb18
; GFX11-NEXT:    ; =>This Inner Loop Header: Depth=1
; GFX11-NEXT:    s_and_b32 s1, 0xffff, s1
; GFX11-NEXT:    v_readfirstlane_b32 s13, v0
; GFX11-NEXT:    s_cmp_lg_u32 s1, 0
; GFX11-NEXT:    s_cselect_b32 s1, -1, 0
; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
; GFX11-NEXT:    v_cndmask_b32_e64 v2, 0, 1, s1
; GFX11-NEXT:    s_and_b32 s1, s8, s1
; GFX11-NEXT:    s_and_b32 s1, s1, exec_lo
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT:    v_readfirstlane_b32 s19, v2
; GFX11-NEXT:    s_cselect_b32 s1, s19, s13
; GFX11-NEXT:    s_and_b32 s13, 0xffff, s0
; GFX11-NEXT:    s_and_b32 s1, s1, 1
; GFX11-NEXT:    s_cmp_lg_u32 s13, 0
; GFX11-NEXT:    s_cselect_b32 s13, -1, 0
; GFX11-NEXT:    s_and_b32 s20, s9, exec_lo
; GFX11-NEXT:    v_cndmask_b32_e64 v2, 0, 1, s13
; GFX11-NEXT:    v_readfirstlane_b32 s13, v1
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT:    v_readfirstlane_b32 s19, v2
; GFX11-NEXT:    s_cselect_b32 s13, s19, s13
; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
; GFX11-NEXT:    s_bitcmp1_b32 s13, 0
; GFX11-NEXT:    s_cselect_b32 s13, 0x100, 0
; GFX11-NEXT:    s_or_b32 s0, s13, s0
; GFX11-NEXT:    s_cbranch_vccz .LBB2_6
; GFX11-NEXT:  ; %bb.7: ; %Flow
; GFX11-NEXT:    s_mov_b32 s0, 0
; GFX11-NEXT:  .LBB2_8: ; %Flow12
; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-NEXT:    s_and_b32 vcc_lo, exec_lo, s0
; GFX11-NEXT:    s_cbranch_vccz .LBB2_12
; GFX11-NEXT:  ; %bb.9:
; GFX11-NEXT:    s_xor_b32 s0, s8, -1
; GFX11-NEXT:  .LBB2_10: ; %bb17
; GFX11-NEXT:    ; =>This Inner Loop Header: Depth=1
; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-NEXT:    s_and_b32 vcc_lo, exec_lo, s0
; GFX11-NEXT:    s_cbranch_vccz .LBB2_10
; GFX11-NEXT:  ; %bb.11: ; %Flow6
; GFX11-NEXT:    s_mov_b32 s18, -1
; GFX11-NEXT:  .LBB2_12: ; %Flow11
; GFX11-NEXT:    s_and_b32 s20, s2, exec_lo
; GFX11-NEXT:    s_or_not1_b32 s0, s18, exec_lo
; GFX11-NEXT:  .LBB2_13: ; %Flow9
; GFX11-NEXT:    s_or_b32 exec_lo, exec_lo, s3
; GFX11-NEXT:    s_and_saveexec_b32 s3, s0
; GFX11-NEXT:    s_cbranch_execz .LBB2_15
; GFX11-NEXT:  ; %bb.14: ; %bb43
; GFX11-NEXT:    s_add_u32 s8, s16, 0x58
; GFX11-NEXT:    s_addc_u32 s9, s17, 0
; GFX11-NEXT:    s_getpc_b64 s[0:1]
; GFX11-NEXT:    s_add_u32 s0, s0, f0@gotpcrel32@lo+4
; GFX11-NEXT:    s_addc_u32 s1, s1, f0@gotpcrel32@hi+12
; GFX11-NEXT:    s_mov_b32 s13, s14
; GFX11-NEXT:    s_load_b64 s[0:1], s[0:1], 0x0
; GFX11-NEXT:    s_mov_b32 s14, s15
; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-NEXT:    s_swappc_b64 s[30:31], s[0:1]
; GFX11-NEXT:    s_or_b32 s20, s20, exec_lo
; GFX11-NEXT:  .LBB2_15: ; %Flow14
; GFX11-NEXT:    s_or_b32 exec_lo, exec_lo, s3
; GFX11-NEXT:    s_and_saveexec_b32 s0, s20
; GFX11-NEXT:  ; %bb.16: ; %UnifiedUnreachableBlock
; GFX11-NEXT:    ; divergent unreachable
; GFX11-NEXT:  ; %bb.17: ; %UnifiedReturnBlock
; GFX11-NEXT:    s_endpgm
bb:
  ; The branch condition depends on the workitem id, so it can differ per lane.
  %i = tail call i32 @llvm.amdgcn.workitem.id.x()
  %i12 = mul i32 %arg, %i
  ; Unsigned "< 1" is the same test as "== 0".
  %i13 = icmp ult i32 %i12, 1
  br i1 %i13, label %bb14, label %bb43

bb14:
  br i1 %arg3, label %bb16, label %bb15

bb15:
  call void @f0()
  unreachable

bb16:
  br i1 %arg5, label %bb17, label %bb18

bb17:
  ; Self-loop controlled only by the kernel argument %arg11.
  br i1 %arg11, label %bb17, label %bb43

bb18:
  ; Two i16 values are carried around the loop; both start at 0 from bb16.
  %i19 = phi i16 [ %i38, %bb18 ], [ 0, %bb16 ]
  %i20 = phi i16 [ %i42, %bb18 ], [ 0, %bb16 ]
  ; Form the load address from kernel arguments: the 64-bit product of
  ; %arg7 and %arg8 is shifted right by one and truncated, then or/shift/
  ; multiply steps combine it with %arg9, %arg4, %arg2 and %arg.
  %i21 = zext i32 %arg7 to i64
  %i22 = zext i32 %arg8 to i64
  %i23 = mul i64 %i22, %i21
  %i24 = lshr i64 %i23, 1
  %i25 = trunc i64 %i24 to i32
  %i26 = or i32 1, %i25
  %i27 = lshr i32 %i26, %arg9
  %i28 = mul i32 %i27, %arg4
  %i29 = mul i32 %i28, %arg2
  %i30 = or i32 %arg, %i29
  %i31 = zext i32 %i30 to i64
  ; Index off a null addrspace(1) base with a 2-byte element type.
  %i32 = getelementptr { [2 x i8] }, ptr addrspace(1) null, i64 %i31
  %i33 = load i16, ptr addrspace(1) %i32, align 2
  %i34 = icmp ult i16 %i33, 1
  ; Next value of %i19: 1 or 0, chosen by the select chain on %arg11/%arg3.
  %i35 = icmp ne i16 %i19, 0
  %i36 = select i1 %arg11, i1 %i35, i1 false
  %i37 = select i1 %i36, i1 %i35, i1 %arg3
  %i38 = select i1 %i37, i16 1, i16 0
  ; Next value of %i20: the old value with 256 or'ed in when %i40 is true.
  %i39 = icmp ne i16 %i20, 0
  %i40 = select i1 %arg5, i1 %i39, i1 %i34
  %i41 = select i1 %i40, i16 256, i16 0
  %i42 = or i16 %i41, %i20
  ; No exit edge: the block always branches back to itself.
  br label %bb18

bb43:
  call void @f0()
  unreachable
}

; Attribute group referenced by @f1.
attributes #0 = { noinline optnone }

!llvm.module.flags = !{!0}
!0 = !{i32 1, !"amdhsa_code_object_version", i32 500}