; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
; RUN: llc -mtriple=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=SI %s
; RUN: llc -mtriple=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=VI %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX11 %s

; Branch on an ordered less-than compare of two half values.
; Both operands come from volatile loads; block %one stores %a.val to %r and
; block %two stores %b.val. The tahiti assertions expect both operands to be
; converted to f32 and compared with v_cmp_nlt_f32, while the fiji and gfx1100
; assertions expect a direct v_cmp_nlt_f16.
define amdgpu_kernel void @br_cc_f16(
; SI-LABEL: br_cc_f16:
; SI:       ; %bb.0: ; %entry
; SI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
; SI-NEXT:    s_load_dwordx2 s[8:9], s[4:5], 0xd
; SI-NEXT:    s_mov_b32 s7, 0xf000
; SI-NEXT:    s_mov_b32 s6, -1
; SI-NEXT:    s_mov_b32 s10, s6
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    s_mov_b32 s4, s2
; SI-NEXT:    s_mov_b32 s5, s3
; SI-NEXT:    s_mov_b32 s11, s7
; SI-NEXT:    buffer_load_ushort v0, off, s[4:7], 0 glc
; SI-NEXT:    s_waitcnt vmcnt(0)
; SI-NEXT:    buffer_load_ushort v1, off, s[8:11], 0 glc
; SI-NEXT:    s_waitcnt vmcnt(0)
; SI-NEXT:    v_cvt_f32_f16_e32 v0, v0
; SI-NEXT:    v_cvt_f32_f16_e32 v1, v1
; SI-NEXT:    v_cmp_nlt_f32_e32 vcc, v0, v1
; SI-NEXT:    s_cbranch_vccnz .LBB0_2
; SI-NEXT:  ; %bb.1: ; %one
; SI-NEXT:    v_cvt_f16_f32_e32 v0, v0
; SI-NEXT:    s_branch .LBB0_3
; SI-NEXT:  .LBB0_2: ; %two
; SI-NEXT:    v_cvt_f16_f32_e32 v0, v1
; SI-NEXT:  .LBB0_3: ; %one
; SI-NEXT:    s_mov_b32 s2, s6
; SI-NEXT:    s_mov_b32 s3, s7
; SI-NEXT:    buffer_store_short v0, off, s[0:3], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: br_cc_f16:
; VI:       ; %bb.0: ; %entry
; VI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
; VI-NEXT:    s_load_dwordx2 s[8:9], s[4:5], 0x34
; VI-NEXT:    s_mov_b32 s7, 0xf000
; VI-NEXT:    s_mov_b32 s6, -1
; VI-NEXT:    s_mov_b32 s10, s6
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    s_mov_b32 s4, s2
; VI-NEXT:    s_mov_b32 s5, s3
; VI-NEXT:    s_mov_b32 s11, s7
; VI-NEXT:    buffer_load_ushort v0, off, s[4:7], 0 glc
; VI-NEXT:    s_waitcnt vmcnt(0)
; VI-NEXT:    buffer_load_ushort v1, off, s[8:11], 0 glc
; VI-NEXT:    s_waitcnt vmcnt(0)
; VI-NEXT:    s_mov_b32 s2, s6
; VI-NEXT:    s_mov_b32 s3, s7
; VI-NEXT:    v_cmp_nlt_f16_e32 vcc, v0, v1
; VI-NEXT:    s_cbranch_vccnz .LBB0_2
; VI-NEXT:  ; %bb.1: ; %one
; VI-NEXT:    buffer_store_short v0, off, s[0:3], 0
; VI-NEXT:    s_endpgm
; VI-NEXT:  .LBB0_2: ; %two
; VI-NEXT:    buffer_store_short v1, off, s[0:3], 0
; VI-NEXT:    s_endpgm
;
; GFX11-LABEL: br_cc_f16:
; GFX11:       ; %bb.0: ; %entry
; GFX11-NEXT:    s_clause 0x1
; GFX11-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
; GFX11-NEXT:    s_load_b64 s[8:9], s[4:5], 0x34
; GFX11-NEXT:    s_mov_b32 s6, -1
; GFX11-NEXT:    s_mov_b32 s7, 0x31016000
; GFX11-NEXT:    s_mov_b32 s10, s6
; GFX11-NEXT:    s_mov_b32 s11, s7
; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-NEXT:    s_mov_b32 s4, s2
; GFX11-NEXT:    s_mov_b32 s5, s3
; GFX11-NEXT:    buffer_load_u16 v0, off, s[4:7], 0 glc dlc
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    buffer_load_u16 v1, off, s[8:11], 0 glc dlc
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    s_mov_b32 s2, s6
; GFX11-NEXT:    s_mov_b32 s3, s7
; GFX11-NEXT:    v_cmp_nlt_f16_e32 vcc_lo, v0, v1
; GFX11-NEXT:    s_cbranch_vccnz .LBB0_2
; GFX11-NEXT:  ; %bb.1: ; %one
; GFX11-NEXT:    buffer_store_b16 v0, off, s[0:3], 0
; GFX11-NEXT:    s_endpgm
; GFX11-NEXT:  .LBB0_2: ; %two
; GFX11-NEXT:    buffer_store_b16 v1, off, s[0:3], 0
; GFX11-NEXT:    s_endpgm
    ptr addrspace(1) %r,
    ptr addrspace(1) %a,
    ptr addrspace(1) %b) {
entry:
  %a.val = load volatile half, ptr addrspace(1) %a
  %b.val = load volatile half, ptr addrspace(1) %b
  %fcmp = fcmp olt half %a.val, %b.val
  br i1 %fcmp, label %one, label %two

one:
  store half %a.val, ptr addrspace(1) %r
  ret void

two:
  store half %b.val, ptr addrspace(1) %r
  ret void
}

; Same branch shape with the constant 0xH3800 as the left-hand compare operand.
; Block %one stores the constant and block %two stores the loaded %b.val.
; The assertions expect the constant to appear as the inline operand 0.5 in the
; compare and as the literal 0x3800 when it is materialized for the store.
define amdgpu_kernel void @br_cc_f16_imm_a(
; SI-LABEL: br_cc_f16_imm_a:
; SI:       ; %bb.0: ; %entry
; SI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
; SI-NEXT:    s_mov_b32 s7, 0xf000
; SI-NEXT:    s_mov_b32 s6, -1
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    s_mov_b32 s4, s2
; SI-NEXT:    s_mov_b32 s5, s3
; SI-NEXT:    buffer_load_ushort v0, off, s[4:7], 0
; SI-NEXT:    s_waitcnt vmcnt(0)
; SI-NEXT:    v_cvt_f32_f16_e32 v0, v0
; SI-NEXT:    v_cmp_nlt_f32_e32 vcc, 0.5, v0
; SI-NEXT:    s_cbranch_vccnz .LBB1_2
; SI-NEXT:  ; %bb.1: ; %one
; SI-NEXT:    s_mov_b32 s2, s6
; SI-NEXT:    s_mov_b32 s3, s7
; SI-NEXT:    v_mov_b32_e32 v0, 0x3800
; SI-NEXT:    buffer_store_short v0, off, s[0:3], 0
; SI-NEXT:    s_endpgm
; SI-NEXT:  .LBB1_2: ; %two
; SI-NEXT:    v_cvt_f16_f32_e32 v0, v0
; SI-NEXT:    s_mov_b32 s2, s6
; SI-NEXT:    s_mov_b32 s3, s7
; SI-NEXT:    buffer_store_short v0, off, s[0:3], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: br_cc_f16_imm_a:
; VI:       ; %bb.0: ; %entry
; VI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
; VI-NEXT:    s_mov_b32 s7, 0xf000
; VI-NEXT:    s_mov_b32 s6, -1
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    s_mov_b32 s4, s2
; VI-NEXT:    s_mov_b32 s5, s3
; VI-NEXT:    buffer_load_ushort v0, off, s[4:7], 0
; VI-NEXT:    s_mov_b32 s2, s6
; VI-NEXT:    s_mov_b32 s3, s7
; VI-NEXT:    s_waitcnt vmcnt(0)
; VI-NEXT:    v_cmp_nlt_f16_e32 vcc, 0.5, v0
; VI-NEXT:    s_cbranch_vccnz .LBB1_2
; VI-NEXT:  ; %bb.1: ; %one
; VI-NEXT:    v_mov_b32_e32 v0, 0x3800
; VI-NEXT:  .LBB1_2: ; %two
; VI-NEXT:    buffer_store_short v0, off, s[0:3], 0
; VI-NEXT:    s_endpgm
;
; GFX11-LABEL: br_cc_f16_imm_a:
; GFX11:       ; %bb.0: ; %entry
; GFX11-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
; GFX11-NEXT:    s_mov_b32 s7, 0x31016000
; GFX11-NEXT:    s_mov_b32 s6, -1
; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-NEXT:    s_mov_b32 s4, s2
; GFX11-NEXT:    s_mov_b32 s5, s3
; GFX11-NEXT:    buffer_load_u16 v0, off, s[4:7], 0
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    v_cmp_nlt_f16_e32 vcc_lo, 0.5, v0
; GFX11-NEXT:    s_cbranch_vccnz .LBB1_2
; GFX11-NEXT:  ; %bb.1: ; %one
; GFX11-NEXT:    v_mov_b32_e32 v0, 0x3800
; GFX11-NEXT:  .LBB1_2: ; %two
; GFX11-NEXT:    s_mov_b32 s2, s6
; GFX11-NEXT:    s_mov_b32 s3, s7
; GFX11-NEXT:    buffer_store_b16 v0, off, s[0:3], 0
; GFX11-NEXT:    s_endpgm
    ptr addrspace(1) %r,
    ptr addrspace(1) %b) {
entry:
  %b.val = load half, ptr addrspace(1) %b
  %fcmp = fcmp olt half 0xH3800, %b.val
  br i1 %fcmp, label %one, label %two

one:
  store half 0xH3800, ptr addrspace(1) %r
  ret void

two:
  store half %b.val, ptr addrspace(1) %r
  ret void
}

; Same branch shape with the constant 0xH3800 as the right-hand compare operand.
; Block %one stores the loaded %a.val and block %two stores the constant.
; The assertions expect 0.5 to stay in the first source operand, with the
; compare emitted as v_cmp_ngt rather than v_cmp_nlt.
define amdgpu_kernel void @br_cc_f16_imm_b(
; SI-LABEL: br_cc_f16_imm_b:
; SI:       ; %bb.0: ; %entry
; SI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
; SI-NEXT:    s_mov_b32 s7, 0xf000
; SI-NEXT:    s_mov_b32 s6, -1
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    s_mov_b32 s4, s2
; SI-NEXT:    s_mov_b32 s5, s3
; SI-NEXT:    buffer_load_ushort v0, off, s[4:7], 0
; SI-NEXT:    s_waitcnt vmcnt(0)
; SI-NEXT:    v_cvt_f32_f16_e32 v0, v0
; SI-NEXT:    v_cmp_ngt_f32_e32 vcc, 0.5, v0
; SI-NEXT:    s_cbranch_vccnz .LBB2_2
; SI-NEXT:  ; %bb.1: ; %one
; SI-NEXT:    v_cvt_f16_f32_e32 v0, v0
; SI-NEXT:    s_mov_b32 s2, s6
; SI-NEXT:    s_mov_b32 s3, s7
; SI-NEXT:    buffer_store_short v0, off, s[0:3], 0
; SI-NEXT:    s_endpgm
; SI-NEXT:  .LBB2_2: ; %two
; SI-NEXT:    s_mov_b32 s2, s6
; SI-NEXT:    s_mov_b32 s3, s7
; SI-NEXT:    v_mov_b32_e32 v0, 0x3800
; SI-NEXT:    buffer_store_short v0, off, s[0:3], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: br_cc_f16_imm_b:
; VI:       ; %bb.0: ; %entry
; VI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
; VI-NEXT:    s_mov_b32 s7, 0xf000
; VI-NEXT:    s_mov_b32 s6, -1
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    s_mov_b32 s4, s2
; VI-NEXT:    s_mov_b32 s5, s3
; VI-NEXT:    buffer_load_ushort v0, off, s[4:7], 0
; VI-NEXT:    s_mov_b32 s2, s6
; VI-NEXT:    s_mov_b32 s3, s7
; VI-NEXT:    s_waitcnt vmcnt(0)
; VI-NEXT:    v_cmp_ngt_f16_e32 vcc, 0.5, v0
; VI-NEXT:    s_cbranch_vccnz .LBB2_2
; VI-NEXT:  ; %bb.1: ; %one
; VI-NEXT:    buffer_store_short v0, off, s[0:3], 0
; VI-NEXT:    s_endpgm
; VI-NEXT:  .LBB2_2: ; %two
; VI-NEXT:    v_mov_b32_e32 v0, 0x3800
; VI-NEXT:    buffer_store_short v0, off, s[0:3], 0
; VI-NEXT:    s_endpgm
;
; GFX11-LABEL: br_cc_f16_imm_b:
; GFX11:       ; %bb.0: ; %entry
; GFX11-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
; GFX11-NEXT:    s_mov_b32 s7, 0x31016000
; GFX11-NEXT:    s_mov_b32 s6, -1
; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-NEXT:    s_mov_b32 s4, s2
; GFX11-NEXT:    s_mov_b32 s5, s3
; GFX11-NEXT:    buffer_load_u16 v0, off, s[4:7], 0
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    v_cmp_ngt_f16_e32 vcc_lo, 0.5, v0
; GFX11-NEXT:    s_cbranch_vccz .LBB2_2
; GFX11-NEXT:  ; %bb.1: ; %two
; GFX11-NEXT:    v_mov_b32_e32 v0, 0x3800
; GFX11-NEXT:  .LBB2_2: ; %one
; GFX11-NEXT:    s_mov_b32 s2, s6
; GFX11-NEXT:    s_mov_b32 s3, s7
; GFX11-NEXT:    buffer_store_b16 v0, off, s[0:3], 0
; GFX11-NEXT:    s_endpgm
    ptr addrspace(1) %r,
    ptr addrspace(1) %a) {
entry:
  %a.val = load half, ptr addrspace(1) %a
  %fcmp = fcmp olt half %a.val, 0xH3800
  br i1 %fcmp, label %one, label %two

one:
  store half %a.val, ptr addrspace(1) %r
  ret void

two:
  store half 0xH3800, ptr addrspace(1) %r
  ret void
}