1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py 2; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=hawaii < %s | FileCheck --check-prefix=GFX7 %s 3; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=fiji < %s | FileCheck --check-prefix=GFX8 %s 4; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx1010 < %s | FileCheck --check-prefix=GFX10_W32 %s 5; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx1010 -mattr=+wavefrontsize64 < %s | FileCheck --check-prefix=GFX10_W64 %s 6; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 < %s | FileCheck --check-prefix=GFX11_W32 %s 7; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 -mattr=+wavefrontsize64 < %s | FileCheck --check-prefix=GFX11_W64 %s 8; REQUIRES: do-not-run-me 9 10define float @v_div_fmas_f32(float %a, float %b, float %c, i1 %d) { 11; GFX7-LABEL: v_div_fmas_f32: 12; GFX7: ; %bb.0: 13; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 14; GFX7-NEXT: v_and_b32_e32 v3, 1, v3 15; GFX7-NEXT: v_cmp_ne_u32_e32 vcc, 0, v3 16; GFX7-NEXT: s_nop 3 17; GFX7-NEXT: v_div_fmas_f32 v0, v0, v1, v2 18; GFX7-NEXT: s_setpc_b64 s[30:31] 19; 20; GFX8-LABEL: v_div_fmas_f32: 21; GFX8: ; %bb.0: 22; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 23; GFX8-NEXT: v_and_b32_e32 v3, 1, v3 24; GFX8-NEXT: v_cmp_ne_u32_e32 vcc, 0, v3 25; GFX8-NEXT: s_nop 3 26; GFX8-NEXT: v_div_fmas_f32 v0, v0, v1, v2 27; GFX8-NEXT: s_setpc_b64 s[30:31] 28; 29; GFX10_W32-LABEL: v_div_fmas_f32: 30; GFX10_W32: ; %bb.0: 31; GFX10_W32-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 32; GFX10_W32-NEXT: v_and_b32_e32 v3, 1, v3 33; GFX10_W32-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v3 34; GFX10_W32-NEXT: v_div_fmas_f32 v0, v0, v1, v2 35; GFX10_W32-NEXT: s_setpc_b64 s[30:31] 36; 37; GFX10_W64-LABEL: v_div_fmas_f32: 38; GFX10_W64: ; %bb.0: 39; GFX10_W64-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 40; GFX10_W64-NEXT: v_and_b32_e32 v3, 1, v3 
41; GFX10_W64-NEXT: v_cmp_ne_u32_e32 vcc, 0, v3 42; GFX10_W64-NEXT: v_div_fmas_f32 v0, v0, v1, v2 43; GFX10_W64-NEXT: s_setpc_b64 s[30:31] 44; 45; GFX11_W32-LABEL: v_div_fmas_f32: 46; GFX11_W32: ; %bb.0: 47; GFX11_W32-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 48; GFX11_W32-NEXT: v_and_b32_e32 v3, 1, v3 49; GFX11_W32-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v3 50; GFX11_W32-NEXT: v_div_fmas_f32 v0, v0, v1, v2 51; GFX11_W32-NEXT: s_setpc_b64 s[30:31] 52; 53; GFX11_W64-LABEL: v_div_fmas_f32: 54; GFX11_W64: ; %bb.0: 55; GFX11_W64-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 56; GFX11_W64-NEXT: v_and_b32_e32 v3, 1, v3 57; GFX11_W64-NEXT: v_cmp_ne_u32_e32 vcc, 0, v3 58; GFX11_W64-NEXT: v_div_fmas_f32 v0, v0, v1, v2 59; GFX11_W64-NEXT: s_setpc_b64 s[30:31] 60 %result = call float @llvm.amdgcn.div.fmas.f32(float %a, float %b, float %c, i1 %d) 61 ret float %result 62} 63 64define double @v_div_fmas_f64(double %a, double %b, double %c, i1 %d) { 65; GFX7-LABEL: v_div_fmas_f64: 66; GFX7: ; %bb.0: 67; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 68; GFX7-NEXT: v_and_b32_e32 v6, 1, v6 69; GFX7-NEXT: v_cmp_ne_u32_e32 vcc, 0, v6 70; GFX7-NEXT: s_nop 3 71; GFX7-NEXT: v_div_fmas_f64 v[0:1], v[0:1], v[2:3], v[4:5] 72; GFX7-NEXT: s_setpc_b64 s[30:31] 73; 74; GFX8-LABEL: v_div_fmas_f64: 75; GFX8: ; %bb.0: 76; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 77; GFX8-NEXT: v_and_b32_e32 v6, 1, v6 78; GFX8-NEXT: v_cmp_ne_u32_e32 vcc, 0, v6 79; GFX8-NEXT: s_nop 3 80; GFX8-NEXT: v_div_fmas_f64 v[0:1], v[0:1], v[2:3], v[4:5] 81; GFX8-NEXT: s_setpc_b64 s[30:31] 82; 83; GFX10_W32-LABEL: v_div_fmas_f64: 84; GFX10_W32: ; %bb.0: 85; GFX10_W32-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 86; GFX10_W32-NEXT: v_and_b32_e32 v6, 1, v6 87; GFX10_W32-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v6 88; GFX10_W32-NEXT: v_div_fmas_f64 v[0:1], v[0:1], v[2:3], v[4:5] 89; GFX10_W32-NEXT: s_setpc_b64 s[30:31] 90; 91; GFX10_W64-LABEL: v_div_fmas_f64: 92; GFX10_W64: ; %bb.0: 93; GFX10_W64-NEXT: s_waitcnt vmcnt(0) 
expcnt(0) lgkmcnt(0) 94; GFX10_W64-NEXT: v_and_b32_e32 v6, 1, v6 95; GFX10_W64-NEXT: v_cmp_ne_u32_e32 vcc, 0, v6 96; GFX10_W64-NEXT: v_div_fmas_f64 v[0:1], v[0:1], v[2:3], v[4:5] 97; GFX10_W64-NEXT: s_setpc_b64 s[30:31] 98; 99; GFX11_W32-LABEL: v_div_fmas_f64: 100; GFX11_W32: ; %bb.0: 101; GFX11_W32-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 102; GFX11_W32-NEXT: v_and_b32_e32 v6, 1, v6 103; GFX11_W32-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v6 104; GFX11_W32-NEXT: v_div_fmas_f64 v[0:1], v[0:1], v[2:3], v[4:5] 105; GFX11_W32-NEXT: s_setpc_b64 s[30:31] 106; 107; GFX11_W64-LABEL: v_div_fmas_f64: 108; GFX11_W64: ; %bb.0: 109; GFX11_W64-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 110; GFX11_W64-NEXT: v_and_b32_e32 v6, 1, v6 111; GFX11_W64-NEXT: v_cmp_ne_u32_e32 vcc, 0, v6 112; GFX11_W64-NEXT: v_div_fmas_f64 v[0:1], v[0:1], v[2:3], v[4:5] 113; GFX11_W64-NEXT: s_setpc_b64 s[30:31] 114 %result = call double @llvm.amdgcn.div.fmas.f64(double %a, double %b, double %c, i1 %d) 115 ret double %result 116} 117 118define amdgpu_ps float @s_div_fmas_f32(float inreg %a, float inreg %b, float inreg %c, i32 inreg %d) { 119; GFX7-LABEL: s_div_fmas_f32: 120; GFX7: ; %bb.0: 121; GFX7-NEXT: s_cmp_eq_u32 s3, 0 122; GFX7-NEXT: s_cselect_b32 s3, 1, 0 123; GFX7-NEXT: v_mov_b32_e32 v0, s0 124; GFX7-NEXT: s_and_b32 s0, 1, s3 125; GFX7-NEXT: v_mov_b32_e32 v1, s1 126; GFX7-NEXT: v_mov_b32_e32 v2, s2 127; GFX7-NEXT: v_cmp_ne_u32_e64 vcc, 0, s0 128; GFX7-NEXT: s_nop 3 129; GFX7-NEXT: v_div_fmas_f32 v0, v0, v1, v2 130; GFX7-NEXT: ; return to shader part epilog 131; 132; GFX8-LABEL: s_div_fmas_f32: 133; GFX8: ; %bb.0: 134; GFX8-NEXT: s_cmp_eq_u32 s3, 0 135; GFX8-NEXT: s_cselect_b32 s3, 1, 0 136; GFX8-NEXT: v_mov_b32_e32 v0, s0 137; GFX8-NEXT: s_and_b32 s0, 1, s3 138; GFX8-NEXT: v_mov_b32_e32 v1, s1 139; GFX8-NEXT: v_mov_b32_e32 v2, s2 140; GFX8-NEXT: v_cmp_ne_u32_e64 vcc, 0, s0 141; GFX8-NEXT: s_nop 3 142; GFX8-NEXT: v_div_fmas_f32 v0, v0, v1, v2 143; GFX8-NEXT: ; return to shader part epilog 144; 145; 
GFX10_W32-LABEL: s_div_fmas_f32: 146; GFX10_W32: ; %bb.0: 147; GFX10_W32-NEXT: s_cmp_eq_u32 s3, 0 148; GFX10_W32-NEXT: v_mov_b32_e32 v0, s1 149; GFX10_W32-NEXT: s_cselect_b32 s3, 1, 0 150; GFX10_W32-NEXT: v_mov_b32_e32 v1, s2 151; GFX10_W32-NEXT: s_and_b32 s3, 1, s3 152; GFX10_W32-NEXT: v_cmp_ne_u32_e64 vcc_lo, 0, s3 153; GFX10_W32-NEXT: v_div_fmas_f32 v0, s0, v0, v1 154; GFX10_W32-NEXT: ; return to shader part epilog 155; 156; GFX10_W64-LABEL: s_div_fmas_f32: 157; GFX10_W64: ; %bb.0: 158; GFX10_W64-NEXT: s_cmp_eq_u32 s3, 0 159; GFX10_W64-NEXT: v_mov_b32_e32 v0, s1 160; GFX10_W64-NEXT: s_cselect_b32 s3, 1, 0 161; GFX10_W64-NEXT: v_mov_b32_e32 v1, s2 162; GFX10_W64-NEXT: s_and_b32 s3, 1, s3 163; GFX10_W64-NEXT: v_cmp_ne_u32_e64 vcc, 0, s3 164; GFX10_W64-NEXT: v_div_fmas_f32 v0, s0, v0, v1 165; GFX10_W64-NEXT: ; return to shader part epilog 166; 167; GFX11_W32-LABEL: s_div_fmas_f32: 168; GFX11_W32: ; %bb.0: 169; GFX11_W32-NEXT: s_cmp_eq_u32 s3, 0 170; GFX11_W32-NEXT: v_dual_mov_b32 v0, s1 :: v_dual_mov_b32 v1, s2 171; GFX11_W32-NEXT: s_cselect_b32 s3, 1, 0 172; GFX11_W32-NEXT: s_and_b32 s3, 1, s3 173; GFX11_W32-NEXT: v_cmp_ne_u32_e64 vcc_lo, 0, s3 174; GFX11_W32-NEXT: v_div_fmas_f32 v0, s0, v0, v1 175; GFX11_W32-NEXT: ; return to shader part epilog 176; 177; GFX11_W64-LABEL: s_div_fmas_f32: 178; GFX11_W64: ; %bb.0: 179; GFX11_W64-NEXT: s_cmp_eq_u32 s3, 0 180; GFX11_W64-NEXT: v_mov_b32_e32 v0, s1 181; GFX11_W64-NEXT: s_cselect_b32 s3, 1, 0 182; GFX11_W64-NEXT: v_mov_b32_e32 v1, s2 183; GFX11_W64-NEXT: s_and_b32 s3, 1, s3 184; GFX11_W64-NEXT: v_cmp_ne_u32_e64 vcc, 0, s3 185; GFX11_W64-NEXT: v_div_fmas_f32 v0, s0, v0, v1 186; GFX11_W64-NEXT: ; return to shader part epilog 187 %vcc = icmp eq i32 %d, 0 188 %result = call float @llvm.amdgcn.div.fmas.f32(float %a, float %b, float %c, i1 %vcc) 189 ret float %result 190} 191 192define amdgpu_ps double @s_div_fmas_f64(double inreg %a, double inreg %b, double inreg %c, i32 inreg %d) { 193; GFX7-LABEL: s_div_fmas_f64: 194; GFX7: 
; %bb.0: 195; GFX7-NEXT: s_cmp_eq_u32 s6, 0 196; GFX7-NEXT: s_cselect_b32 s6, 1, 0 197; GFX7-NEXT: v_mov_b32_e32 v0, s0 198; GFX7-NEXT: v_mov_b32_e32 v1, s1 199; GFX7-NEXT: v_mov_b32_e32 v2, s2 200; GFX7-NEXT: v_mov_b32_e32 v4, s4 201; GFX7-NEXT: s_and_b32 s0, 1, s6 202; GFX7-NEXT: v_mov_b32_e32 v3, s3 203; GFX7-NEXT: v_mov_b32_e32 v5, s5 204; GFX7-NEXT: v_cmp_ne_u32_e64 vcc, 0, s0 205; GFX7-NEXT: s_nop 3 206; GFX7-NEXT: v_div_fmas_f64 v[0:1], v[0:1], v[2:3], v[4:5] 207; GFX7-NEXT: v_readfirstlane_b32 s0, v0 208; GFX7-NEXT: v_readfirstlane_b32 s1, v1 209; GFX7-NEXT: ; return to shader part epilog 210; 211; GFX8-LABEL: s_div_fmas_f64: 212; GFX8: ; %bb.0: 213; GFX8-NEXT: s_cmp_eq_u32 s6, 0 214; GFX8-NEXT: s_cselect_b32 s6, 1, 0 215; GFX8-NEXT: v_mov_b32_e32 v0, s0 216; GFX8-NEXT: v_mov_b32_e32 v1, s1 217; GFX8-NEXT: v_mov_b32_e32 v2, s2 218; GFX8-NEXT: v_mov_b32_e32 v4, s4 219; GFX8-NEXT: s_and_b32 s0, 1, s6 220; GFX8-NEXT: v_mov_b32_e32 v3, s3 221; GFX8-NEXT: v_mov_b32_e32 v5, s5 222; GFX8-NEXT: v_cmp_ne_u32_e64 vcc, 0, s0 223; GFX8-NEXT: s_nop 3 224; GFX8-NEXT: v_div_fmas_f64 v[0:1], v[0:1], v[2:3], v[4:5] 225; GFX8-NEXT: v_readfirstlane_b32 s0, v0 226; GFX8-NEXT: v_readfirstlane_b32 s1, v1 227; GFX8-NEXT: ; return to shader part epilog 228; 229; GFX10_W32-LABEL: s_div_fmas_f64: 230; GFX10_W32: ; %bb.0: 231; GFX10_W32-NEXT: s_cmp_eq_u32 s6, 0 232; GFX10_W32-NEXT: v_mov_b32_e32 v0, s2 233; GFX10_W32-NEXT: s_cselect_b32 s6, 1, 0 234; GFX10_W32-NEXT: v_mov_b32_e32 v2, s4 235; GFX10_W32-NEXT: s_and_b32 s6, 1, s6 236; GFX10_W32-NEXT: v_mov_b32_e32 v1, s3 237; GFX10_W32-NEXT: v_cmp_ne_u32_e64 vcc_lo, 0, s6 238; GFX10_W32-NEXT: v_mov_b32_e32 v3, s5 239; GFX10_W32-NEXT: v_div_fmas_f64 v[0:1], s[0:1], v[0:1], v[2:3] 240; GFX10_W32-NEXT: v_readfirstlane_b32 s0, v0 241; GFX10_W32-NEXT: v_readfirstlane_b32 s1, v1 242; GFX10_W32-NEXT: ; return to shader part epilog 243; 244; GFX10_W64-LABEL: s_div_fmas_f64: 245; GFX10_W64: ; %bb.0: 246; GFX10_W64-NEXT: s_cmp_eq_u32 s6, 0 247; 
GFX10_W64-NEXT: v_mov_b32_e32 v0, s2 248; GFX10_W64-NEXT: s_cselect_b32 s6, 1, 0 249; GFX10_W64-NEXT: v_mov_b32_e32 v2, s4 250; GFX10_W64-NEXT: s_and_b32 s6, 1, s6 251; GFX10_W64-NEXT: v_mov_b32_e32 v1, s3 252; GFX10_W64-NEXT: v_cmp_ne_u32_e64 vcc, 0, s6 253; GFX10_W64-NEXT: v_mov_b32_e32 v3, s5 254; GFX10_W64-NEXT: v_div_fmas_f64 v[0:1], s[0:1], v[0:1], v[2:3] 255; GFX10_W64-NEXT: v_readfirstlane_b32 s0, v0 256; GFX10_W64-NEXT: v_readfirstlane_b32 s1, v1 257; GFX10_W64-NEXT: ; return to shader part epilog 258; 259; GFX11_W32-LABEL: s_div_fmas_f64: 260; GFX11_W32: ; %bb.0: 261; GFX11_W32-NEXT: s_cmp_eq_u32 s6, 0 262; GFX11_W32-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3 263; GFX11_W32-NEXT: s_cselect_b32 s6, 1, 0 264; GFX11_W32-NEXT: v_dual_mov_b32 v2, s4 :: v_dual_mov_b32 v3, s5 265; GFX11_W32-NEXT: s_and_b32 s6, 1, s6 266; GFX11_W32-NEXT: v_cmp_ne_u32_e64 vcc_lo, 0, s6 267; GFX11_W32-NEXT: v_div_fmas_f64 v[0:1], s[0:1], v[0:1], v[2:3] 268; GFX11_W32-NEXT: v_readfirstlane_b32 s0, v0 269; GFX11_W32-NEXT: v_readfirstlane_b32 s1, v1 270; GFX11_W32-NEXT: ; return to shader part epilog 271; 272; GFX11_W64-LABEL: s_div_fmas_f64: 273; GFX11_W64: ; %bb.0: 274; GFX11_W64-NEXT: s_cmp_eq_u32 s6, 0 275; GFX11_W64-NEXT: v_mov_b32_e32 v0, s2 276; GFX11_W64-NEXT: s_cselect_b32 s6, 1, 0 277; GFX11_W64-NEXT: v_mov_b32_e32 v2, s4 278; GFX11_W64-NEXT: s_and_b32 s6, 1, s6 279; GFX11_W64-NEXT: v_mov_b32_e32 v1, s3 280; GFX11_W64-NEXT: v_cmp_ne_u32_e64 vcc, 0, s6 281; GFX11_W64-NEXT: v_mov_b32_e32 v3, s5 282; GFX11_W64-NEXT: v_div_fmas_f64 v[0:1], s[0:1], v[0:1], v[2:3] 283; GFX11_W64-NEXT: v_readfirstlane_b32 s0, v0 284; GFX11_W64-NEXT: v_readfirstlane_b32 s1, v1 285; GFX11_W64-NEXT: ; return to shader part epilog 286 %vcc = icmp eq i32 %d, 0 287 %result = call double @llvm.amdgcn.div.fmas.f64(double %a, double %b, double %c, i1 %vcc) 288 ret double %result 289} 290 291define amdgpu_kernel void @test_div_fmas_f32(ptr addrspace(1) %out, [8 x i32], float %a, [8 x i32], float 
%b, [8 x i32], float %c, [8 x i32], i1 %d) { 292; GFX7-LABEL: test_div_fmas_f32: 293; GFX7: ; %bb.0: 294; GFX7-NEXT: s_load_dword s4, s[2:3], 0xa 295; GFX7-NEXT: s_load_dword s5, s[2:3], 0x13 296; GFX7-NEXT: s_load_dword s6, s[2:3], 0x1c 297; GFX7-NEXT: s_load_dword s7, s[2:3], 0x25 298; GFX7-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 299; GFX7-NEXT: s_waitcnt lgkmcnt(0) 300; GFX7-NEXT: v_mov_b32_e32 v0, s4 301; GFX7-NEXT: v_mov_b32_e32 v1, s5 302; GFX7-NEXT: v_mov_b32_e32 v2, s6 303; GFX7-NEXT: s_and_b32 s2, 1, s7 304; GFX7-NEXT: v_cmp_ne_u32_e64 vcc, 0, s2 305; GFX7-NEXT: s_mov_b32 s2, -1 306; GFX7-NEXT: s_mov_b32 s3, 0xf000 307; GFX7-NEXT: s_nop 1 308; GFX7-NEXT: v_div_fmas_f32 v0, v0, v1, v2 309; GFX7-NEXT: buffer_store_dword v0, off, s[0:3], 0 310; GFX7-NEXT: s_endpgm 311; 312; GFX8-LABEL: test_div_fmas_f32: 313; GFX8: ; %bb.0: 314; GFX8-NEXT: s_load_dword s0, s[2:3], 0x28 315; GFX8-NEXT: s_load_dword s1, s[2:3], 0x4c 316; GFX8-NEXT: s_load_dword s4, s[2:3], 0x70 317; GFX8-NEXT: s_load_dword s5, s[2:3], 0x94 318; GFX8-NEXT: s_waitcnt lgkmcnt(0) 319; GFX8-NEXT: v_mov_b32_e32 v0, s0 320; GFX8-NEXT: v_mov_b32_e32 v1, s1 321; GFX8-NEXT: v_mov_b32_e32 v2, s4 322; GFX8-NEXT: s_and_b32 s0, 1, s5 323; GFX8-NEXT: v_cmp_ne_u32_e64 vcc, 0, s0 324; GFX8-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 325; GFX8-NEXT: s_nop 2 326; GFX8-NEXT: v_div_fmas_f32 v2, v0, v1, v2 327; GFX8-NEXT: s_waitcnt lgkmcnt(0) 328; GFX8-NEXT: v_mov_b32_e32 v0, s0 329; GFX8-NEXT: v_mov_b32_e32 v1, s1 330; GFX8-NEXT: flat_store_dword v[0:1], v2 331; GFX8-NEXT: s_endpgm 332; 333; GFX10_W32-LABEL: test_div_fmas_f32: 334; GFX10_W32: ; %bb.0: 335; GFX10_W32-NEXT: s_clause 0x4 336; GFX10_W32-NEXT: s_load_dword s4, s[2:3], 0x94 337; GFX10_W32-NEXT: s_load_dword s5, s[2:3], 0x4c 338; GFX10_W32-NEXT: s_load_dword s6, s[2:3], 0x70 339; GFX10_W32-NEXT: s_load_dword s7, s[2:3], 0x28 340; GFX10_W32-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 341; GFX10_W32-NEXT: s_waitcnt lgkmcnt(0) 342; GFX10_W32-NEXT: s_and_b32 s2, 
1, s4 343; GFX10_W32-NEXT: v_mov_b32_e32 v0, s5 344; GFX10_W32-NEXT: v_cmp_ne_u32_e64 vcc_lo, 0, s2 345; GFX10_W32-NEXT: v_mov_b32_e32 v1, s6 346; GFX10_W32-NEXT: v_div_fmas_f32 v0, s7, v0, v1 347; GFX10_W32-NEXT: v_mov_b32_e32 v1, 0 348; GFX10_W32-NEXT: global_store_dword v1, v0, s[0:1] 349; GFX10_W32-NEXT: s_endpgm 350; 351; GFX10_W64-LABEL: test_div_fmas_f32: 352; GFX10_W64: ; %bb.0: 353; GFX10_W64-NEXT: s_clause 0x4 354; GFX10_W64-NEXT: s_load_dword s4, s[2:3], 0x94 355; GFX10_W64-NEXT: s_load_dword s5, s[2:3], 0x4c 356; GFX10_W64-NEXT: s_load_dword s6, s[2:3], 0x70 357; GFX10_W64-NEXT: s_load_dword s7, s[2:3], 0x28 358; GFX10_W64-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 359; GFX10_W64-NEXT: s_waitcnt lgkmcnt(0) 360; GFX10_W64-NEXT: s_and_b32 s2, 1, s4 361; GFX10_W64-NEXT: v_mov_b32_e32 v0, s5 362; GFX10_W64-NEXT: v_cmp_ne_u32_e64 vcc, 0, s2 363; GFX10_W64-NEXT: v_mov_b32_e32 v1, s6 364; GFX10_W64-NEXT: v_div_fmas_f32 v0, s7, v0, v1 365; GFX10_W64-NEXT: v_mov_b32_e32 v1, 0 366; GFX10_W64-NEXT: global_store_dword v1, v0, s[0:1] 367; GFX10_W64-NEXT: s_endpgm 368; 369; GFX11_W32-LABEL: test_div_fmas_f32: 370; GFX11_W32: ; %bb.0: 371; GFX11_W32-NEXT: s_clause 0x4 372; GFX11_W32-NEXT: s_load_b32 s4, s[2:3], 0x94 373; GFX11_W32-NEXT: s_load_b32 s5, s[2:3], 0x4c 374; GFX11_W32-NEXT: s_load_b32 s6, s[2:3], 0x70 375; GFX11_W32-NEXT: s_load_b32 s7, s[2:3], 0x28 376; GFX11_W32-NEXT: s_load_b64 s[0:1], s[2:3], 0x0 377; GFX11_W32-NEXT: s_waitcnt lgkmcnt(0) 378; GFX11_W32-NEXT: s_and_b32 s2, 1, s4 379; GFX11_W32-NEXT: v_dual_mov_b32 v0, s5 :: v_dual_mov_b32 v1, s6 380; GFX11_W32-NEXT: v_cmp_ne_u32_e64 vcc_lo, 0, s2 381; GFX11_W32-NEXT: v_div_fmas_f32 v0, s7, v0, v1 382; GFX11_W32-NEXT: v_mov_b32_e32 v1, 0 383; GFX11_W32-NEXT: global_store_b32 v1, v0, s[0:1] 384; GFX11_W32-NEXT: s_nop 0 385; GFX11_W32-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) 386; GFX11_W32-NEXT: s_endpgm 387; 388; GFX11_W64-LABEL: test_div_fmas_f32: 389; GFX11_W64: ; %bb.0: 390; GFX11_W64-NEXT: s_clause 0x4 
391; GFX11_W64-NEXT: s_load_b32 s4, s[2:3], 0x94 392; GFX11_W64-NEXT: s_load_b32 s5, s[2:3], 0x4c 393; GFX11_W64-NEXT: s_load_b32 s6, s[2:3], 0x70 394; GFX11_W64-NEXT: s_load_b32 s7, s[2:3], 0x28 395; GFX11_W64-NEXT: s_load_b64 s[0:1], s[2:3], 0x0 396; GFX11_W64-NEXT: s_waitcnt lgkmcnt(0) 397; GFX11_W64-NEXT: s_and_b32 s2, 1, s4 398; GFX11_W64-NEXT: v_mov_b32_e32 v0, s5 399; GFX11_W64-NEXT: v_cmp_ne_u32_e64 vcc, 0, s2 400; GFX11_W64-NEXT: v_mov_b32_e32 v1, s6 401; GFX11_W64-NEXT: v_div_fmas_f32 v0, s7, v0, v1 402; GFX11_W64-NEXT: v_mov_b32_e32 v1, 0 403; GFX11_W64-NEXT: global_store_b32 v1, v0, s[0:1] 404; GFX11_W64-NEXT: s_nop 0 405; GFX11_W64-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) 406; GFX11_W64-NEXT: s_endpgm 407 %result = call float @llvm.amdgcn.div.fmas.f32(float %a, float %b, float %c, i1 %d) 408 store float %result, ptr addrspace(1) %out, align 4 409 ret void 410} 411 412define amdgpu_kernel void @test_div_fmas_f32_inline_imm_0(ptr addrspace(1) %out, [8 x i32], float %a, [8 x i32], float %b, [8 x i32], float %c, [8 x i32], i1 %d) { 413; GFX7-LABEL: test_div_fmas_f32_inline_imm_0: 414; GFX7: ; %bb.0: 415; GFX7-NEXT: s_load_dword s4, s[2:3], 0x13 416; GFX7-NEXT: s_load_dword s5, s[2:3], 0x1c 417; GFX7-NEXT: s_load_dword s6, s[2:3], 0x25 418; GFX7-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 419; GFX7-NEXT: s_mov_b32 s3, 0xf000 420; GFX7-NEXT: s_waitcnt lgkmcnt(0) 421; GFX7-NEXT: v_mov_b32_e32 v0, s4 422; GFX7-NEXT: v_mov_b32_e32 v1, s5 423; GFX7-NEXT: s_and_b32 s2, 1, s6 424; GFX7-NEXT: v_cmp_ne_u32_e64 vcc, 0, s2 425; GFX7-NEXT: s_mov_b32 s2, -1 426; GFX7-NEXT: s_nop 2 427; GFX7-NEXT: v_div_fmas_f32 v0, 1.0, v0, v1 428; GFX7-NEXT: buffer_store_dword v0, off, s[0:3], 0 429; GFX7-NEXT: s_endpgm 430; 431; GFX8-LABEL: test_div_fmas_f32_inline_imm_0: 432; GFX8: ; %bb.0: 433; GFX8-NEXT: s_load_dword s0, s[2:3], 0x4c 434; GFX8-NEXT: s_load_dword s1, s[2:3], 0x70 435; GFX8-NEXT: s_load_dword s4, s[2:3], 0x94 436; GFX8-NEXT: s_waitcnt lgkmcnt(0) 437; GFX8-NEXT: 
v_mov_b32_e32 v0, s0 438; GFX8-NEXT: v_mov_b32_e32 v1, s1 439; GFX8-NEXT: s_and_b32 s0, 1, s4 440; GFX8-NEXT: v_cmp_ne_u32_e64 vcc, 0, s0 441; GFX8-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 442; GFX8-NEXT: s_nop 2 443; GFX8-NEXT: v_div_fmas_f32 v2, 1.0, v0, v1 444; GFX8-NEXT: s_waitcnt lgkmcnt(0) 445; GFX8-NEXT: v_mov_b32_e32 v0, s0 446; GFX8-NEXT: v_mov_b32_e32 v1, s1 447; GFX8-NEXT: flat_store_dword v[0:1], v2 448; GFX8-NEXT: s_endpgm 449; 450; GFX10_W32-LABEL: test_div_fmas_f32_inline_imm_0: 451; GFX10_W32: ; %bb.0: 452; GFX10_W32-NEXT: s_clause 0x3 453; GFX10_W32-NEXT: s_load_dword s4, s[2:3], 0x94 454; GFX10_W32-NEXT: s_load_dword s5, s[2:3], 0x70 455; GFX10_W32-NEXT: s_load_dword s6, s[2:3], 0x4c 456; GFX10_W32-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 457; GFX10_W32-NEXT: v_mov_b32_e32 v1, 0 458; GFX10_W32-NEXT: s_waitcnt lgkmcnt(0) 459; GFX10_W32-NEXT: s_and_b32 s2, 1, s4 460; GFX10_W32-NEXT: v_mov_b32_e32 v0, s5 461; GFX10_W32-NEXT: v_cmp_ne_u32_e64 vcc_lo, 0, s2 462; GFX10_W32-NEXT: v_div_fmas_f32 v0, 1.0, s6, v0 463; GFX10_W32-NEXT: global_store_dword v1, v0, s[0:1] 464; GFX10_W32-NEXT: s_endpgm 465; 466; GFX10_W64-LABEL: test_div_fmas_f32_inline_imm_0: 467; GFX10_W64: ; %bb.0: 468; GFX10_W64-NEXT: s_clause 0x3 469; GFX10_W64-NEXT: s_load_dword s4, s[2:3], 0x94 470; GFX10_W64-NEXT: s_load_dword s5, s[2:3], 0x70 471; GFX10_W64-NEXT: s_load_dword s6, s[2:3], 0x4c 472; GFX10_W64-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 473; GFX10_W64-NEXT: v_mov_b32_e32 v1, 0 474; GFX10_W64-NEXT: s_waitcnt lgkmcnt(0) 475; GFX10_W64-NEXT: s_and_b32 s2, 1, s4 476; GFX10_W64-NEXT: v_mov_b32_e32 v0, s5 477; GFX10_W64-NEXT: v_cmp_ne_u32_e64 vcc, 0, s2 478; GFX10_W64-NEXT: v_div_fmas_f32 v0, 1.0, s6, v0 479; GFX10_W64-NEXT: global_store_dword v1, v0, s[0:1] 480; GFX10_W64-NEXT: s_endpgm 481; 482; GFX11_W32-LABEL: test_div_fmas_f32_inline_imm_0: 483; GFX11_W32: ; %bb.0: 484; GFX11_W32-NEXT: s_clause 0x3 485; GFX11_W32-NEXT: s_load_b32 s4, s[2:3], 0x94 486; GFX11_W32-NEXT: s_load_b32 
s5, s[2:3], 0x70 487; GFX11_W32-NEXT: s_load_b32 s6, s[2:3], 0x4c 488; GFX11_W32-NEXT: s_load_b64 s[0:1], s[2:3], 0x0 489; GFX11_W32-NEXT: v_mov_b32_e32 v1, 0 490; GFX11_W32-NEXT: s_waitcnt lgkmcnt(0) 491; GFX11_W32-NEXT: s_and_b32 s2, 1, s4 492; GFX11_W32-NEXT: v_mov_b32_e32 v0, s5 493; GFX11_W32-NEXT: v_cmp_ne_u32_e64 vcc_lo, 0, s2 494; GFX11_W32-NEXT: v_div_fmas_f32 v0, 1.0, s6, v0 495; GFX11_W32-NEXT: global_store_b32 v1, v0, s[0:1] 496; GFX11_W32-NEXT: s_nop 0 497; GFX11_W32-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) 498; GFX11_W32-NEXT: s_endpgm 499; 500; GFX11_W64-LABEL: test_div_fmas_f32_inline_imm_0: 501; GFX11_W64: ; %bb.0: 502; GFX11_W64-NEXT: s_clause 0x3 503; GFX11_W64-NEXT: s_load_b32 s4, s[2:3], 0x94 504; GFX11_W64-NEXT: s_load_b32 s5, s[2:3], 0x70 505; GFX11_W64-NEXT: s_load_b32 s6, s[2:3], 0x4c 506; GFX11_W64-NEXT: s_load_b64 s[0:1], s[2:3], 0x0 507; GFX11_W64-NEXT: v_mov_b32_e32 v1, 0 508; GFX11_W64-NEXT: s_waitcnt lgkmcnt(0) 509; GFX11_W64-NEXT: s_and_b32 s2, 1, s4 510; GFX11_W64-NEXT: v_mov_b32_e32 v0, s5 511; GFX11_W64-NEXT: v_cmp_ne_u32_e64 vcc, 0, s2 512; GFX11_W64-NEXT: v_div_fmas_f32 v0, 1.0, s6, v0 513; GFX11_W64-NEXT: global_store_b32 v1, v0, s[0:1] 514; GFX11_W64-NEXT: s_nop 0 515; GFX11_W64-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) 516; GFX11_W64-NEXT: s_endpgm 517 %result = call float @llvm.amdgcn.div.fmas.f32(float 1.0, float %b, float %c, i1 %d) 518 store float %result, ptr addrspace(1) %out, align 4 519 ret void 520} 521 522define amdgpu_kernel void @test_div_fmas_f32_inline_imm_1(ptr addrspace(1) %out, float %a, float %b, float %c, [8 x i32], i1 %d) { 523; GFX7-LABEL: test_div_fmas_f32_inline_imm_1: 524; GFX7: ; %bb.0: 525; GFX7-NEXT: s_load_dword s4, s[2:3], 0x2 526; GFX7-NEXT: s_load_dword s5, s[2:3], 0x4 527; GFX7-NEXT: s_load_dword s6, s[2:3], 0xd 528; GFX7-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 529; GFX7-NEXT: s_mov_b32 s3, 0xf000 530; GFX7-NEXT: s_waitcnt lgkmcnt(0) 531; GFX7-NEXT: v_mov_b32_e32 v0, s4 532; GFX7-NEXT: 
v_mov_b32_e32 v1, s5 533; GFX7-NEXT: s_and_b32 s2, 1, s6 534; GFX7-NEXT: v_cmp_ne_u32_e64 vcc, 0, s2 535; GFX7-NEXT: s_mov_b32 s2, -1 536; GFX7-NEXT: s_nop 2 537; GFX7-NEXT: v_div_fmas_f32 v0, v0, 1.0, v1 538; GFX7-NEXT: buffer_store_dword v0, off, s[0:3], 0 539; GFX7-NEXT: s_endpgm 540; 541; GFX8-LABEL: test_div_fmas_f32_inline_imm_1: 542; GFX8: ; %bb.0: 543; GFX8-NEXT: s_load_dword s0, s[2:3], 0x8 544; GFX8-NEXT: s_load_dword s1, s[2:3], 0x10 545; GFX8-NEXT: s_load_dword s4, s[2:3], 0x34 546; GFX8-NEXT: s_waitcnt lgkmcnt(0) 547; GFX8-NEXT: v_mov_b32_e32 v0, s0 548; GFX8-NEXT: v_mov_b32_e32 v1, s1 549; GFX8-NEXT: s_and_b32 s0, 1, s4 550; GFX8-NEXT: v_cmp_ne_u32_e64 vcc, 0, s0 551; GFX8-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 552; GFX8-NEXT: s_nop 2 553; GFX8-NEXT: v_div_fmas_f32 v2, v0, 1.0, v1 554; GFX8-NEXT: s_waitcnt lgkmcnt(0) 555; GFX8-NEXT: v_mov_b32_e32 v0, s0 556; GFX8-NEXT: v_mov_b32_e32 v1, s1 557; GFX8-NEXT: flat_store_dword v[0:1], v2 558; GFX8-NEXT: s_endpgm 559; 560; GFX10_W32-LABEL: test_div_fmas_f32_inline_imm_1: 561; GFX10_W32: ; %bb.0: 562; GFX10_W32-NEXT: s_clause 0x3 563; GFX10_W32-NEXT: s_load_dword s4, s[2:3], 0x34 564; GFX10_W32-NEXT: s_load_dword s5, s[2:3], 0x10 565; GFX10_W32-NEXT: s_load_dword s6, s[2:3], 0x8 566; GFX10_W32-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 567; GFX10_W32-NEXT: v_mov_b32_e32 v1, 0 568; GFX10_W32-NEXT: s_waitcnt lgkmcnt(0) 569; GFX10_W32-NEXT: s_and_b32 s2, 1, s4 570; GFX10_W32-NEXT: v_mov_b32_e32 v0, s5 571; GFX10_W32-NEXT: v_cmp_ne_u32_e64 vcc_lo, 0, s2 572; GFX10_W32-NEXT: v_div_fmas_f32 v0, s6, 1.0, v0 573; GFX10_W32-NEXT: global_store_dword v1, v0, s[0:1] 574; GFX10_W32-NEXT: s_endpgm 575; 576; GFX10_W64-LABEL: test_div_fmas_f32_inline_imm_1: 577; GFX10_W64: ; %bb.0: 578; GFX10_W64-NEXT: s_clause 0x3 579; GFX10_W64-NEXT: s_load_dword s4, s[2:3], 0x34 580; GFX10_W64-NEXT: s_load_dword s5, s[2:3], 0x10 581; GFX10_W64-NEXT: s_load_dword s6, s[2:3], 0x8 582; GFX10_W64-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 583; 
GFX10_W64-NEXT: v_mov_b32_e32 v1, 0 584; GFX10_W64-NEXT: s_waitcnt lgkmcnt(0) 585; GFX10_W64-NEXT: s_and_b32 s2, 1, s4 586; GFX10_W64-NEXT: v_mov_b32_e32 v0, s5 587; GFX10_W64-NEXT: v_cmp_ne_u32_e64 vcc, 0, s2 588; GFX10_W64-NEXT: v_div_fmas_f32 v0, s6, 1.0, v0 589; GFX10_W64-NEXT: global_store_dword v1, v0, s[0:1] 590; GFX10_W64-NEXT: s_endpgm 591; 592; GFX11_W32-LABEL: test_div_fmas_f32_inline_imm_1: 593; GFX11_W32: ; %bb.0: 594; GFX11_W32-NEXT: s_clause 0x3 595; GFX11_W32-NEXT: s_load_b32 s4, s[2:3], 0x34 596; GFX11_W32-NEXT: s_load_b32 s5, s[2:3], 0x10 597; GFX11_W32-NEXT: s_load_b32 s6, s[2:3], 0x8 598; GFX11_W32-NEXT: s_load_b64 s[0:1], s[2:3], 0x0 599; GFX11_W32-NEXT: v_mov_b32_e32 v1, 0 600; GFX11_W32-NEXT: s_waitcnt lgkmcnt(0) 601; GFX11_W32-NEXT: s_and_b32 s2, 1, s4 602; GFX11_W32-NEXT: v_mov_b32_e32 v0, s5 603; GFX11_W32-NEXT: v_cmp_ne_u32_e64 vcc_lo, 0, s2 604; GFX11_W32-NEXT: v_div_fmas_f32 v0, s6, 1.0, v0 605; GFX11_W32-NEXT: global_store_b32 v1, v0, s[0:1] 606; GFX11_W32-NEXT: s_nop 0 607; GFX11_W32-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) 608; GFX11_W32-NEXT: s_endpgm 609; 610; GFX11_W64-LABEL: test_div_fmas_f32_inline_imm_1: 611; GFX11_W64: ; %bb.0: 612; GFX11_W64-NEXT: s_clause 0x3 613; GFX11_W64-NEXT: s_load_b32 s4, s[2:3], 0x34 614; GFX11_W64-NEXT: s_load_b32 s5, s[2:3], 0x10 615; GFX11_W64-NEXT: s_load_b32 s6, s[2:3], 0x8 616; GFX11_W64-NEXT: s_load_b64 s[0:1], s[2:3], 0x0 617; GFX11_W64-NEXT: v_mov_b32_e32 v1, 0 618; GFX11_W64-NEXT: s_waitcnt lgkmcnt(0) 619; GFX11_W64-NEXT: s_and_b32 s2, 1, s4 620; GFX11_W64-NEXT: v_mov_b32_e32 v0, s5 621; GFX11_W64-NEXT: v_cmp_ne_u32_e64 vcc, 0, s2 622; GFX11_W64-NEXT: v_div_fmas_f32 v0, s6, 1.0, v0 623; GFX11_W64-NEXT: global_store_b32 v1, v0, s[0:1] 624; GFX11_W64-NEXT: s_nop 0 625; GFX11_W64-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) 626; GFX11_W64-NEXT: s_endpgm 627 %result = call float @llvm.amdgcn.div.fmas.f32(float %a, float 1.0, float %c, i1 %d) 628 store float %result, ptr addrspace(1) %out, align 
4 629 ret void 630} 631 632define amdgpu_kernel void @test_div_fmas_f32_inline_imm_2(ptr addrspace(1) %out, [8 x i32], float %a, [8 x i32], float %b, [8 x i32], float %c, [8 x i32], i1 %d) { 633; GFX7-LABEL: test_div_fmas_f32_inline_imm_2: 634; GFX7: ; %bb.0: 635; GFX7-NEXT: s_load_dword s4, s[2:3], 0xa 636; GFX7-NEXT: s_load_dword s5, s[2:3], 0x13 637; GFX7-NEXT: s_load_dword s6, s[2:3], 0x25 638; GFX7-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 639; GFX7-NEXT: s_mov_b32 s3, 0xf000 640; GFX7-NEXT: s_waitcnt lgkmcnt(0) 641; GFX7-NEXT: v_mov_b32_e32 v0, s4 642; GFX7-NEXT: v_mov_b32_e32 v1, s5 643; GFX7-NEXT: s_and_b32 s2, 1, s6 644; GFX7-NEXT: v_cmp_ne_u32_e64 vcc, 0, s2 645; GFX7-NEXT: s_mov_b32 s2, -1 646; GFX7-NEXT: s_nop 2 647; GFX7-NEXT: v_div_fmas_f32 v0, v0, v1, 1.0 648; GFX7-NEXT: buffer_store_dword v0, off, s[0:3], 0 649; GFX7-NEXT: s_endpgm 650; 651; GFX8-LABEL: test_div_fmas_f32_inline_imm_2: 652; GFX8: ; %bb.0: 653; GFX8-NEXT: s_load_dword s0, s[2:3], 0x28 654; GFX8-NEXT: s_load_dword s1, s[2:3], 0x4c 655; GFX8-NEXT: s_load_dword s4, s[2:3], 0x94 656; GFX8-NEXT: s_waitcnt lgkmcnt(0) 657; GFX8-NEXT: v_mov_b32_e32 v0, s0 658; GFX8-NEXT: v_mov_b32_e32 v1, s1 659; GFX8-NEXT: s_and_b32 s0, 1, s4 660; GFX8-NEXT: v_cmp_ne_u32_e64 vcc, 0, s0 661; GFX8-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 662; GFX8-NEXT: s_nop 2 663; GFX8-NEXT: v_div_fmas_f32 v2, v0, v1, 1.0 664; GFX8-NEXT: s_waitcnt lgkmcnt(0) 665; GFX8-NEXT: v_mov_b32_e32 v0, s0 666; GFX8-NEXT: v_mov_b32_e32 v1, s1 667; GFX8-NEXT: flat_store_dword v[0:1], v2 668; GFX8-NEXT: s_endpgm 669; 670; GFX10_W32-LABEL: test_div_fmas_f32_inline_imm_2: 671; GFX10_W32: ; %bb.0: 672; GFX10_W32-NEXT: s_clause 0x3 673; GFX10_W32-NEXT: s_load_dword s4, s[2:3], 0x94 674; GFX10_W32-NEXT: s_load_dword s5, s[2:3], 0x4c 675; GFX10_W32-NEXT: s_load_dword s6, s[2:3], 0x28 676; GFX10_W32-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 677; GFX10_W32-NEXT: v_mov_b32_e32 v1, 0 678; GFX10_W32-NEXT: s_waitcnt lgkmcnt(0) 679; GFX10_W32-NEXT: 
s_and_b32 s2, 1, s4 680; GFX10_W32-NEXT: v_mov_b32_e32 v0, s5 681; GFX10_W32-NEXT: v_cmp_ne_u32_e64 vcc_lo, 0, s2 682; GFX10_W32-NEXT: v_div_fmas_f32 v0, s6, v0, 1.0 683; GFX10_W32-NEXT: global_store_dword v1, v0, s[0:1] 684; GFX10_W32-NEXT: s_endpgm 685; 686; GFX10_W64-LABEL: test_div_fmas_f32_inline_imm_2: 687; GFX10_W64: ; %bb.0: 688; GFX10_W64-NEXT: s_clause 0x3 689; GFX10_W64-NEXT: s_load_dword s4, s[2:3], 0x94 690; GFX10_W64-NEXT: s_load_dword s5, s[2:3], 0x4c 691; GFX10_W64-NEXT: s_load_dword s6, s[2:3], 0x28 692; GFX10_W64-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 693; GFX10_W64-NEXT: v_mov_b32_e32 v1, 0 694; GFX10_W64-NEXT: s_waitcnt lgkmcnt(0) 695; GFX10_W64-NEXT: s_and_b32 s2, 1, s4 696; GFX10_W64-NEXT: v_mov_b32_e32 v0, s5 697; GFX10_W64-NEXT: v_cmp_ne_u32_e64 vcc, 0, s2 698; GFX10_W64-NEXT: v_div_fmas_f32 v0, s6, v0, 1.0 699; GFX10_W64-NEXT: global_store_dword v1, v0, s[0:1] 700; GFX10_W64-NEXT: s_endpgm 701; 702; GFX11_W32-LABEL: test_div_fmas_f32_inline_imm_2: 703; GFX11_W32: ; %bb.0: 704; GFX11_W32-NEXT: s_clause 0x3 705; GFX11_W32-NEXT: s_load_b32 s4, s[2:3], 0x94 706; GFX11_W32-NEXT: s_load_b32 s5, s[2:3], 0x4c 707; GFX11_W32-NEXT: s_load_b32 s6, s[2:3], 0x28 708; GFX11_W32-NEXT: s_load_b64 s[0:1], s[2:3], 0x0 709; GFX11_W32-NEXT: v_mov_b32_e32 v1, 0 710; GFX11_W32-NEXT: s_waitcnt lgkmcnt(0) 711; GFX11_W32-NEXT: s_and_b32 s2, 1, s4 712; GFX11_W32-NEXT: v_mov_b32_e32 v0, s5 713; GFX11_W32-NEXT: v_cmp_ne_u32_e64 vcc_lo, 0, s2 714; GFX11_W32-NEXT: v_div_fmas_f32 v0, s6, v0, 1.0 715; GFX11_W32-NEXT: global_store_b32 v1, v0, s[0:1] 716; GFX11_W32-NEXT: s_nop 0 717; GFX11_W32-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) 718; GFX11_W32-NEXT: s_endpgm 719; 720; GFX11_W64-LABEL: test_div_fmas_f32_inline_imm_2: 721; GFX11_W64: ; %bb.0: 722; GFX11_W64-NEXT: s_clause 0x3 723; GFX11_W64-NEXT: s_load_b32 s4, s[2:3], 0x94 724; GFX11_W64-NEXT: s_load_b32 s5, s[2:3], 0x4c 725; GFX11_W64-NEXT: s_load_b32 s6, s[2:3], 0x28 726; GFX11_W64-NEXT: s_load_b64 s[0:1], s[2:3], 
0x0 727; GFX11_W64-NEXT: v_mov_b32_e32 v1, 0 728; GFX11_W64-NEXT: s_waitcnt lgkmcnt(0) 729; GFX11_W64-NEXT: s_and_b32 s2, 1, s4 730; GFX11_W64-NEXT: v_mov_b32_e32 v0, s5 731; GFX11_W64-NEXT: v_cmp_ne_u32_e64 vcc, 0, s2 732; GFX11_W64-NEXT: v_div_fmas_f32 v0, s6, v0, 1.0 733; GFX11_W64-NEXT: global_store_b32 v1, v0, s[0:1] 734; GFX11_W64-NEXT: s_nop 0 735; GFX11_W64-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) 736; GFX11_W64-NEXT: s_endpgm 737 %result = call float @llvm.amdgcn.div.fmas.f32(float %a, float %b, float 1.0, i1 %d) 738 store float %result, ptr addrspace(1) %out, align 4 739 ret void 740} 741 742define amdgpu_kernel void @test_div_fmas_f64(ptr addrspace(1) %out, double %a, double %b, double %c, i1 %d) { 743; GFX7-LABEL: test_div_fmas_f64: 744; GFX7: ; %bb.0: 745; GFX7-NEXT: s_load_dwordx8 s[4:11], s[2:3], 0x0 746; GFX7-NEXT: s_load_dword s0, s[2:3], 0x8 747; GFX7-NEXT: s_waitcnt lgkmcnt(0) 748; GFX7-NEXT: v_mov_b32_e32 v0, s6 749; GFX7-NEXT: v_mov_b32_e32 v2, s8 750; GFX7-NEXT: v_mov_b32_e32 v4, s10 751; GFX7-NEXT: s_and_b32 s0, 1, s0 752; GFX7-NEXT: v_mov_b32_e32 v1, s7 753; GFX7-NEXT: v_mov_b32_e32 v3, s9 754; GFX7-NEXT: v_mov_b32_e32 v5, s11 755; GFX7-NEXT: v_cmp_ne_u32_e64 vcc, 0, s0 756; GFX7-NEXT: s_mov_b32 s6, -1 757; GFX7-NEXT: s_mov_b32 s7, 0xf000 758; GFX7-NEXT: s_nop 1 759; GFX7-NEXT: v_div_fmas_f64 v[0:1], v[0:1], v[2:3], v[4:5] 760; GFX7-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 761; GFX7-NEXT: s_endpgm 762; 763; GFX8-LABEL: test_div_fmas_f64: 764; GFX8: ; %bb.0: 765; GFX8-NEXT: s_load_dwordx8 s[4:11], s[2:3], 0x0 766; GFX8-NEXT: s_load_dword s0, s[2:3], 0x20 767; GFX8-NEXT: s_waitcnt lgkmcnt(0) 768; GFX8-NEXT: v_mov_b32_e32 v0, s6 769; GFX8-NEXT: v_mov_b32_e32 v2, s8 770; GFX8-NEXT: v_mov_b32_e32 v4, s10 771; GFX8-NEXT: s_and_b32 s0, 1, s0 772; GFX8-NEXT: v_mov_b32_e32 v1, s7 773; GFX8-NEXT: v_mov_b32_e32 v3, s9 774; GFX8-NEXT: v_mov_b32_e32 v5, s11 775; GFX8-NEXT: v_cmp_ne_u32_e64 vcc, 0, s0 776; GFX8-NEXT: s_nop 3 777; GFX8-NEXT: 
v_div_fmas_f64 v[0:1], v[0:1], v[2:3], v[4:5] 778; GFX8-NEXT: v_mov_b32_e32 v2, s4 779; GFX8-NEXT: v_mov_b32_e32 v3, s5 780; GFX8-NEXT: flat_store_dwordx2 v[2:3], v[0:1] 781; GFX8-NEXT: s_endpgm 782; 783; GFX10_W32-LABEL: test_div_fmas_f64: 784; GFX10_W32: ; %bb.0: 785; GFX10_W32-NEXT: s_clause 0x1 786; GFX10_W32-NEXT: s_load_dword s0, s[2:3], 0x20 787; GFX10_W32-NEXT: s_load_dwordx8 s[4:11], s[2:3], 0x0 788; GFX10_W32-NEXT: s_waitcnt lgkmcnt(0) 789; GFX10_W32-NEXT: s_and_b32 s0, 1, s0 790; GFX10_W32-NEXT: v_mov_b32_e32 v0, s8 791; GFX10_W32-NEXT: v_mov_b32_e32 v2, s10 792; GFX10_W32-NEXT: v_cmp_ne_u32_e64 vcc_lo, 0, s0 793; GFX10_W32-NEXT: v_mov_b32_e32 v1, s9 794; GFX10_W32-NEXT: v_mov_b32_e32 v3, s11 795; GFX10_W32-NEXT: v_div_fmas_f64 v[0:1], s[6:7], v[0:1], v[2:3] 796; GFX10_W32-NEXT: v_mov_b32_e32 v2, 0 797; GFX10_W32-NEXT: global_store_dwordx2 v2, v[0:1], s[4:5] 798; GFX10_W32-NEXT: s_endpgm 799; 800; GFX10_W64-LABEL: test_div_fmas_f64: 801; GFX10_W64: ; %bb.0: 802; GFX10_W64-NEXT: s_clause 0x1 803; GFX10_W64-NEXT: s_load_dword s0, s[2:3], 0x20 804; GFX10_W64-NEXT: s_load_dwordx8 s[4:11], s[2:3], 0x0 805; GFX10_W64-NEXT: s_waitcnt lgkmcnt(0) 806; GFX10_W64-NEXT: s_and_b32 s0, 1, s0 807; GFX10_W64-NEXT: v_mov_b32_e32 v0, s8 808; GFX10_W64-NEXT: v_mov_b32_e32 v2, s10 809; GFX10_W64-NEXT: v_cmp_ne_u32_e64 vcc, 0, s0 810; GFX10_W64-NEXT: v_mov_b32_e32 v1, s9 811; GFX10_W64-NEXT: v_mov_b32_e32 v3, s11 812; GFX10_W64-NEXT: v_div_fmas_f64 v[0:1], s[6:7], v[0:1], v[2:3] 813; GFX10_W64-NEXT: v_mov_b32_e32 v2, 0 814; GFX10_W64-NEXT: global_store_dwordx2 v2, v[0:1], s[4:5] 815; GFX10_W64-NEXT: s_endpgm 816; 817; GFX11_W32-LABEL: test_div_fmas_f64: 818; GFX11_W32: ; %bb.0: 819; GFX11_W32-NEXT: s_clause 0x1 820; GFX11_W32-NEXT: s_load_b32 s8, s[2:3], 0x20 821; GFX11_W32-NEXT: s_load_b256 s[0:7], s[2:3], 0x0 822; GFX11_W32-NEXT: s_waitcnt lgkmcnt(0) 823; GFX11_W32-NEXT: s_and_b32 s8, 1, s8 824; GFX11_W32-NEXT: v_dual_mov_b32 v0, s4 :: v_dual_mov_b32 v1, s5 825; 
GFX11_W32-NEXT: v_cmp_ne_u32_e64 vcc_lo, 0, s8 826; GFX11_W32-NEXT: v_dual_mov_b32 v2, s6 :: v_dual_mov_b32 v3, s7 827; GFX11_W32-NEXT: v_div_fmas_f64 v[0:1], s[2:3], v[0:1], v[2:3] 828; GFX11_W32-NEXT: v_mov_b32_e32 v2, 0 829; GFX11_W32-NEXT: global_store_b64 v2, v[0:1], s[0:1] 830; GFX11_W32-NEXT: s_nop 0 831; GFX11_W32-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) 832; GFX11_W32-NEXT: s_endpgm 833; 834; GFX11_W64-LABEL: test_div_fmas_f64: 835; GFX11_W64: ; %bb.0: 836; GFX11_W64-NEXT: s_clause 0x1 837; GFX11_W64-NEXT: s_load_b32 s8, s[2:3], 0x20 838; GFX11_W64-NEXT: s_load_b256 s[0:7], s[2:3], 0x0 839; GFX11_W64-NEXT: s_waitcnt lgkmcnt(0) 840; GFX11_W64-NEXT: s_and_b32 s8, 1, s8 841; GFX11_W64-NEXT: v_mov_b32_e32 v0, s4 842; GFX11_W64-NEXT: v_mov_b32_e32 v2, s6 843; GFX11_W64-NEXT: v_cmp_ne_u32_e64 vcc, 0, s8 844; GFX11_W64-NEXT: v_mov_b32_e32 v1, s5 845; GFX11_W64-NEXT: v_mov_b32_e32 v3, s7 846; GFX11_W64-NEXT: v_div_fmas_f64 v[0:1], s[2:3], v[0:1], v[2:3] 847; GFX11_W64-NEXT: v_mov_b32_e32 v2, 0 848; GFX11_W64-NEXT: global_store_b64 v2, v[0:1], s[0:1] 849; GFX11_W64-NEXT: s_nop 0 850; GFX11_W64-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) 851; GFX11_W64-NEXT: s_endpgm 852 %result = call double @llvm.amdgcn.div.fmas.f64(double %a, double %b, double %c, i1 %d) 853 store double %result, ptr addrspace(1) %out, align 8 854 ret void 855} 856 857define amdgpu_kernel void @test_div_fmas_f32_cond_to_vcc(ptr addrspace(1) %out, float %a, float %b, float %c, i32 %i) { 858; GFX7-LABEL: test_div_fmas_f32_cond_to_vcc: 859; GFX7: ; %bb.0: 860; GFX7-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x2 861; GFX7-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 862; GFX7-NEXT: s_mov_b32 s3, 0xf000 863; GFX7-NEXT: s_waitcnt lgkmcnt(0) 864; GFX7-NEXT: s_cmp_eq_u32 s7, 0 865; GFX7-NEXT: s_cselect_b32 s2, 1, 0 866; GFX7-NEXT: s_and_b32 s2, 1, s2 867; GFX7-NEXT: v_mov_b32_e32 v0, s4 868; GFX7-NEXT: v_mov_b32_e32 v1, s5 869; GFX7-NEXT: v_mov_b32_e32 v2, s6 870; GFX7-NEXT: v_cmp_ne_u32_e64 vcc, 0, s2 871; 
GFX7-NEXT: s_mov_b32 s2, -1 872; GFX7-NEXT: s_nop 2 873; GFX7-NEXT: v_div_fmas_f32 v0, v0, v1, v2 874; GFX7-NEXT: buffer_store_dword v0, off, s[0:3], 0 875; GFX7-NEXT: s_endpgm 876; 877; GFX8-LABEL: test_div_fmas_f32_cond_to_vcc: 878; GFX8: ; %bb.0: 879; GFX8-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x8 880; GFX8-NEXT: s_waitcnt lgkmcnt(0) 881; GFX8-NEXT: s_cmp_eq_u32 s7, 0 882; GFX8-NEXT: s_cselect_b32 s0, 1, 0 883; GFX8-NEXT: s_and_b32 s0, 1, s0 884; GFX8-NEXT: v_mov_b32_e32 v0, s4 885; GFX8-NEXT: v_mov_b32_e32 v1, s5 886; GFX8-NEXT: v_mov_b32_e32 v2, s6 887; GFX8-NEXT: v_cmp_ne_u32_e64 vcc, 0, s0 888; GFX8-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 889; GFX8-NEXT: s_nop 2 890; GFX8-NEXT: v_div_fmas_f32 v2, v0, v1, v2 891; GFX8-NEXT: s_waitcnt lgkmcnt(0) 892; GFX8-NEXT: v_mov_b32_e32 v0, s0 893; GFX8-NEXT: v_mov_b32_e32 v1, s1 894; GFX8-NEXT: flat_store_dword v[0:1], v2 895; GFX8-NEXT: s_endpgm 896; 897; GFX10_W32-LABEL: test_div_fmas_f32_cond_to_vcc: 898; GFX10_W32: ; %bb.0: 899; GFX10_W32-NEXT: s_clause 0x1 900; GFX10_W32-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x8 901; GFX10_W32-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 902; GFX10_W32-NEXT: s_waitcnt lgkmcnt(0) 903; GFX10_W32-NEXT: s_cmp_eq_u32 s7, 0 904; GFX10_W32-NEXT: v_mov_b32_e32 v0, s5 905; GFX10_W32-NEXT: s_cselect_b32 s2, 1, 0 906; GFX10_W32-NEXT: v_mov_b32_e32 v1, s6 907; GFX10_W32-NEXT: s_and_b32 s2, 1, s2 908; GFX10_W32-NEXT: v_cmp_ne_u32_e64 vcc_lo, 0, s2 909; GFX10_W32-NEXT: v_div_fmas_f32 v0, s4, v0, v1 910; GFX10_W32-NEXT: v_mov_b32_e32 v1, 0 911; GFX10_W32-NEXT: global_store_dword v1, v0, s[0:1] 912; GFX10_W32-NEXT: s_endpgm 913; 914; GFX10_W64-LABEL: test_div_fmas_f32_cond_to_vcc: 915; GFX10_W64: ; %bb.0: 916; GFX10_W64-NEXT: s_clause 0x1 917; GFX10_W64-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x8 918; GFX10_W64-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 919; GFX10_W64-NEXT: s_waitcnt lgkmcnt(0) 920; GFX10_W64-NEXT: s_cmp_eq_u32 s7, 0 921; GFX10_W64-NEXT: v_mov_b32_e32 v0, s5 922; GFX10_W64-NEXT: 
s_cselect_b32 s2, 1, 0 923; GFX10_W64-NEXT: v_mov_b32_e32 v1, s6 924; GFX10_W64-NEXT: s_and_b32 s2, 1, s2 925; GFX10_W64-NEXT: v_cmp_ne_u32_e64 vcc, 0, s2 926; GFX10_W64-NEXT: v_div_fmas_f32 v0, s4, v0, v1 927; GFX10_W64-NEXT: v_mov_b32_e32 v1, 0 928; GFX10_W64-NEXT: global_store_dword v1, v0, s[0:1] 929; GFX10_W64-NEXT: s_endpgm 930; 931; GFX11_W32-LABEL: test_div_fmas_f32_cond_to_vcc: 932; GFX11_W32: ; %bb.0: 933; GFX11_W32-NEXT: s_clause 0x1 934; GFX11_W32-NEXT: s_load_b128 s[4:7], s[2:3], 0x8 935; GFX11_W32-NEXT: s_load_b64 s[0:1], s[2:3], 0x0 936; GFX11_W32-NEXT: s_waitcnt lgkmcnt(0) 937; GFX11_W32-NEXT: s_cmp_eq_u32 s7, 0 938; GFX11_W32-NEXT: v_dual_mov_b32 v0, s5 :: v_dual_mov_b32 v1, s6 939; GFX11_W32-NEXT: s_cselect_b32 s2, 1, 0 940; GFX11_W32-NEXT: s_and_b32 s2, 1, s2 941; GFX11_W32-NEXT: v_cmp_ne_u32_e64 vcc_lo, 0, s2 942; GFX11_W32-NEXT: v_div_fmas_f32 v0, s4, v0, v1 943; GFX11_W32-NEXT: v_mov_b32_e32 v1, 0 944; GFX11_W32-NEXT: global_store_b32 v1, v0, s[0:1] 945; GFX11_W32-NEXT: s_nop 0 946; GFX11_W32-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) 947; GFX11_W32-NEXT: s_endpgm 948; 949; GFX11_W64-LABEL: test_div_fmas_f32_cond_to_vcc: 950; GFX11_W64: ; %bb.0: 951; GFX11_W64-NEXT: s_clause 0x1 952; GFX11_W64-NEXT: s_load_b128 s[4:7], s[2:3], 0x8 953; GFX11_W64-NEXT: s_load_b64 s[0:1], s[2:3], 0x0 954; GFX11_W64-NEXT: s_waitcnt lgkmcnt(0) 955; GFX11_W64-NEXT: s_cmp_eq_u32 s7, 0 956; GFX11_W64-NEXT: v_mov_b32_e32 v0, s5 957; GFX11_W64-NEXT: s_cselect_b32 s2, 1, 0 958; GFX11_W64-NEXT: v_mov_b32_e32 v1, s6 959; GFX11_W64-NEXT: s_and_b32 s2, 1, s2 960; GFX11_W64-NEXT: v_cmp_ne_u32_e64 vcc, 0, s2 961; GFX11_W64-NEXT: v_div_fmas_f32 v0, s4, v0, v1 962; GFX11_W64-NEXT: v_mov_b32_e32 v1, 0 963; GFX11_W64-NEXT: global_store_b32 v1, v0, s[0:1] 964; GFX11_W64-NEXT: s_nop 0 965; GFX11_W64-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) 966; GFX11_W64-NEXT: s_endpgm 967 %cmp = icmp eq i32 %i, 0 968 %result = call float @llvm.amdgcn.div.fmas.f32(float %a, float %b, float %c, i1 
%cmp) 969 store float %result, ptr addrspace(1) %out, align 4 970 ret void 971} 972 973define amdgpu_kernel void @test_div_fmas_f32_imm_false_cond_to_vcc(ptr addrspace(1) %out, [8 x i32], float %a, [8 x i32], float %b, [8 x i32], float %c) { 974; GFX7-LABEL: test_div_fmas_f32_imm_false_cond_to_vcc: 975; GFX7: ; %bb.0: 976; GFX7-NEXT: s_load_dword s4, s[2:3], 0xa 977; GFX7-NEXT: s_load_dword s5, s[2:3], 0x13 978; GFX7-NEXT: s_load_dword s6, s[2:3], 0x1c 979; GFX7-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 980; GFX7-NEXT: s_mov_b64 vcc, 0 981; GFX7-NEXT: s_waitcnt lgkmcnt(0) 982; GFX7-NEXT: v_mov_b32_e32 v0, s4 983; GFX7-NEXT: v_mov_b32_e32 v1, s5 984; GFX7-NEXT: v_mov_b32_e32 v2, s6 985; GFX7-NEXT: v_div_fmas_f32 v0, v0, v1, v2 986; GFX7-NEXT: s_mov_b32 s2, -1 987; GFX7-NEXT: s_mov_b32 s3, 0xf000 988; GFX7-NEXT: buffer_store_dword v0, off, s[0:3], 0 989; GFX7-NEXT: s_endpgm 990; 991; GFX8-LABEL: test_div_fmas_f32_imm_false_cond_to_vcc: 992; GFX8: ; %bb.0: 993; GFX8-NEXT: s_load_dword s0, s[2:3], 0x28 994; GFX8-NEXT: s_load_dword s1, s[2:3], 0x4c 995; GFX8-NEXT: s_load_dword s4, s[2:3], 0x70 996; GFX8-NEXT: s_mov_b64 vcc, 0 997; GFX8-NEXT: s_waitcnt lgkmcnt(0) 998; GFX8-NEXT: v_mov_b32_e32 v0, s0 999; GFX8-NEXT: v_mov_b32_e32 v1, s1 1000; GFX8-NEXT: v_mov_b32_e32 v2, s4 1001; GFX8-NEXT: v_div_fmas_f32 v2, v0, v1, v2 1002; GFX8-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 1003; GFX8-NEXT: s_waitcnt lgkmcnt(0) 1004; GFX8-NEXT: v_mov_b32_e32 v0, s0 1005; GFX8-NEXT: v_mov_b32_e32 v1, s1 1006; GFX8-NEXT: flat_store_dword v[0:1], v2 1007; GFX8-NEXT: s_endpgm 1008; 1009; GFX10_W32-LABEL: test_div_fmas_f32_imm_false_cond_to_vcc: 1010; GFX10_W32: ; %bb.0: 1011; GFX10_W32-NEXT: s_clause 0x3 1012; GFX10_W32-NEXT: s_load_dword s4, s[2:3], 0x4c 1013; GFX10_W32-NEXT: s_load_dword s5, s[2:3], 0x70 1014; GFX10_W32-NEXT: s_load_dword s6, s[2:3], 0x28 1015; GFX10_W32-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 1016; GFX10_W32-NEXT: s_mov_b32 vcc_lo, 0 1017; GFX10_W32-NEXT: s_waitcnt lgkmcnt(0) 
1018; GFX10_W32-NEXT: v_mov_b32_e32 v0, s4 1019; GFX10_W32-NEXT: v_mov_b32_e32 v1, s5 1020; GFX10_W32-NEXT: v_div_fmas_f32 v0, s6, v0, v1 1021; GFX10_W32-NEXT: v_mov_b32_e32 v1, 0 1022; GFX10_W32-NEXT: global_store_dword v1, v0, s[0:1] 1023; GFX10_W32-NEXT: s_endpgm 1024; 1025; GFX10_W64-LABEL: test_div_fmas_f32_imm_false_cond_to_vcc: 1026; GFX10_W64: ; %bb.0: 1027; GFX10_W64-NEXT: s_clause 0x3 1028; GFX10_W64-NEXT: s_load_dword s4, s[2:3], 0x4c 1029; GFX10_W64-NEXT: s_load_dword s5, s[2:3], 0x70 1030; GFX10_W64-NEXT: s_load_dword s6, s[2:3], 0x28 1031; GFX10_W64-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 1032; GFX10_W64-NEXT: s_mov_b64 vcc, 0 1033; GFX10_W64-NEXT: s_waitcnt lgkmcnt(0) 1034; GFX10_W64-NEXT: v_mov_b32_e32 v0, s4 1035; GFX10_W64-NEXT: v_mov_b32_e32 v1, s5 1036; GFX10_W64-NEXT: v_div_fmas_f32 v0, s6, v0, v1 1037; GFX10_W64-NEXT: v_mov_b32_e32 v1, 0 1038; GFX10_W64-NEXT: global_store_dword v1, v0, s[0:1] 1039; GFX10_W64-NEXT: s_endpgm 1040; 1041; GFX11_W32-LABEL: test_div_fmas_f32_imm_false_cond_to_vcc: 1042; GFX11_W32: ; %bb.0: 1043; GFX11_W32-NEXT: s_clause 0x3 1044; GFX11_W32-NEXT: s_load_b32 s4, s[2:3], 0x4c 1045; GFX11_W32-NEXT: s_load_b32 s5, s[2:3], 0x70 1046; GFX11_W32-NEXT: s_load_b32 s6, s[2:3], 0x28 1047; GFX11_W32-NEXT: s_load_b64 s[0:1], s[2:3], 0x0 1048; GFX11_W32-NEXT: s_mov_b32 vcc_lo, 0 1049; GFX11_W32-NEXT: s_waitcnt lgkmcnt(0) 1050; GFX11_W32-NEXT: v_dual_mov_b32 v0, s4 :: v_dual_mov_b32 v1, s5 1051; GFX11_W32-NEXT: v_div_fmas_f32 v0, s6, v0, v1 1052; GFX11_W32-NEXT: v_mov_b32_e32 v1, 0 1053; GFX11_W32-NEXT: global_store_b32 v1, v0, s[0:1] 1054; GFX11_W32-NEXT: s_nop 0 1055; GFX11_W32-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) 1056; GFX11_W32-NEXT: s_endpgm 1057; 1058; GFX11_W64-LABEL: test_div_fmas_f32_imm_false_cond_to_vcc: 1059; GFX11_W64: ; %bb.0: 1060; GFX11_W64-NEXT: s_clause 0x3 1061; GFX11_W64-NEXT: s_load_b32 s4, s[2:3], 0x4c 1062; GFX11_W64-NEXT: s_load_b32 s5, s[2:3], 0x70 1063; GFX11_W64-NEXT: s_load_b32 s6, s[2:3], 0x28 
1064; GFX11_W64-NEXT: s_load_b64 s[0:1], s[2:3], 0x0 1065; GFX11_W64-NEXT: s_mov_b64 vcc, 0 1066; GFX11_W64-NEXT: s_waitcnt lgkmcnt(0) 1067; GFX11_W64-NEXT: v_mov_b32_e32 v0, s4 1068; GFX11_W64-NEXT: v_mov_b32_e32 v1, s5 1069; GFX11_W64-NEXT: v_div_fmas_f32 v0, s6, v0, v1 1070; GFX11_W64-NEXT: v_mov_b32_e32 v1, 0 1071; GFX11_W64-NEXT: global_store_b32 v1, v0, s[0:1] 1072; GFX11_W64-NEXT: s_nop 0 1073; GFX11_W64-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) 1074; GFX11_W64-NEXT: s_endpgm 1075 %result = call float @llvm.amdgcn.div.fmas.f32(float %a, float %b, float %c, i1 false) 1076 store float %result, ptr addrspace(1) %out, align 4 1077 ret void 1078} 1079 1080define amdgpu_kernel void @test_div_fmas_f32_imm_true_cond_to_vcc(ptr addrspace(1) %out, [8 x i32], float %a, [8 x i32], float %b, [8 x i32], float %c) { 1081; GFX7-LABEL: test_div_fmas_f32_imm_true_cond_to_vcc: 1082; GFX7: ; %bb.0: 1083; GFX7-NEXT: s_load_dword s4, s[2:3], 0xa 1084; GFX7-NEXT: s_load_dword s5, s[2:3], 0x13 1085; GFX7-NEXT: s_load_dword s6, s[2:3], 0x1c 1086; GFX7-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 1087; GFX7-NEXT: s_mov_b64 vcc, -1 1088; GFX7-NEXT: s_waitcnt lgkmcnt(0) 1089; GFX7-NEXT: v_mov_b32_e32 v0, s4 1090; GFX7-NEXT: v_mov_b32_e32 v1, s5 1091; GFX7-NEXT: v_mov_b32_e32 v2, s6 1092; GFX7-NEXT: v_div_fmas_f32 v0, v0, v1, v2 1093; GFX7-NEXT: s_mov_b32 s2, -1 1094; GFX7-NEXT: s_mov_b32 s3, 0xf000 1095; GFX7-NEXT: buffer_store_dword v0, off, s[0:3], 0 1096; GFX7-NEXT: s_endpgm 1097; 1098; GFX8-LABEL: test_div_fmas_f32_imm_true_cond_to_vcc: 1099; GFX8: ; %bb.0: 1100; GFX8-NEXT: s_load_dword s0, s[2:3], 0x28 1101; GFX8-NEXT: s_load_dword s1, s[2:3], 0x4c 1102; GFX8-NEXT: s_load_dword s4, s[2:3], 0x70 1103; GFX8-NEXT: s_mov_b64 vcc, -1 1104; GFX8-NEXT: s_waitcnt lgkmcnt(0) 1105; GFX8-NEXT: v_mov_b32_e32 v0, s0 1106; GFX8-NEXT: v_mov_b32_e32 v1, s1 1107; GFX8-NEXT: v_mov_b32_e32 v2, s4 1108; GFX8-NEXT: v_div_fmas_f32 v2, v0, v1, v2 1109; GFX8-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 1110; 
GFX8-NEXT: s_waitcnt lgkmcnt(0) 1111; GFX8-NEXT: v_mov_b32_e32 v0, s0 1112; GFX8-NEXT: v_mov_b32_e32 v1, s1 1113; GFX8-NEXT: flat_store_dword v[0:1], v2 1114; GFX8-NEXT: s_endpgm 1115; 1116; GFX10_W32-LABEL: test_div_fmas_f32_imm_true_cond_to_vcc: 1117; GFX10_W32: ; %bb.0: 1118; GFX10_W32-NEXT: s_clause 0x3 1119; GFX10_W32-NEXT: s_load_dword s4, s[2:3], 0x4c 1120; GFX10_W32-NEXT: s_load_dword s5, s[2:3], 0x70 1121; GFX10_W32-NEXT: s_load_dword s6, s[2:3], 0x28 1122; GFX10_W32-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 1123; GFX10_W32-NEXT: s_mov_b32 vcc_lo, -1 1124; GFX10_W32-NEXT: s_waitcnt lgkmcnt(0) 1125; GFX10_W32-NEXT: v_mov_b32_e32 v0, s4 1126; GFX10_W32-NEXT: v_mov_b32_e32 v1, s5 1127; GFX10_W32-NEXT: v_div_fmas_f32 v0, s6, v0, v1 1128; GFX10_W32-NEXT: v_mov_b32_e32 v1, 0 1129; GFX10_W32-NEXT: global_store_dword v1, v0, s[0:1] 1130; GFX10_W32-NEXT: s_endpgm 1131; 1132; GFX10_W64-LABEL: test_div_fmas_f32_imm_true_cond_to_vcc: 1133; GFX10_W64: ; %bb.0: 1134; GFX10_W64-NEXT: s_clause 0x3 1135; GFX10_W64-NEXT: s_load_dword s4, s[2:3], 0x4c 1136; GFX10_W64-NEXT: s_load_dword s5, s[2:3], 0x70 1137; GFX10_W64-NEXT: s_load_dword s6, s[2:3], 0x28 1138; GFX10_W64-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 1139; GFX10_W64-NEXT: s_mov_b64 vcc, -1 1140; GFX10_W64-NEXT: s_waitcnt lgkmcnt(0) 1141; GFX10_W64-NEXT: v_mov_b32_e32 v0, s4 1142; GFX10_W64-NEXT: v_mov_b32_e32 v1, s5 1143; GFX10_W64-NEXT: v_div_fmas_f32 v0, s6, v0, v1 1144; GFX10_W64-NEXT: v_mov_b32_e32 v1, 0 1145; GFX10_W64-NEXT: global_store_dword v1, v0, s[0:1] 1146; GFX10_W64-NEXT: s_endpgm 1147; 1148; GFX11_W32-LABEL: test_div_fmas_f32_imm_true_cond_to_vcc: 1149; GFX11_W32: ; %bb.0: 1150; GFX11_W32-NEXT: s_clause 0x3 1151; GFX11_W32-NEXT: s_load_b32 s4, s[2:3], 0x4c 1152; GFX11_W32-NEXT: s_load_b32 s5, s[2:3], 0x70 1153; GFX11_W32-NEXT: s_load_b32 s6, s[2:3], 0x28 1154; GFX11_W32-NEXT: s_load_b64 s[0:1], s[2:3], 0x0 1155; GFX11_W32-NEXT: s_mov_b32 vcc_lo, -1 1156; GFX11_W32-NEXT: s_waitcnt lgkmcnt(0) 1157; 
GFX11_W32-NEXT: v_dual_mov_b32 v0, s4 :: v_dual_mov_b32 v1, s5 1158; GFX11_W32-NEXT: v_div_fmas_f32 v0, s6, v0, v1 1159; GFX11_W32-NEXT: v_mov_b32_e32 v1, 0 1160; GFX11_W32-NEXT: global_store_b32 v1, v0, s[0:1] 1161; GFX11_W32-NEXT: s_nop 0 1162; GFX11_W32-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) 1163; GFX11_W32-NEXT: s_endpgm 1164; 1165; GFX11_W64-LABEL: test_div_fmas_f32_imm_true_cond_to_vcc: 1166; GFX11_W64: ; %bb.0: 1167; GFX11_W64-NEXT: s_clause 0x3 1168; GFX11_W64-NEXT: s_load_b32 s4, s[2:3], 0x4c 1169; GFX11_W64-NEXT: s_load_b32 s5, s[2:3], 0x70 1170; GFX11_W64-NEXT: s_load_b32 s6, s[2:3], 0x28 1171; GFX11_W64-NEXT: s_load_b64 s[0:1], s[2:3], 0x0 1172; GFX11_W64-NEXT: s_mov_b64 vcc, -1 1173; GFX11_W64-NEXT: s_waitcnt lgkmcnt(0) 1174; GFX11_W64-NEXT: v_mov_b32_e32 v0, s4 1175; GFX11_W64-NEXT: v_mov_b32_e32 v1, s5 1176; GFX11_W64-NEXT: v_div_fmas_f32 v0, s6, v0, v1 1177; GFX11_W64-NEXT: v_mov_b32_e32 v1, 0 1178; GFX11_W64-NEXT: global_store_b32 v1, v0, s[0:1] 1179; GFX11_W64-NEXT: s_nop 0 1180; GFX11_W64-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) 1181; GFX11_W64-NEXT: s_endpgm 1182 %result = call float @llvm.amdgcn.div.fmas.f32(float %a, float %b, float %c, i1 true) 1183 store float %result, ptr addrspace(1) %out, align 4 1184 ret void 1185} 1186 1187define amdgpu_kernel void @test_div_fmas_f32_logical_cond_to_vcc(ptr addrspace(1) %out, ptr addrspace(1) %in, [8 x i32], i32 %d) { 1188; GFX7-LABEL: test_div_fmas_f32_logical_cond_to_vcc: 1189; GFX7: ; %bb.0: 1190; GFX7-NEXT: s_load_dwordx4 s[8:11], s[2:3], 0x0 1191; GFX7-NEXT: s_load_dword s0, s[2:3], 0xc 1192; GFX7-NEXT: s_mov_b32 s6, 0 1193; GFX7-NEXT: v_lshlrev_b32_e32 v1, 2, v0 1194; GFX7-NEXT: v_mov_b32_e32 v2, 0 1195; GFX7-NEXT: s_mov_b32 s7, 0xf000 1196; GFX7-NEXT: s_waitcnt lgkmcnt(0) 1197; GFX7-NEXT: s_mov_b64 s[4:5], s[10:11] 1198; GFX7-NEXT: buffer_load_dword v3, v[1:2], s[4:7], 0 addr64 glc 1199; GFX7-NEXT: s_waitcnt vmcnt(0) 1200; GFX7-NEXT: buffer_load_dword v4, v[1:2], s[4:7], 0 addr64 offset:4 
glc 1201; GFX7-NEXT: s_waitcnt vmcnt(0) 1202; GFX7-NEXT: buffer_load_dword v1, v[1:2], s[4:7], 0 addr64 offset:8 glc 1203; GFX7-NEXT: s_waitcnt vmcnt(0) 1204; GFX7-NEXT: s_cmp_lg_u32 s0, 0 1205; GFX7-NEXT: s_cselect_b32 s0, 1, 0 1206; GFX7-NEXT: s_and_b32 s0, 1, s0 1207; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 1208; GFX7-NEXT: v_cmp_ne_u32_e64 s[0:1], 0, s0 1209; GFX7-NEXT: s_mov_b32 s6, -1 1210; GFX7-NEXT: s_and_b64 vcc, vcc, s[0:1] 1211; GFX7-NEXT: s_mov_b64 s[10:11], s[6:7] 1212; GFX7-NEXT: v_div_fmas_f32 v0, v3, v4, v1 1213; GFX7-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:8 1214; GFX7-NEXT: s_endpgm 1215; 1216; GFX8-LABEL: test_div_fmas_f32_logical_cond_to_vcc: 1217; GFX8: ; %bb.0: 1218; GFX8-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x0 1219; GFX8-NEXT: s_load_dword s2, s[2:3], 0x30 1220; GFX8-NEXT: v_lshlrev_b32_e32 v3, 2, v0 1221; GFX8-NEXT: s_waitcnt lgkmcnt(0) 1222; GFX8-NEXT: v_mov_b32_e32 v1, s6 1223; GFX8-NEXT: v_mov_b32_e32 v2, s7 1224; GFX8-NEXT: v_add_u32_e32 v1, vcc, v1, v3 1225; GFX8-NEXT: v_addc_u32_e32 v2, vcc, 0, v2, vcc 1226; GFX8-NEXT: v_add_u32_e32 v3, vcc, 4, v1 1227; GFX8-NEXT: v_addc_u32_e32 v4, vcc, 0, v2, vcc 1228; GFX8-NEXT: v_add_u32_e32 v5, vcc, 8, v1 1229; GFX8-NEXT: v_addc_u32_e32 v6, vcc, 0, v2, vcc 1230; GFX8-NEXT: flat_load_dword v1, v[1:2] glc 1231; GFX8-NEXT: s_waitcnt vmcnt(0) 1232; GFX8-NEXT: flat_load_dword v2, v[3:4] glc 1233; GFX8-NEXT: s_waitcnt vmcnt(0) 1234; GFX8-NEXT: flat_load_dword v3, v[5:6] glc 1235; GFX8-NEXT: s_waitcnt vmcnt(0) 1236; GFX8-NEXT: s_add_u32 s0, s4, 8 1237; GFX8-NEXT: s_addc_u32 s1, s5, 0 1238; GFX8-NEXT: s_cmp_lg_u32 s2, 0 1239; GFX8-NEXT: s_cselect_b32 s2, 1, 0 1240; GFX8-NEXT: s_and_b32 s2, 1, s2 1241; GFX8-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 1242; GFX8-NEXT: v_cmp_ne_u32_e64 s[2:3], 0, s2 1243; GFX8-NEXT: s_and_b64 vcc, vcc, s[2:3] 1244; GFX8-NEXT: s_nop 1 1245; GFX8-NEXT: v_div_fmas_f32 v2, v1, v2, v3 1246; GFX8-NEXT: v_mov_b32_e32 v0, s0 1247; GFX8-NEXT: v_mov_b32_e32 v1, s1 1248; GFX8-NEXT: 
flat_store_dword v[0:1], v2 1249; GFX8-NEXT: s_endpgm 1250; 1251; GFX10_W32-LABEL: test_div_fmas_f32_logical_cond_to_vcc: 1252; GFX10_W32: ; %bb.0: 1253; GFX10_W32-NEXT: s_clause 0x1 1254; GFX10_W32-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x0 1255; GFX10_W32-NEXT: s_load_dword s0, s[2:3], 0x30 1256; GFX10_W32-NEXT: v_lshlrev_b32_e32 v1, 2, v0 1257; GFX10_W32-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 1258; GFX10_W32-NEXT: s_waitcnt lgkmcnt(0) 1259; GFX10_W32-NEXT: global_load_dword v2, v1, s[6:7] glc dlc 1260; GFX10_W32-NEXT: s_waitcnt vmcnt(0) 1261; GFX10_W32-NEXT: global_load_dword v3, v1, s[6:7] offset:4 glc dlc 1262; GFX10_W32-NEXT: s_waitcnt vmcnt(0) 1263; GFX10_W32-NEXT: global_load_dword v4, v1, s[6:7] offset:8 glc dlc 1264; GFX10_W32-NEXT: s_waitcnt vmcnt(0) 1265; GFX10_W32-NEXT: s_cmp_lg_u32 s0, 0 1266; GFX10_W32-NEXT: v_mov_b32_e32 v1, 0 1267; GFX10_W32-NEXT: s_cselect_b32 s0, 1, 0 1268; GFX10_W32-NEXT: s_and_b32 s0, 1, s0 1269; GFX10_W32-NEXT: v_cmp_ne_u32_e64 s0, 0, s0 1270; GFX10_W32-NEXT: s_and_b32 vcc_lo, vcc_lo, s0 1271; GFX10_W32-NEXT: v_div_fmas_f32 v0, v2, v3, v4 1272; GFX10_W32-NEXT: global_store_dword v1, v0, s[4:5] offset:8 1273; GFX10_W32-NEXT: s_endpgm 1274; 1275; GFX10_W64-LABEL: test_div_fmas_f32_logical_cond_to_vcc: 1276; GFX10_W64: ; %bb.0: 1277; GFX10_W64-NEXT: s_clause 0x1 1278; GFX10_W64-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x0 1279; GFX10_W64-NEXT: s_load_dword s0, s[2:3], 0x30 1280; GFX10_W64-NEXT: v_lshlrev_b32_e32 v1, 2, v0 1281; GFX10_W64-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 1282; GFX10_W64-NEXT: s_waitcnt lgkmcnt(0) 1283; GFX10_W64-NEXT: global_load_dword v2, v1, s[6:7] glc dlc 1284; GFX10_W64-NEXT: s_waitcnt vmcnt(0) 1285; GFX10_W64-NEXT: global_load_dword v3, v1, s[6:7] offset:4 glc dlc 1286; GFX10_W64-NEXT: s_waitcnt vmcnt(0) 1287; GFX10_W64-NEXT: global_load_dword v4, v1, s[6:7] offset:8 glc dlc 1288; GFX10_W64-NEXT: s_waitcnt vmcnt(0) 1289; GFX10_W64-NEXT: s_cmp_lg_u32 s0, 0 1290; GFX10_W64-NEXT: v_mov_b32_e32 v1, 0 1291; 
GFX10_W64-NEXT: s_cselect_b32 s0, 1, 0 1292; GFX10_W64-NEXT: s_and_b32 s0, 1, s0 1293; GFX10_W64-NEXT: v_cmp_ne_u32_e64 s[0:1], 0, s0 1294; GFX10_W64-NEXT: s_and_b64 vcc, vcc, s[0:1] 1295; GFX10_W64-NEXT: v_div_fmas_f32 v0, v2, v3, v4 1296; GFX10_W64-NEXT: global_store_dword v1, v0, s[4:5] offset:8 1297; GFX10_W64-NEXT: s_endpgm 1298; 1299; GFX11_W32-LABEL: test_div_fmas_f32_logical_cond_to_vcc: 1300; GFX11_W32: ; %bb.0: 1301; GFX11_W32-NEXT: s_clause 0x1 1302; GFX11_W32-NEXT: s_load_b128 s[4:7], s[2:3], 0x0 1303; GFX11_W32-NEXT: s_load_b32 s0, s[2:3], 0x30 1304; GFX11_W32-NEXT: v_and_b32_e32 v0, 0x3ff, v0 1305; GFX11_W32-NEXT: v_lshlrev_b32_e32 v1, 2, v0 1306; GFX11_W32-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 1307; GFX11_W32-NEXT: s_waitcnt lgkmcnt(0) 1308; GFX11_W32-NEXT: global_load_b32 v2, v1, s[6:7] glc dlc 1309; GFX11_W32-NEXT: s_waitcnt vmcnt(0) 1310; GFX11_W32-NEXT: global_load_b32 v3, v1, s[6:7] offset:4 glc dlc 1311; GFX11_W32-NEXT: s_waitcnt vmcnt(0) 1312; GFX11_W32-NEXT: global_load_b32 v1, v1, s[6:7] offset:8 glc dlc 1313; GFX11_W32-NEXT: s_waitcnt vmcnt(0) 1314; GFX11_W32-NEXT: s_cmp_lg_u32 s0, 0 1315; GFX11_W32-NEXT: s_cselect_b32 s0, 1, 0 1316; GFX11_W32-NEXT: s_and_b32 s0, 1, s0 1317; GFX11_W32-NEXT: v_cmp_ne_u32_e64 s0, 0, s0 1318; GFX11_W32-NEXT: s_and_b32 vcc_lo, vcc_lo, s0 1319; GFX11_W32-NEXT: v_div_fmas_f32 v0, v2, v3, v1 1320; GFX11_W32-NEXT: v_mov_b32_e32 v1, 0 1321; GFX11_W32-NEXT: global_store_b32 v1, v0, s[4:5] offset:8 1322; GFX11_W32-NEXT: s_nop 0 1323; GFX11_W32-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) 1324; GFX11_W32-NEXT: s_endpgm 1325; 1326; GFX11_W64-LABEL: test_div_fmas_f32_logical_cond_to_vcc: 1327; GFX11_W64: ; %bb.0: 1328; GFX11_W64-NEXT: s_clause 0x1 1329; GFX11_W64-NEXT: s_load_b128 s[4:7], s[2:3], 0x0 1330; GFX11_W64-NEXT: s_load_b32 s0, s[2:3], 0x30 1331; GFX11_W64-NEXT: v_and_b32_e32 v0, 0x3ff, v0 1332; GFX11_W64-NEXT: v_lshlrev_b32_e32 v1, 2, v0 1333; GFX11_W64-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 1334; GFX11_W64-NEXT: 
s_waitcnt lgkmcnt(0) 1335; GFX11_W64-NEXT: global_load_b32 v2, v1, s[6:7] glc dlc 1336; GFX11_W64-NEXT: s_waitcnt vmcnt(0) 1337; GFX11_W64-NEXT: global_load_b32 v3, v1, s[6:7] offset:4 glc dlc 1338; GFX11_W64-NEXT: s_waitcnt vmcnt(0) 1339; GFX11_W64-NEXT: global_load_b32 v1, v1, s[6:7] offset:8 glc dlc 1340; GFX11_W64-NEXT: s_waitcnt vmcnt(0) 1341; GFX11_W64-NEXT: s_cmp_lg_u32 s0, 0 1342; GFX11_W64-NEXT: s_cselect_b32 s0, 1, 0 1343; GFX11_W64-NEXT: s_and_b32 s0, 1, s0 1344; GFX11_W64-NEXT: v_cmp_ne_u32_e64 s[0:1], 0, s0 1345; GFX11_W64-NEXT: s_and_b64 vcc, vcc, s[0:1] 1346; GFX11_W64-NEXT: v_div_fmas_f32 v0, v2, v3, v1 1347; GFX11_W64-NEXT: v_mov_b32_e32 v1, 0 1348; GFX11_W64-NEXT: global_store_b32 v1, v0, s[4:5] offset:8 1349; GFX11_W64-NEXT: s_nop 0 1350; GFX11_W64-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) 1351; GFX11_W64-NEXT: s_endpgm 1352 %tid = call i32 @llvm.amdgcn.workitem.id.x() 1353 %gep.a = getelementptr float, ptr addrspace(1) %in, i32 %tid 1354 %gep.b = getelementptr float, ptr addrspace(1) %gep.a, i32 1 1355 %gep.c = getelementptr float, ptr addrspace(1) %gep.a, i32 2 1356 %gep.out = getelementptr float, ptr addrspace(1) %out, i32 2 1357 1358 %a = load volatile float, ptr addrspace(1) %gep.a 1359 %b = load volatile float, ptr addrspace(1) %gep.b 1360 %c = load volatile float, ptr addrspace(1) %gep.c 1361 1362 %cmp0 = icmp eq i32 %tid, 0 1363 %cmp1 = icmp ne i32 %d, 0 1364 %and = and i1 %cmp0, %cmp1 1365 1366 %result = call float @llvm.amdgcn.div.fmas.f32(float %a, float %b, float %c, i1 %and) 1367 store float %result, ptr addrspace(1) %gep.out, align 4 1368 ret void 1369} 1370 1371define amdgpu_kernel void @test_div_fmas_f32_i1_phi_vcc(ptr addrspace(1) %out, [8 x i32], ptr addrspace(1) %in, [8 x i32], ptr addrspace(1) %dummy) { 1372; GFX7-LABEL: test_div_fmas_f32_i1_phi_vcc: 1373; GFX7: ; %bb.0: ; %entry 1374; GFX7-NEXT: s_load_dwordx2 s[4:5], s[2:3], 0xa 1375; GFX7-NEXT: v_lshlrev_b32_e32 v1, 2, v0 1376; GFX7-NEXT: v_mov_b32_e32 v2, 0 1377; 
GFX7-NEXT: s_mov_b32 s6, 0 1378; GFX7-NEXT: s_mov_b32 s7, 0xf000 1379; GFX7-NEXT: s_waitcnt lgkmcnt(0) 1380; GFX7-NEXT: buffer_load_dwordx3 v[1:3], v[1:2], s[4:7], 0 addr64 1381; GFX7-NEXT: v_cmp_eq_u32_e64 s[0:1], 0, v0 1382; GFX7-NEXT: s_mov_b64 vcc, 0 1383; GFX7-NEXT: s_and_saveexec_b64 s[4:5], s[0:1] 1384; GFX7-NEXT: s_cbranch_execz .LBB13_2 1385; GFX7-NEXT: ; %bb.1: ; %bb 1386; GFX7-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x14 1387; GFX7-NEXT: s_waitcnt lgkmcnt(0) 1388; GFX7-NEXT: s_load_dword s0, s[0:1], 0x0 1389; GFX7-NEXT: s_waitcnt lgkmcnt(0) 1390; GFX7-NEXT: s_cmp_lg_u32 s0, 0 1391; GFX7-NEXT: s_cselect_b32 s0, 1, 0 1392; GFX7-NEXT: s_and_b32 s0, 1, s0 1393; GFX7-NEXT: v_cmp_ne_u32_e64 s[0:1], 0, s0 1394; GFX7-NEXT: s_andn2_b64 s[8:9], 0, exec 1395; GFX7-NEXT: s_and_b64 s[0:1], exec, s[0:1] 1396; GFX7-NEXT: s_or_b64 vcc, s[8:9], s[0:1] 1397; GFX7-NEXT: .LBB13_2: ; %exit 1398; GFX7-NEXT: s_or_b64 exec, exec, s[4:5] 1399; GFX7-NEXT: s_load_dwordx2 s[4:5], s[2:3], 0x0 1400; GFX7-NEXT: s_waitcnt vmcnt(0) 1401; GFX7-NEXT: v_div_fmas_f32 v0, v1, v2, v3 1402; GFX7-NEXT: s_mov_b32 s6, -1 1403; GFX7-NEXT: s_waitcnt lgkmcnt(0) 1404; GFX7-NEXT: buffer_store_dword v0, off, s[4:7], 0 offset:8 1405; GFX7-NEXT: s_endpgm 1406; 1407; GFX8-LABEL: test_div_fmas_f32_i1_phi_vcc: 1408; GFX8: ; %bb.0: ; %entry 1409; GFX8-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x28 1410; GFX8-NEXT: v_lshlrev_b32_e32 v3, 2, v0 1411; GFX8-NEXT: s_waitcnt lgkmcnt(0) 1412; GFX8-NEXT: v_mov_b32_e32 v2, s1 1413; GFX8-NEXT: v_mov_b32_e32 v1, s0 1414; GFX8-NEXT: v_add_u32_e32 v1, vcc, v1, v3 1415; GFX8-NEXT: v_addc_u32_e32 v2, vcc, 0, v2, vcc 1416; GFX8-NEXT: flat_load_dwordx3 v[1:3], v[1:2] 1417; GFX8-NEXT: v_cmp_eq_u32_e64 s[0:1], 0, v0 1418; GFX8-NEXT: s_mov_b64 vcc, 0 1419; GFX8-NEXT: s_and_saveexec_b64 s[4:5], s[0:1] 1420; GFX8-NEXT: s_cbranch_execz .LBB13_2 1421; GFX8-NEXT: ; %bb.1: ; %bb 1422; GFX8-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x50 1423; GFX8-NEXT: s_waitcnt lgkmcnt(0) 1424; GFX8-NEXT: 
s_load_dword s0, s[0:1], 0x0 1425; GFX8-NEXT: s_waitcnt lgkmcnt(0) 1426; GFX8-NEXT: s_cmp_lg_u32 s0, 0 1427; GFX8-NEXT: s_cselect_b32 s0, 1, 0 1428; GFX8-NEXT: s_and_b32 s0, 1, s0 1429; GFX8-NEXT: v_cmp_ne_u32_e64 s[0:1], 0, s0 1430; GFX8-NEXT: s_andn2_b64 s[6:7], 0, exec 1431; GFX8-NEXT: s_and_b64 s[0:1], exec, s[0:1] 1432; GFX8-NEXT: s_or_b64 vcc, s[6:7], s[0:1] 1433; GFX8-NEXT: .LBB13_2: ; %exit 1434; GFX8-NEXT: s_or_b64 exec, exec, s[4:5] 1435; GFX8-NEXT: s_waitcnt vmcnt(0) 1436; GFX8-NEXT: v_div_fmas_f32 v2, v1, v2, v3 1437; GFX8-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 1438; GFX8-NEXT: s_waitcnt lgkmcnt(0) 1439; GFX8-NEXT: s_add_u32 s0, s0, 8 1440; GFX8-NEXT: s_addc_u32 s1, s1, 0 1441; GFX8-NEXT: v_mov_b32_e32 v0, s0 1442; GFX8-NEXT: v_mov_b32_e32 v1, s1 1443; GFX8-NEXT: flat_store_dword v[0:1], v2 1444; GFX8-NEXT: s_endpgm 1445; 1446; GFX10_W32-LABEL: test_div_fmas_f32_i1_phi_vcc: 1447; GFX10_W32: ; %bb.0: ; %entry 1448; GFX10_W32-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x28 1449; GFX10_W32-NEXT: v_lshlrev_b32_e32 v1, 2, v0 1450; GFX10_W32-NEXT: s_mov_b32 vcc_lo, 0 1451; GFX10_W32-NEXT: s_waitcnt lgkmcnt(0) 1452; GFX10_W32-NEXT: global_load_dwordx3 v[1:3], v1, s[0:1] 1453; GFX10_W32-NEXT: v_cmp_eq_u32_e64 s0, 0, v0 1454; GFX10_W32-NEXT: s_and_saveexec_b32 s1, s0 1455; GFX10_W32-NEXT: s_cbranch_execz .LBB13_2 1456; GFX10_W32-NEXT: ; %bb.1: ; %bb 1457; GFX10_W32-NEXT: s_load_dwordx2 s[4:5], s[2:3], 0x50 1458; GFX10_W32-NEXT: s_waitcnt lgkmcnt(0) 1459; GFX10_W32-NEXT: s_load_dword s0, s[4:5], 0x0 1460; GFX10_W32-NEXT: s_waitcnt lgkmcnt(0) 1461; GFX10_W32-NEXT: s_cmp_lg_u32 s0, 0 1462; GFX10_W32-NEXT: s_cselect_b32 s0, 1, 0 1463; GFX10_W32-NEXT: s_andn2_b32 s4, 0, exec_lo 1464; GFX10_W32-NEXT: s_and_b32 s0, 1, s0 1465; GFX10_W32-NEXT: v_cmp_ne_u32_e64 s0, 0, s0 1466; GFX10_W32-NEXT: s_and_b32 s0, exec_lo, s0 1467; GFX10_W32-NEXT: s_or_b32 vcc_lo, s4, s0 1468; GFX10_W32-NEXT: .LBB13_2: ; %exit 1469; GFX10_W32-NEXT: s_or_b32 exec_lo, exec_lo, s1 1470; GFX10_W32-NEXT: 
s_load_dwordx2 s[0:1], s[2:3], 0x0 1471; GFX10_W32-NEXT: s_waitcnt vmcnt(0) 1472; GFX10_W32-NEXT: v_div_fmas_f32 v0, v1, v2, v3 1473; GFX10_W32-NEXT: v_mov_b32_e32 v1, 0 1474; GFX10_W32-NEXT: s_waitcnt lgkmcnt(0) 1475; GFX10_W32-NEXT: global_store_dword v1, v0, s[0:1] offset:8 1476; GFX10_W32-NEXT: s_endpgm 1477; 1478; GFX10_W64-LABEL: test_div_fmas_f32_i1_phi_vcc: 1479; GFX10_W64: ; %bb.0: ; %entry 1480; GFX10_W64-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x28 1481; GFX10_W64-NEXT: v_lshlrev_b32_e32 v1, 2, v0 1482; GFX10_W64-NEXT: s_mov_b64 vcc, 0 1483; GFX10_W64-NEXT: s_waitcnt lgkmcnt(0) 1484; GFX10_W64-NEXT: global_load_dwordx3 v[1:3], v1, s[0:1] 1485; GFX10_W64-NEXT: v_cmp_eq_u32_e64 s[0:1], 0, v0 1486; GFX10_W64-NEXT: s_and_saveexec_b64 s[4:5], s[0:1] 1487; GFX10_W64-NEXT: s_cbranch_execz .LBB13_2 1488; GFX10_W64-NEXT: ; %bb.1: ; %bb 1489; GFX10_W64-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x50 1490; GFX10_W64-NEXT: s_waitcnt lgkmcnt(0) 1491; GFX10_W64-NEXT: s_load_dword s0, s[0:1], 0x0 1492; GFX10_W64-NEXT: s_waitcnt lgkmcnt(0) 1493; GFX10_W64-NEXT: s_cmp_lg_u32 s0, 0 1494; GFX10_W64-NEXT: s_cselect_b32 s0, 1, 0 1495; GFX10_W64-NEXT: s_andn2_b64 s[6:7], 0, exec 1496; GFX10_W64-NEXT: s_and_b32 s0, 1, s0 1497; GFX10_W64-NEXT: v_cmp_ne_u32_e64 s[0:1], 0, s0 1498; GFX10_W64-NEXT: s_and_b64 s[0:1], exec, s[0:1] 1499; GFX10_W64-NEXT: s_or_b64 vcc, s[6:7], s[0:1] 1500; GFX10_W64-NEXT: .LBB13_2: ; %exit 1501; GFX10_W64-NEXT: s_or_b64 exec, exec, s[4:5] 1502; GFX10_W64-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 1503; GFX10_W64-NEXT: s_waitcnt vmcnt(0) 1504; GFX10_W64-NEXT: v_div_fmas_f32 v0, v1, v2, v3 1505; GFX10_W64-NEXT: v_mov_b32_e32 v1, 0 1506; GFX10_W64-NEXT: s_waitcnt lgkmcnt(0) 1507; GFX10_W64-NEXT: global_store_dword v1, v0, s[0:1] offset:8 1508; GFX10_W64-NEXT: s_endpgm 1509; 1510; GFX11_W32-LABEL: test_div_fmas_f32_i1_phi_vcc: 1511; GFX11_W32: ; %bb.0: ; %entry 1512; GFX11_W32-NEXT: s_load_b64 s[0:1], s[2:3], 0x28 1513; GFX11_W32-NEXT: v_and_b32_e32 v3, 0x3ff, v0 
1514; GFX11_W32-NEXT: s_mov_b32 vcc_lo, 0 1515; GFX11_W32-NEXT: v_lshlrev_b32_e32 v0, 2, v3 1516; GFX11_W32-NEXT: s_waitcnt lgkmcnt(0) 1517; GFX11_W32-NEXT: global_load_b96 v[0:2], v0, s[0:1] 1518; GFX11_W32-NEXT: s_mov_b32 s1, exec_lo 1519; GFX11_W32-NEXT: v_cmpx_eq_u32_e32 0, v3 1520; GFX11_W32-NEXT: s_cbranch_execz .LBB13_2 1521; GFX11_W32-NEXT: ; %bb.1: ; %bb 1522; GFX11_W32-NEXT: s_load_b64 s[4:5], s[2:3], 0x50 1523; GFX11_W32-NEXT: s_waitcnt lgkmcnt(0) 1524; GFX11_W32-NEXT: s_load_b32 s0, s[4:5], 0x0 1525; GFX11_W32-NEXT: s_waitcnt lgkmcnt(0) 1526; GFX11_W32-NEXT: s_cmp_lg_u32 s0, 0 1527; GFX11_W32-NEXT: s_cselect_b32 s0, 1, 0 1528; GFX11_W32-NEXT: s_and_not1_b32 s4, 0, exec_lo 1529; GFX11_W32-NEXT: s_and_b32 s0, 1, s0 1530; GFX11_W32-NEXT: v_cmp_ne_u32_e64 s0, 0, s0 1531; GFX11_W32-NEXT: s_and_b32 s0, exec_lo, s0 1532; GFX11_W32-NEXT: s_or_b32 vcc_lo, s4, s0 1533; GFX11_W32-NEXT: .LBB13_2: ; %exit 1534; GFX11_W32-NEXT: s_or_b32 exec_lo, exec_lo, s1 1535; GFX11_W32-NEXT: s_load_b64 s[0:1], s[2:3], 0x0 1536; GFX11_W32-NEXT: s_waitcnt vmcnt(0) 1537; GFX11_W32-NEXT: v_div_fmas_f32 v0, v0, v1, v2 1538; GFX11_W32-NEXT: v_mov_b32_e32 v1, 0 1539; GFX11_W32-NEXT: s_waitcnt lgkmcnt(0) 1540; GFX11_W32-NEXT: global_store_b32 v1, v0, s[0:1] offset:8 1541; GFX11_W32-NEXT: s_nop 0 1542; GFX11_W32-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) 1543; GFX11_W32-NEXT: s_endpgm 1544; 1545; GFX11_W64-LABEL: test_div_fmas_f32_i1_phi_vcc: 1546; GFX11_W64: ; %bb.0: ; %entry 1547; GFX11_W64-NEXT: s_load_b64 s[0:1], s[2:3], 0x28 1548; GFX11_W64-NEXT: v_and_b32_e32 v3, 0x3ff, v0 1549; GFX11_W64-NEXT: s_mov_b64 vcc, 0 1550; GFX11_W64-NEXT: s_mov_b64 s[4:5], exec 1551; GFX11_W64-NEXT: v_lshlrev_b32_e32 v0, 2, v3 1552; GFX11_W64-NEXT: s_waitcnt lgkmcnt(0) 1553; GFX11_W64-NEXT: global_load_b96 v[0:2], v0, s[0:1] 1554; GFX11_W64-NEXT: v_cmpx_eq_u32_e32 0, v3 1555; GFX11_W64-NEXT: s_cbranch_execz .LBB13_2 1556; GFX11_W64-NEXT: ; %bb.1: ; %bb 1557; GFX11_W64-NEXT: s_load_b64 s[0:1], s[2:3], 0x50 
1558; GFX11_W64-NEXT: s_waitcnt lgkmcnt(0) 1559; GFX11_W64-NEXT: s_load_b32 s0, s[0:1], 0x0 1560; GFX11_W64-NEXT: s_waitcnt lgkmcnt(0) 1561; GFX11_W64-NEXT: s_cmp_lg_u32 s0, 0 1562; GFX11_W64-NEXT: s_cselect_b32 s0, 1, 0 1563; GFX11_W64-NEXT: s_and_not1_b64 s[6:7], 0, exec 1564; GFX11_W64-NEXT: s_and_b32 s0, 1, s0 1565; GFX11_W64-NEXT: v_cmp_ne_u32_e64 s[0:1], 0, s0 1566; GFX11_W64-NEXT: s_and_b64 s[0:1], exec, s[0:1] 1567; GFX11_W64-NEXT: s_or_b64 vcc, s[6:7], s[0:1] 1568; GFX11_W64-NEXT: .LBB13_2: ; %exit 1569; GFX11_W64-NEXT: s_or_b64 exec, exec, s[4:5] 1570; GFX11_W64-NEXT: s_load_b64 s[0:1], s[2:3], 0x0 1571; GFX11_W64-NEXT: s_waitcnt vmcnt(0) 1572; GFX11_W64-NEXT: v_div_fmas_f32 v0, v0, v1, v2 1573; GFX11_W64-NEXT: v_mov_b32_e32 v1, 0 1574; GFX11_W64-NEXT: s_waitcnt lgkmcnt(0) 1575; GFX11_W64-NEXT: global_store_b32 v1, v0, s[0:1] offset:8 1576; GFX11_W64-NEXT: s_nop 0 1577; GFX11_W64-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) 1578; GFX11_W64-NEXT: s_endpgm 1579entry: 1580 %tid = call i32 @llvm.amdgcn.workitem.id.x() 1581 %gep.a = getelementptr float, ptr addrspace(1) %in, i32 %tid 1582 %gep.b = getelementptr float, ptr addrspace(1) %gep.a, i32 1 1583 %gep.c = getelementptr float, ptr addrspace(1) %gep.a, i32 2 1584 1585 %a = load float, ptr addrspace(1) %gep.a 1586 %b = load float, ptr addrspace(1) %gep.b 1587 %c = load float, ptr addrspace(1) %gep.c 1588 1589 %cmp0 = icmp eq i32 %tid, 0 1590 br i1 %cmp0, label %bb, label %exit 1591 1592bb: 1593 %val = load i32, ptr addrspace(1) %dummy 1594 %cmp1 = icmp ne i32 %val, 0 1595 br label %exit 1596 1597exit: 1598 %cond = phi i1 [false, %entry], [%cmp1, %bb] 1599 %gep.out = getelementptr float, ptr addrspace(1) %out, i32 2 1600 %result = call float @llvm.amdgcn.div.fmas.f32(float %a, float %b, float %c, i1 %cond) 1601 store float %result, ptr addrspace(1) %gep.out, align 4 1602 ret void 1603} 1604 1605declare i32 @llvm.amdgcn.workitem.id.x() #0 1606declare float @llvm.amdgcn.div.fmas.f32(float, float, float, i1) #0 
; Declaration of the f64 form of the div.fmas intrinsic: three double operands
; plus an i1 condition, returning double. It is called by test_div_fmas_f64.
; Attribute group #0 (nounwind readnone speculatable) follows the declaration;
; it is the group referenced by the `#0` on the intrinsic declarations in this
; file.
1607declare double @llvm.amdgcn.div.fmas.f64(double, double, double, i1) #0 1608 1609attributes #0 = { nounwind readnone speculatable } 1610