; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=amdgcn -mcpu=gfx900 --verify-machineinstrs < %s | FileCheck --check-prefixes=GCN,GFX9 %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx1030 --verify-machineinstrs < %s | FileCheck --check-prefixes=GCN,GFX10 %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -amdgpu-enable-vopd=0 -amdgpu-enable-delay-alu=0 --verify-machineinstrs < %s | FileCheck --check-prefixes=GCN,GFX11 %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx1030 -mattr=+wavefrontsize64 --verify-machineinstrs < %s | FileCheck --check-prefixes=GCN,GFX10 %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 -mattr=+wavefrontsize64 --verify-machineinstrs < %s | FileCheck --check-prefixes=GCN,GFX11 %s

; Every function in this file computes (a * b) + c on i32, bitcasts the sum to
; float and returns it from an amdgpu_ps function. The functions differ only in
; which operands are plain arguments, `inreg` arguments, or constants.
; The run lines above cover gfx900, gfx1030 and gfx1100; the gfx1030 and gfx1100
; configurations are run a second time with +wavefrontsize64 and share the same
; check prefixes as the first run.
; The expected-output comments are autogenerated (see the first line); they
; should be regenerated with the script rather than edited by hand.
; NOTE(review): the function-name suffix appears to encode the operand kinds in
; the order a, b, c (v = plain argument, s = inreg argument, c / i = constant)
; -- inferred from the signatures, confirm.

; All three operands are plain arguments. The gfx9 and gfx10 checks expect a
; single v_mad_u64_u32; the gfx11 checks expect both multiplicands to be copied
; to other registers first.
; NOTE(review): presumably the gfx11 copies exist because the destination may
; not overlap the sources on that target -- not shown here, confirm.
define amdgpu_ps float @mad_i32_vvv(i32 %a, i32 %b, i32 %c) {
; GFX9-LABEL: mad_i32_vvv:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    v_mad_u64_u32 v[0:1], s[0:1], v0, v1, v[2:3]
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: mad_i32_vvv:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    v_mad_u64_u32 v[0:1], null, v0, v1, v[2:3]
; GFX10-NEXT:    ; return to shader part epilog
;
; GFX11-LABEL: mad_i32_vvv:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    v_mov_b32_e32 v3, v1
; GFX11-NEXT:    v_mov_b32_e32 v4, v0
; GFX11-NEXT:    v_mad_u64_u32 v[0:1], null, v4, v3, v[2:3]
; GFX11-NEXT:    ; return to shader part epilog
  %mul = mul i32 %a, %b
  %add = add i32 %mul, %c
  %cast = bitcast i32 %add to float
  ret float %cast
}

; All three operands are inreg arguments. Every run line shares one expectation
; (the common prefix): a scalar multiply and add, then a move of the result
; into v0.
define amdgpu_ps float @mad_i32_sss(i32 inreg %a, i32 inreg %b, i32 inreg %c) {
; GCN-LABEL: mad_i32_sss:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_mul_i32 s0, s0, s1
; GCN-NEXT:    s_add_i32 s0, s0, s2
; GCN-NEXT:    v_mov_b32_e32 v0, s0
; GCN-NEXT:    ; return to shader part epilog
  %mul = mul i32 %a, %b
  %add = add i32 %mul, %c
  %cast = bitcast i32 %add to float
  ret float %cast
}

; Plain-argument multiplicands with the constant addend 42, which the checks
; expect to appear directly as the last operand of v_mad_u64_u32.
define amdgpu_ps float @mad_i32_vvc(i32 %a, i32 %b) {
; GFX9-LABEL: mad_i32_vvc:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    v_mad_u64_u32 v[0:1], s[0:1], v0, v1, 42
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: mad_i32_vvc:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    v_mad_u64_u32 v[0:1], null, v0, v1, 42
; GFX10-NEXT:    ; return to shader part epilog
;
; GFX11-LABEL: mad_i32_vvc:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    v_mov_b32_e32 v2, v1
; GFX11-NEXT:    v_mov_b32_e32 v3, v0
; GFX11-NEXT:    v_mad_u64_u32 v[0:1], null, v3, v2, 42
; GFX11-NEXT:    ; return to shader part epilog
  %mul = mul i32 %a, %b
  %add = add i32 %mul, 42
  %cast = bitcast i32 %add to float
  ret float %cast
}

; Plain-argument multiplicands with the constant addend 1234567 (0x12d687).
; The gfx9 checks expect the constant to be placed in v[2:3] (high half 0)
; before the mad; the gfx10 and gfx11 checks expect it as a direct operand.
define amdgpu_ps float @mad_i32_vvi(i32 %a, i32 %b) {
; GFX9-LABEL: mad_i32_vvi:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    v_mov_b32_e32 v2, 0x12d687
; GFX9-NEXT:    v_mov_b32_e32 v3, 0
; GFX9-NEXT:    v_mad_u64_u32 v[0:1], s[0:1], v0, v1, v[2:3]
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: mad_i32_vvi:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    v_mad_u64_u32 v[0:1], null, v0, v1, 0x12d687
; GFX10-NEXT:    ; return to shader part epilog
;
; GFX11-LABEL: mad_i32_vvi:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    v_mov_b32_e32 v2, v1
; GFX11-NEXT:    v_mov_b32_e32 v3, v0
; GFX11-NEXT:    v_mad_u64_u32 v[0:1], null, v3, v2, 0x12d687
; GFX11-NEXT:    ; return to shader part epilog
  %mul = mul i32 %a, %b
  %add = add i32 %mul, 1234567
  %cast = bitcast i32 %add to float
  ret float %cast
}

; Same as mad_i32_vvi but with the negative addend -1234567. The gfx9 checks
; expect 0xffed2979 in v2 and -1 in v3; the gfx10 and gfx11 checks expect the
; operand to be printed as 0xffffffffffed2979.
define amdgpu_ps float @mad_i32_vvi_neg(i32 %a, i32 %b) {
; GFX9-LABEL: mad_i32_vvi_neg:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    v_mov_b32_e32 v2, 0xffed2979
; GFX9-NEXT:    v_mov_b32_e32 v3, -1
; GFX9-NEXT:    v_mad_u64_u32 v[0:1], s[0:1], v0, v1, v[2:3]
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: mad_i32_vvi_neg:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    v_mad_u64_u32 v[0:1], null, v0, v1, 0xffffffffffed2979
; GFX10-NEXT:    ; return to shader part epilog
;
; GFX11-LABEL: mad_i32_vvi_neg:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    v_mov_b32_e32 v2, v1
; GFX11-NEXT:    v_mov_b32_e32 v3, v0
; GFX11-NEXT:    v_mad_u64_u32 v[0:1], null, v3, v2, 0xffffffffffed2979
; GFX11-NEXT:    ; return to shader part epilog
  %mul = mul i32 %a, %b
  %add = add i32 %mul, -1234567
  %cast = bitcast i32 %add to float
  ret float %cast
}

; Plain argument multiplied by the constant 42, plus a plain-argument addend.
; The gfx11 checks expect the mad to write v[2:3] and the low half to be copied
; back into v0 afterwards.
define amdgpu_ps float @mad_i32_vcv(i32 %a, i32 %c) {
; GFX9-LABEL: mad_i32_vcv:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    v_mad_u64_u32 v[0:1], s[0:1], v0, 42, v[1:2]
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: mad_i32_vcv:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    v_mad_u64_u32 v[0:1], null, v0, 42, v[1:2]
; GFX10-NEXT:    ; return to shader part epilog
;
; GFX11-LABEL: mad_i32_vcv:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    v_mad_u64_u32 v[2:3], null, v0, 42, v[1:2]
; GFX11-NEXT:    v_mov_b32_e32 v0, v2
; GFX11-NEXT:    ; return to shader part epilog
  %mul = mul i32 %a, 42
  %add = add i32 %mul, %c
  %cast = bitcast i32 %add to float
  ret float %cast
}

; Plain argument multiplied by the constant 42, plus the constant 43; both
; constants are expected to appear directly as operands of v_mad_u64_u32.
define amdgpu_ps float @mad_i32_vcc(i32 %a) {
; GFX9-LABEL: mad_i32_vcc:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    v_mad_u64_u32 v[0:1], s[0:1], v0, 42, 43
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: mad_i32_vcc:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    v_mad_u64_u32 v[0:1], null, v0, 42, 43
; GFX10-NEXT:    ; return to shader part epilog
;
; GFX11-LABEL: mad_i32_vcc:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    v_mov_b32_e32 v2, v0
; GFX11-NEXT:    v_mad_u64_u32 v[0:1], null, v2, 42, 43
; GFX11-NEXT:    ; return to shader part epilog
  %mul = mul i32 %a, 42
  %add = add i32 %mul, 43
  %cast = bitcast i32 %add to float
  ret float %cast
}

; Plain-argument multiplicands with an inreg addend; the checks expect the
; addend to be read as the s[0:1] pair.
define amdgpu_ps float @mad_i32_vvs(i32 %a, i32 %b, i32 inreg %c) {
; GFX9-LABEL: mad_i32_vvs:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    v_mad_u64_u32 v[0:1], s[0:1], v0, v1, s[0:1]
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: mad_i32_vvs:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    v_mad_u64_u32 v[0:1], null, v0, v1, s[0:1]
; GFX10-NEXT:    ; return to shader part epilog
;
; GFX11-LABEL: mad_i32_vvs:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    v_mov_b32_e32 v2, v1
; GFX11-NEXT:    v_mov_b32_e32 v3, v0
; GFX11-NEXT:    v_mad_u64_u32 v[0:1], null, v3, v2, s[0:1]
; GFX11-NEXT:    ; return to shader part epilog
  %mul = mul i32 %a, %b
  %add = add i32 %mul, %c
  %cast = bitcast i32 %add to float
  ret float %cast
}

; The second multiplicand is an inreg argument (s0 in the checks); the first
; multiplicand and the addend are plain arguments.
define amdgpu_ps float @mad_i32_vsv(i32 %a, i32 inreg %b, i32 %c) {
; GFX9-LABEL: mad_i32_vsv:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    v_mad_u64_u32 v[0:1], s[0:1], v0, s0, v[1:2]
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: mad_i32_vsv:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    v_mad_u64_u32 v[0:1], null, v0, s0, v[1:2]
; GFX10-NEXT:    ; return to shader part epilog
;
; GFX11-LABEL: mad_i32_vsv:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    v_mad_u64_u32 v[2:3], null, v0, s0, v[1:2]
; GFX11-NEXT:    v_mov_b32_e32 v0, v2
; GFX11-NEXT:    ; return to shader part epilog
  %mul = mul i32 %a, %b
  %add = add i32 %mul, %c
  %cast = bitcast i32 %add to float
  ret float %cast
}

; The first multiplicand is an inreg argument (s0 in the checks); the second
; multiplicand and the addend are plain arguments.
define amdgpu_ps float @mad_i32_svv(i32 inreg %a, i32 %b, i32 %c) {
; GFX9-LABEL: mad_i32_svv:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    v_mad_u64_u32 v[0:1], s[0:1], s0, v0, v[1:2]
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: mad_i32_svv:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    v_mad_u64_u32 v[0:1], null, s0, v0, v[1:2]
; GFX10-NEXT:    ; return to shader part epilog
;
; GFX11-LABEL: mad_i32_svv:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    v_mad_u64_u32 v[2:3], null, s0, v0, v[1:2]
; GFX11-NEXT:    v_mov_b32_e32 v0, v2
; GFX11-NEXT:    ; return to shader part epilog
  %mul = mul i32 %a, %b
  %add = add i32 %mul, %c
  %cast = bitcast i32 %add to float
  ret float %cast
}

; Second multiplicand and addend are both inreg arguments. The gfx9 checks
; expect a separate v_mul_lo_u32 and v_add_u32 instead of a mad; the gfx10 and
; gfx11 checks expect s1 to be copied to s2 and the mad to read s[2:3].
define amdgpu_ps float @mad_i32_vss(i32 %a, i32 inreg %b, i32 inreg %c) {
; GFX9-LABEL: mad_i32_vss:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    v_mul_lo_u32 v0, v0, s0
; GFX9-NEXT:    v_add_u32_e32 v0, s1, v0
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: mad_i32_vss:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_mov_b32 s2, s1
; GFX10-NEXT:    v_mad_u64_u32 v[0:1], null, v0, s0, s[2:3]
; GFX10-NEXT:    ; return to shader part epilog
;
; GFX11-LABEL: mad_i32_vss:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    v_mov_b32_e32 v2, v0
; GFX11-NEXT:    s_mov_b32 s2, s1
; GFX11-NEXT:    v_mad_u64_u32 v[0:1], null, v2, s0, s[2:3]
; GFX11-NEXT:    ; return to shader part epilog
  %mul = mul i32 %a, %b
  %add = add i32 %mul, %c
  %cast = bitcast i32 %add to float
  ret float %cast
}

; First multiplicand and addend are both inreg arguments. Expectations mirror
; mad_i32_vss with the multiplicand operands swapped.
define amdgpu_ps float @mad_i32_svs(i32 inreg %a, i32 %b, i32 inreg %c) {
; GFX9-LABEL: mad_i32_svs:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    v_mul_lo_u32 v0, s0, v0
; GFX9-NEXT:    v_add_u32_e32 v0, s1, v0
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: mad_i32_svs:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_mov_b32 s2, s1
; GFX10-NEXT:    v_mad_u64_u32 v[0:1], null, s0, v0, s[2:3]
; GFX10-NEXT:    ; return to shader part epilog
;
; GFX11-LABEL: mad_i32_svs:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    v_mov_b32_e32 v2, v0
; GFX11-NEXT:    s_mov_b32 s2, s1
; GFX11-NEXT:    v_mad_u64_u32 v[0:1], null, s0, v2, s[2:3]
; GFX11-NEXT:    ; return to shader part epilog
  %mul = mul i32 %a, %b
  %add = add i32 %mul, %c
  %cast = bitcast i32 %add to float
  ret float %cast
}

; Both multiplicands are inreg arguments and the addend is a plain argument.
; The gfx9 checks expect a scalar multiply followed by v_add_u32; the gfx10 and
; gfx11 checks expect a v_mad_u64_u32 reading s0 and s1 directly.
define amdgpu_ps float @mad_i32_ssv(i32 inreg %a, i32 inreg %b, i32 %c) {
; GFX9-LABEL: mad_i32_ssv:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_mul_i32 s0, s0, s1
; GFX9-NEXT:    v_add_u32_e32 v0, s0, v0
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: mad_i32_ssv:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    v_mad_u64_u32 v[0:1], null, s0, s1, v[0:1]
; GFX10-NEXT:    ; return to shader part epilog
;
; GFX11-LABEL: mad_i32_ssv:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    v_mad_u64_u32 v[1:2], null, s0, s1, v[0:1]
; GFX11-NEXT:    v_mov_b32_e32 v0, v1
; GFX11-NEXT:    ; return to shader part epilog
  %mul = mul i32 %a, %b
  %add = add i32 %mul, %c
  %cast = bitcast i32 %add to float
  ret float %cast
}

; The product has a second use (it is also stored), so on every target the
; checks expect a separate v_mul_lo_u32 and add rather than a v_mad_u64_u32.
; NOTE(review): the store goes through `ptr undef`; newer LLVM tests tend to
; use `ptr poison` or a real pointer argument -- confirm against current
; project guidance, and regenerate the assertions if this is changed.
define amdgpu_ps float @mad_i32_vvv_multiuse(i32 %a, i32 %b, i32 %c) {
; GFX9-LABEL: mad_i32_vvv_multiuse:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    v_mul_lo_u32 v1, v0, v1
; GFX9-NEXT:    v_add_u32_e32 v0, v1, v2
; GFX9-NEXT:    flat_store_dword v[0:1], v1
; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: mad_i32_vvv_multiuse:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    v_mul_lo_u32 v1, v0, v1
; GFX10-NEXT:    v_add_nc_u32_e32 v0, v1, v2
; GFX10-NEXT:    flat_store_dword v[0:1], v1
; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
;
; GFX11-LABEL: mad_i32_vvv_multiuse:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    v_mul_lo_u32 v1, v0, v1
; GFX11-NEXT:    v_add_nc_u32_e32 v0, v1, v2
; GFX11-NEXT:    flat_store_b32 v[0:1], v1
; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-NEXT:    ; return to shader part epilog
  %mul = mul i32 %a, %b
  %add = add i32 %mul, %c
  store i32 %mul, ptr undef
  %cast = bitcast i32 %add to float
  ret float %cast
}