; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -global-isel -mtriple=amdgcn -mcpu=tahiti < %s | FileCheck -check-prefix=GFX6 %s
; RUN: llc -global-isel -mtriple=amdgcn -mcpu=fiji < %s | FileCheck -check-prefix=GFX8 %s
; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefix=GFX9 %s
; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1010 < %s | FileCheck -check-prefix=GFX10 %s
; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefix=GFX11 %s

; Scalar f32 fma; every target is expected to emit a single v_fma_f32.
define float @v_fma_f32(float %x, float %y, float %z) {
; GFX6-LABEL: v_fma_f32:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT: v_fma_f32 v0, v0, v1, v2
; GFX6-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_fma_f32:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_fma_f32 v0, v0, v1, v2
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_fma_f32:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_fma_f32 v0, v0, v1, v2
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_fma_f32:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_fma_f32 v0, v0, v1, v2
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_fma_f32:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_fma_f32 v0, v0, v1, v2
; GFX11-NEXT: s_setpc_b64 s[30:31]
  %fma = call float @llvm.fma.f32(float %x, float %y, float %z)
  ret float %fma
}

; <2 x float> fma; the checks expect one v_fma_f32 per element.
define <2 x float> @v_fma_v2f32(<2 x float> %x, <2 x float> %y, <2 x float> %z) {
; GFX6-LABEL: v_fma_v2f32:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT: v_fma_f32 v0, v0, v2, v4
; GFX6-NEXT: v_fma_f32 v1, v1, v3, v5
; GFX6-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_fma_v2f32:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_fma_f32 v0, v0, v2, v4
; GFX8-NEXT: v_fma_f32 v1, v1, v3, v5
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_fma_v2f32:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_fma_f32 v0, v0, v2, v4
; GFX9-NEXT: v_fma_f32 v1, v1, v3, v5
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_fma_v2f32:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_fma_f32 v0, v0, v2, v4
; GFX10-NEXT: v_fma_f32 v1, v1, v3, v5
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_fma_v2f32:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_fma_f32 v0, v0, v2, v4
; GFX11-NEXT: v_fma_f32 v1, v1, v3, v5
; GFX11-NEXT: s_setpc_b64 s[30:31]
  %fma = call <2 x float> @llvm.fma.v2f32(<2 x float> %x, <2 x float> %y, <2 x float> %z)
  ret <2 x float> %fma
}

; Scalar f16 fma; the tahiti checks go through f32 conversions, the other targets use v_fma_f16.
define half @v_fma_f16(half %x, half %y, half %z) {
; GFX6-LABEL: v_fma_f16:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT: v_cvt_f32_f16_e32 v0, v0
; GFX6-NEXT: v_cvt_f32_f16_e32 v1, v1
; GFX6-NEXT: v_cvt_f32_f16_e32 v2, v2
; GFX6-NEXT: v_fma_f32 v0, v0, v1, v2
; GFX6-NEXT: v_cvt_f16_f32_e32 v0, v0
; GFX6-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_fma_f16:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_fma_f16 v0, v0, v1, v2
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_fma_f16:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_fma_f16 v0, v0, v1, v2
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_fma_f16:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_fma_f16 v0, v0, v1, v2
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_fma_f16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_fma_f16 v0, v0, v1, v2
; GFX11-NEXT: s_setpc_b64 s[30:31]
  %fma = call half @llvm.fma.f16(half %x, half %y, half %z)
  ret half %fma
}

; f16 fma with the first multiplicand negated; the checks expect the fneg as a source modifier.
define half @v_fma_f16_fneg_lhs(half %x, half %y, half %z) {
; GFX6-LABEL: v_fma_f16_fneg_lhs:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT: v_cvt_f32_f16_e64 v0, -v0
; GFX6-NEXT: v_cvt_f32_f16_e32 v1, v1
; GFX6-NEXT: v_cvt_f32_f16_e32 v2, v2
; GFX6-NEXT: v_fma_f32 v0, v0, v1, v2
; GFX6-NEXT: v_cvt_f16_f32_e32 v0, v0
; GFX6-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_fma_f16_fneg_lhs:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_fma_f16 v0, -v0, v1, v2
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_fma_f16_fneg_lhs:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_fma_f16 v0, -v0, v1, v2
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_fma_f16_fneg_lhs:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_fma_f16 v0, -v0, v1, v2
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_fma_f16_fneg_lhs:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_fma_f16 v0, -v0, v1, v2
; GFX11-NEXT: s_setpc_b64 s[30:31]
  %neg.x = fneg half %x
  %fma = call half @llvm.fma.f16(half %neg.x, half %y, half %z)
  ret half %fma
}

; f16 fma with the second multiplicand negated.
define half @v_fma_f16_fneg_rhs(half %x, half %y, half %z) {
; GFX6-LABEL: v_fma_f16_fneg_rhs:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT: v_cvt_f32_f16_e32 v0, v0
; GFX6-NEXT: v_cvt_f32_f16_e64 v1, -v1
; GFX6-NEXT: v_cvt_f32_f16_e32 v2, v2
; GFX6-NEXT: v_fma_f32 v0, v0, v1, v2
; GFX6-NEXT: v_cvt_f16_f32_e32 v0, v0
; GFX6-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_fma_f16_fneg_rhs:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_fma_f16 v0, v0, -v1, v2
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_fma_f16_fneg_rhs:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_fma_f16 v0, v0, -v1, v2
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_fma_f16_fneg_rhs:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_fma_f16 v0, v0, -v1, v2
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_fma_f16_fneg_rhs:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_fma_f16 v0, v0, -v1, v2
; GFX11-NEXT: s_setpc_b64 s[30:31]
  %neg.y = fneg half %y
  %fma = call half @llvm.fma.f16(half %x, half %neg.y, half %z)
  ret half %fma
}

; f16 fma with the addend negated.
define half @v_fma_f16_fneg_add(half %x, half %y, half %z) {
; GFX6-LABEL: v_fma_f16_fneg_add:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT: v_cvt_f32_f16_e32 v0, v0
; GFX6-NEXT: v_cvt_f32_f16_e32 v1, v1
; GFX6-NEXT: v_cvt_f32_f16_e64 v2, -v2
; GFX6-NEXT: v_fma_f32 v0, v0, v1, v2
; GFX6-NEXT: v_cvt_f16_f32_e32 v0, v0
; GFX6-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_fma_f16_fneg_add:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_fma_f16 v0, v0, v1, -v2
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_fma_f16_fneg_add:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_fma_f16 v0, v0, v1, -v2
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_fma_f16_fneg_add:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_fma_f16 v0, v0, v1, -v2
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_fma_f16_fneg_add:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_fma_f16 v0, v0, v1, -v2
; GFX11-NEXT: s_setpc_b64 s[30:31]
  %neg.z = fneg half %z
  %fma = call half @llvm.fma.f16(half %x, half %y, half %neg.z)
  ret half %fma
}

; <2 x half> fma; the gfx900 and newer checks expect a single v_pk_fma_f16.
define <2 x half> @v_fma_v2f16(<2 x half> %x, <2 x half> %y, <2 x half> %z) {
; GFX6-LABEL: v_fma_v2f16:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT: v_cvt_f32_f16_e32 v0, v0
; GFX6-NEXT: v_cvt_f32_f16_e32 v2, v2
; GFX6-NEXT: v_cvt_f32_f16_e32 v4, v4
; GFX6-NEXT: v_cvt_f32_f16_e32 v1, v1
; GFX6-NEXT: v_cvt_f32_f16_e32 v3, v3
; GFX6-NEXT: v_cvt_f32_f16_e32 v5, v5
; GFX6-NEXT: v_fma_f32 v0, v0, v2, v4
; GFX6-NEXT: v_cvt_f16_f32_e32 v0, v0
; GFX6-NEXT: v_fma_f32 v1, v1, v3, v5
; GFX6-NEXT: v_cvt_f16_f32_e32 v1, v1
; GFX6-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_fma_v2f16:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_lshrrev_b32_e32 v3, 16, v0
; GFX8-NEXT: v_lshrrev_b32_e32 v4, 16, v1
; GFX8-NEXT: v_lshrrev_b32_e32 v5, 16, v2
; GFX8-NEXT: v_fma_f16 v0, v0, v1, v2
; GFX8-NEXT: v_fma_f16 v1, v3, v4, v5
; GFX8-NEXT: v_lshlrev_b32_e32 v1, 16, v1
; GFX8-NEXT: v_or_b32_e32 v0, v0, v1
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_fma_v2f16:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_pk_fma_f16 v0, v0, v1, v2
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_fma_v2f16:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_pk_fma_f16 v0, v0, v1, v2
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_fma_v2f16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_pk_fma_f16 v0, v0, v1, v2
; GFX11-NEXT: s_setpc_b64 s[30:31]
  %fma = call <2 x half> @llvm.fma.v2f16(<2 x half> %x, <2 x half> %y, <2 x half> %z)
  ret <2 x half> %fma
}

; <2 x half> fma with the first multiplicand negated; packed targets use neg_lo/neg_hi, older ones an xor.
define <2 x half> @v_fma_v2f16_fneg_lhs(<2 x half> %x, <2 x half> %y, <2 x half> %z) {
; GFX6-LABEL: v_fma_v2f16_fneg_lhs:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1
; GFX6-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX6-NEXT: v_or_b32_e32 v0, v1, v0
; GFX6-NEXT: v_xor_b32_e32 v0, 0x80008000, v0
; GFX6-NEXT: v_lshrrev_b32_e32 v1, 16, v0
; GFX6-NEXT: v_cvt_f32_f16_e32 v0, v0
; GFX6-NEXT: v_cvt_f32_f16_e32 v2, v2
; GFX6-NEXT: v_cvt_f32_f16_e32 v4, v4
; GFX6-NEXT: v_cvt_f32_f16_e32 v1, v1
; GFX6-NEXT: v_cvt_f32_f16_e32 v3, v3
; GFX6-NEXT: v_cvt_f32_f16_e32 v5, v5
; GFX6-NEXT: v_fma_f32 v0, v0, v2, v4
; GFX6-NEXT: v_cvt_f16_f32_e32 v0, v0
; GFX6-NEXT: v_fma_f32 v1, v1, v3, v5
; GFX6-NEXT: v_cvt_f16_f32_e32 v1, v1
; GFX6-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_fma_v2f16_fneg_lhs:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_xor_b32_e32 v0, 0x80008000, v0
; GFX8-NEXT: v_lshrrev_b32_e32 v3, 16, v0
; GFX8-NEXT: v_lshrrev_b32_e32 v4, 16, v1
; GFX8-NEXT: v_lshrrev_b32_e32 v5, 16, v2
; GFX8-NEXT: v_fma_f16 v0, v0, v1, v2
; GFX8-NEXT: v_fma_f16 v1, v3, v4, v5
; GFX8-NEXT: v_lshlrev_b32_e32 v1, 16, v1
; GFX8-NEXT: v_or_b32_e32 v0, v0, v1
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_fma_v2f16_fneg_lhs:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_pk_fma_f16 v0, v0, v1, v2 neg_lo:[1,0,0] neg_hi:[1,0,0]
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_fma_v2f16_fneg_lhs:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_pk_fma_f16 v0, v0, v1, v2 neg_lo:[1,0,0] neg_hi:[1,0,0]
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_fma_v2f16_fneg_lhs:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_pk_fma_f16 v0, v0, v1, v2 neg_lo:[1,0,0] neg_hi:[1,0,0]
; GFX11-NEXT: s_setpc_b64 s[30:31]
  %x.fneg = fneg <2 x half> %x
  %fma = call <2 x half> @llvm.fma.v2f16(<2 x half> %x.fneg, <2 x half> %y, <2 x half> %z)
  ret <2 x half> %fma
}

; <2 x half> fma with the second multiplicand negated.
define <2 x half> @v_fma_v2f16_fneg_rhs(<2 x half> %x, <2 x half> %y, <2 x half> %z) {
; GFX6-LABEL: v_fma_v2f16_fneg_rhs:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT: v_lshlrev_b32_e32 v3, 16, v3
; GFX6-NEXT: v_and_b32_e32 v2, 0xffff, v2
; GFX6-NEXT: v_or_b32_e32 v2, v3, v2
; GFX6-NEXT: v_xor_b32_e32 v2, 0x80008000, v2
; GFX6-NEXT: v_lshrrev_b32_e32 v3, 16, v2
; GFX6-NEXT: v_cvt_f32_f16_e32 v0, v0
; GFX6-NEXT: v_cvt_f32_f16_e32 v2, v2
; GFX6-NEXT: v_cvt_f32_f16_e32 v4, v4
; GFX6-NEXT: v_cvt_f32_f16_e32 v1, v1
; GFX6-NEXT: v_cvt_f32_f16_e32 v3, v3
; GFX6-NEXT: v_cvt_f32_f16_e32 v5, v5
; GFX6-NEXT: v_fma_f32 v0, v0, v2, v4
; GFX6-NEXT: v_cvt_f16_f32_e32 v0, v0
; GFX6-NEXT: v_fma_f32 v1, v1, v3, v5
; GFX6-NEXT: v_cvt_f16_f32_e32 v1, v1
; GFX6-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_fma_v2f16_fneg_rhs:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_xor_b32_e32 v1, 0x80008000, v1
; GFX8-NEXT: v_lshrrev_b32_e32 v3, 16, v0
; GFX8-NEXT: v_lshrrev_b32_e32 v4, 16, v1
; GFX8-NEXT: v_lshrrev_b32_e32 v5, 16, v2
; GFX8-NEXT: v_fma_f16 v0, v0, v1, v2
; GFX8-NEXT: v_fma_f16 v1, v3, v4, v5
; GFX8-NEXT: v_lshlrev_b32_e32 v1, 16, v1
; GFX8-NEXT: v_or_b32_e32 v0, v0, v1
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_fma_v2f16_fneg_rhs:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_pk_fma_f16 v0, v0, v1, v2 neg_lo:[0,1,0] neg_hi:[0,1,0]
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_fma_v2f16_fneg_rhs:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_pk_fma_f16 v0, v0, v1, v2 neg_lo:[0,1,0] neg_hi:[0,1,0]
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_fma_v2f16_fneg_rhs:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_pk_fma_f16 v0, v0, v1, v2 neg_lo:[0,1,0] neg_hi:[0,1,0]
; GFX11-NEXT: s_setpc_b64 s[30:31]
  %y.fneg = fneg <2 x half> %y
  %fma = call <2 x half> @llvm.fma.v2f16(<2 x half> %x, <2 x half> %y.fneg, <2 x half> %z)
  ret <2 x half> %fma
}

; <2 x half> fma with both multiplicands negated; the checks expect no negation to remain.
define <2 x half> @v_fma_v2f16_fneg_lhs_rhs(<2 x half> %x, <2 x half> %y, <2 x half> %z) {
; GFX6-LABEL: v_fma_v2f16_fneg_lhs_rhs:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT: v_cvt_f32_f16_e32 v0, v0
; GFX6-NEXT: v_cvt_f32_f16_e32 v2, v2
; GFX6-NEXT: v_cvt_f32_f16_e32 v4, v4
; GFX6-NEXT: v_cvt_f32_f16_e32 v1, v1
; GFX6-NEXT: v_cvt_f32_f16_e32 v3, v3
; GFX6-NEXT: v_cvt_f32_f16_e32 v5, v5
; GFX6-NEXT: v_fma_f32 v0, v0, v2, v4
; GFX6-NEXT: v_cvt_f16_f32_e32 v0, v0
; GFX6-NEXT: v_fma_f32 v1, v1, v3, v5
; GFX6-NEXT: v_cvt_f16_f32_e32 v1, v1
; GFX6-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_fma_v2f16_fneg_lhs_rhs:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_lshrrev_b32_e32 v3, 16, v0
; GFX8-NEXT: v_lshrrev_b32_e32 v4, 16, v1
; GFX8-NEXT: v_lshrrev_b32_e32 v5, 16, v2
; GFX8-NEXT: v_fma_f16 v0, v0, v1, v2
; GFX8-NEXT: v_fma_f16 v1, v3, v4, v5
; GFX8-NEXT: v_lshlrev_b32_e32 v1, 16, v1
; GFX8-NEXT: v_or_b32_e32 v0, v0, v1
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_fma_v2f16_fneg_lhs_rhs:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_pk_fma_f16 v0, v0, v1, v2
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_fma_v2f16_fneg_lhs_rhs:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_pk_fma_f16 v0, v0, v1, v2
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_fma_v2f16_fneg_lhs_rhs:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_pk_fma_f16 v0, v0, v1, v2
; GFX11-NEXT: s_setpc_b64 s[30:31]
  %x.fneg = fneg <2 x half> %x
  %y.fneg = fneg <2 x half> %y
  %fma = call <2 x half> @llvm.fma.v2f16(<2 x half> %x.fneg, <2 x half> %y.fneg, <2 x half> %z)
  ret <2 x half> %fma
}

; <3 x half> fma (odd element count).
define <3 x half> @v_fma_v3f16(<3 x half> %x, <3 x half> %y, <3 x half> %z) {
; GFX6-LABEL: v_fma_v3f16:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT: v_cvt_f32_f16_e32 v0, v0
; GFX6-NEXT: v_cvt_f32_f16_e32 v3, v3
; GFX6-NEXT: v_cvt_f32_f16_e32 v6, v6
; GFX6-NEXT: v_cvt_f32_f16_e32 v1, v1
; GFX6-NEXT: v_cvt_f32_f16_e32 v2, v2
; GFX6-NEXT: v_cvt_f32_f16_e32 v5, v5
; GFX6-NEXT: v_fma_f32 v0, v0, v3, v6
; GFX6-NEXT: v_cvt_f32_f16_e32 v3, v4
; GFX6-NEXT: v_cvt_f32_f16_e32 v4, v7
; GFX6-NEXT: v_cvt_f32_f16_e32 v6, v8
; GFX6-NEXT: v_cvt_f16_f32_e32 v0, v0
; GFX6-NEXT: v_fma_f32 v1, v1, v3, v4
; GFX6-NEXT: v_fma_f32 v2, v2, v5, v6
; GFX6-NEXT: v_cvt_f16_f32_e32 v1, v1
; GFX6-NEXT: v_cvt_f16_f32_e32 v2, v2
; GFX6-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_fma_v3f16:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_lshrrev_b32_e32 v6, 16, v0
; GFX8-NEXT: v_lshrrev_b32_e32 v7, 16, v2
; GFX8-NEXT: v_lshrrev_b32_e32 v8, 16, v4
; GFX8-NEXT: v_fma_f16 v0, v0, v2, v4
; GFX8-NEXT: v_fma_f16 v2, v6, v7, v8
; GFX8-NEXT: v_lshlrev_b32_e32 v2, 16, v2
; GFX8-NEXT: v_fma_f16 v1, v1, v3, v5
; GFX8-NEXT: v_or_b32_e32 v0, v0, v2
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_fma_v3f16:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_pk_fma_f16 v0, v0, v2, v4
; GFX9-NEXT: v_pk_fma_f16 v1, v1, v3, v5
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_fma_v3f16:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_pk_fma_f16 v0, v0, v2, v4
; GFX10-NEXT: v_pk_fma_f16 v1, v1, v3, v5
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_fma_v3f16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_pk_fma_f16 v0, v0, v2, v4
; GFX11-NEXT: v_pk_fma_f16 v1, v1, v3, v5
; GFX11-NEXT: s_setpc_b64 s[30:31]
  %fma = call <3 x half> @llvm.fma.v3f16(<3 x half> %x, <3 x half> %y, <3 x half> %z)
  ret <3 x half> %fma
}

; <4 x half> fma; packed targets are expected to emit two v_pk_fma_f16.
define <4 x half> @v_fma_v4f16(<4 x half> %x, <4 x half> %y, <4 x half> %z) {
; GFX6-LABEL: v_fma_v4f16:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT: v_cvt_f32_f16_e32 v0, v0
; GFX6-NEXT: v_cvt_f32_f16_e32 v4, v4
; GFX6-NEXT: v_cvt_f32_f16_e32 v8, v8
; GFX6-NEXT: v_cvt_f32_f16_e32 v1, v1
; GFX6-NEXT: v_cvt_f32_f16_e32 v5, v5
; GFX6-NEXT: v_cvt_f32_f16_e32 v9, v9
; GFX6-NEXT: v_fma_f32 v0, v0, v4, v8
; GFX6-NEXT: v_cvt_f32_f16_e32 v2, v2
; GFX6-NEXT: v_cvt_f32_f16_e32 v4, v6
; GFX6-NEXT: v_fma_f32 v1, v1, v5, v9
; GFX6-NEXT: v_cvt_f32_f16_e32 v5, v10
; GFX6-NEXT: v_cvt_f32_f16_e32 v3, v3
; GFX6-NEXT: v_cvt_f32_f16_e32 v6, v7
; GFX6-NEXT: v_cvt_f32_f16_e32 v7, v11
; GFX6-NEXT: v_fma_f32 v2, v2, v4, v5
; GFX6-NEXT: v_cvt_f16_f32_e32 v0, v0
; GFX6-NEXT: v_cvt_f16_f32_e32 v1, v1
; GFX6-NEXT: v_fma_f32 v3, v3, v6, v7
; GFX6-NEXT: v_cvt_f16_f32_e32 v2, v2
; GFX6-NEXT: v_cvt_f16_f32_e32 v3, v3
; GFX6-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_fma_v4f16:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_lshrrev_b32_e32 v6, 16, v0
; GFX8-NEXT: v_lshrrev_b32_e32 v8, 16, v2
; GFX8-NEXT: v_lshrrev_b32_e32 v10, 16, v4
; GFX8-NEXT: v_lshrrev_b32_e32 v7, 16, v1
; GFX8-NEXT: v_lshrrev_b32_e32 v9, 16, v3
; GFX8-NEXT: v_lshrrev_b32_e32 v11, 16, v5
; GFX8-NEXT: v_fma_f16 v0, v0, v2, v4
; GFX8-NEXT: v_fma_f16 v2, v6, v8, v10
; GFX8-NEXT: v_fma_f16 v1, v1, v3, v5
; GFX8-NEXT: v_fma_f16 v3, v7, v9, v11
; GFX8-NEXT: v_lshlrev_b32_e32 v2, 16, v2
; GFX8-NEXT: v_or_b32_e32 v0, v0, v2
; GFX8-NEXT: v_lshlrev_b32_e32 v2, 16, v3
; GFX8-NEXT: v_or_b32_e32 v1, v1, v2
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_fma_v4f16:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_pk_fma_f16 v0, v0, v2, v4
; GFX9-NEXT: v_pk_fma_f16 v1, v1, v3, v5
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_fma_v4f16:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_pk_fma_f16 v0, v0, v2, v4
; GFX10-NEXT: v_pk_fma_f16 v1, v1, v3, v5
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_fma_v4f16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_pk_fma_f16 v0, v0, v2, v4
; GFX11-NEXT: v_pk_fma_f16 v1, v1, v3, v5
; GFX11-NEXT: s_setpc_b64 s[30:31]
  %fma = call <4 x half> @llvm.fma.v4f16(<4 x half> %x, <4 x half> %y, <4 x half> %z)
  ret <4 x half> %fma
}

; Scalar f64 fma on 64-bit register pairs.
define double @v_fma_f64(double %x, double %y, double %z) {
; GFX6-LABEL: v_fma_f64:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], v[4:5]
; GFX6-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_fma_f64:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], v[4:5]
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_fma_f64:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], v[4:5]
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_fma_f64:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], v[4:5]
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_fma_f64:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], v[4:5]
; GFX11-NEXT: s_setpc_b64 s[30:31]
  %fma = call double @llvm.fma.f64(double %x, double %y, double %z)
  ret double %fma
}

; f64 fma with all three operands negated; the checks keep only the negation on the addend.
define double @v_fma_f64_fneg_all(double %x, double %y, double %z) {
; GFX6-LABEL: v_fma_f64_fneg_all:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], -v[4:5]
; GFX6-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_fma_f64_fneg_all:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], -v[4:5]
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_fma_f64_fneg_all:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], -v[4:5]
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_fma_f64_fneg_all:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], -v[4:5]
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_fma_f64_fneg_all:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], -v[4:5]
; GFX11-NEXT: s_setpc_b64 s[30:31]
  %neg.x = fneg double %x
  %neg.y = fneg double %y
  %neg.z = fneg double %z
  %fma = call double @llvm.fma.f64(double %neg.x, double %neg.y, double %neg.z)
  ret double %fma
}

; <2 x double> fma; the checks expect one v_fma_f64 per element.
define <2 x double> @v_fma_v2f64(<2 x double> %x, <2 x double> %y, <2 x double> %z) {
; GFX6-LABEL: v_fma_v2f64:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT: v_fma_f64 v[0:1], v[0:1], v[4:5], v[8:9]
; GFX6-NEXT: v_fma_f64 v[2:3], v[2:3], v[6:7], v[10:11]
; GFX6-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_fma_v2f64:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_fma_f64 v[0:1], v[0:1], v[4:5], v[8:9]
; GFX8-NEXT: v_fma_f64 v[2:3], v[2:3], v[6:7], v[10:11]
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_fma_v2f64:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_fma_f64 v[0:1], v[0:1], v[4:5], v[8:9]
; GFX9-NEXT: v_fma_f64 v[2:3], v[2:3], v[6:7], v[10:11]
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_fma_v2f64:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_fma_f64 v[0:1], v[0:1], v[4:5], v[8:9]
; GFX10-NEXT: v_fma_f64 v[2:3], v[2:3], v[6:7], v[10:11]
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_fma_v2f64:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_fma_f64 v[0:1], v[0:1], v[4:5], v[8:9]
; GFX11-NEXT: v_fma_f64 v[2:3], v[2:3], v[6:7], v[10:11]
; GFX11-NEXT: s_setpc_b64 s[30:31]
  %fma = call <2 x double> @llvm.fma.v2f64(<2 x double> %x, <2 x double> %y, <2 x double> %z)
  ret <2 x double> %fma
}

; f32 fma with fabs on the first multiplicand; the checks expect an |abs| source modifier.
define float @v_fma_f32_fabs_lhs(float %x, float %y, float %z) {
; GFX6-LABEL: v_fma_f32_fabs_lhs:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT: v_fma_f32 v0, |v0|, v1, v2
; GFX6-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_fma_f32_fabs_lhs:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_fma_f32 v0, |v0|, v1, v2
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_fma_f32_fabs_lhs:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_fma_f32 v0, |v0|, v1, v2
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_fma_f32_fabs_lhs:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_fma_f32 v0, |v0|, v1, v2
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_fma_f32_fabs_lhs:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_fma_f32 v0, |v0|, v1, v2
; GFX11-NEXT: s_setpc_b64 s[30:31]
  %fabs.x = call float @llvm.fabs.f32(float %x)
  %fma = call float @llvm.fma.f32(float %fabs.x, float %y, float %z)
  ret float %fma
}

; f32 fma with fabs on the second multiplicand.
define float @v_fma_f32_fabs_rhs(float %x, float %y, float %z) {
; GFX6-LABEL: v_fma_f32_fabs_rhs:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT: v_fma_f32 v0, v0, |v1|, v2
; GFX6-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_fma_f32_fabs_rhs:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_fma_f32 v0, v0, |v1|, v2
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_fma_f32_fabs_rhs:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_fma_f32 v0, v0, |v1|, v2
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_fma_f32_fabs_rhs:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_fma_f32 v0, v0, |v1|, v2
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_fma_f32_fabs_rhs:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_fma_f32 v0, v0, |v1|, v2
; GFX11-NEXT: s_setpc_b64 s[30:31]
  %fabs.y = call float @llvm.fabs.f32(float %y)
  %fma = call float @llvm.fma.f32(float %x, float %fabs.y, float %z)
  ret float %fma
}

; f32 fma with fabs on both multiplicands.
define float @v_fma_f32_fabs_lhs_rhs(float %x, float %y, float %z) {
; GFX6-LABEL: v_fma_f32_fabs_lhs_rhs:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT: v_fma_f32 v0, |v0|, |v1|, v2
; GFX6-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_fma_f32_fabs_lhs_rhs:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_fma_f32 v0, |v0|, |v1|, v2
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_fma_f32_fabs_lhs_rhs:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_fma_f32 v0, |v0|, |v1|, v2
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_fma_f32_fabs_lhs_rhs:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_fma_f32 v0, |v0|, |v1|, v2
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_fma_f32_fabs_lhs_rhs:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_fma_f32 v0, |v0|, |v1|, v2
; GFX11-NEXT: s_setpc_b64 s[30:31]
  %fabs.x = call float @llvm.fabs.f32(float %x)
  %fabs.y = call float @llvm.fabs.f32(float %y)
  %fma = call float @llvm.fma.f32(float %fabs.x, float %fabs.y, float %z)
  ret float %fma
}

; amdgpu_ps variant with %x passed inreg; the checks show it arriving in s0.
define amdgpu_ps float @v_fma_f32_sgpr_vgpr_vgpr(float inreg %x, float %y, float %z) {
; GFX6-LABEL: v_fma_f32_sgpr_vgpr_vgpr:
; GFX6: ; %bb.0:
; GFX6-NEXT: v_fma_f32 v0, s0, v0, v1
; GFX6-NEXT: ; return to shader part epilog
;
; GFX8-LABEL: v_fma_f32_sgpr_vgpr_vgpr:
; GFX8: ; %bb.0:
; GFX8-NEXT: v_fma_f32 v0, s0, v0, v1
; GFX8-NEXT: ; return to shader part epilog
;
; GFX9-LABEL: v_fma_f32_sgpr_vgpr_vgpr:
; GFX9: ; %bb.0:
; GFX9-NEXT: v_fma_f32 v0, s0, v0, v1
; GFX9-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: v_fma_f32_sgpr_vgpr_vgpr:
; GFX10: ; %bb.0:
; GFX10-NEXT: v_fma_f32 v0, s0, v0, v1
; GFX10-NEXT: ; return to shader part epilog
;
; GFX11-LABEL: v_fma_f32_sgpr_vgpr_vgpr:
; GFX11: ; %bb.0:
; GFX11-NEXT: v_fma_f32 v0, s0, v0, v1
; GFX11-NEXT: ; return to shader part epilog
  %fma = call float @llvm.fma.f32(float %x, float %y, float %z)
  ret float %fma
}

; amdgpu_ps variant with %y passed inreg; the gfx1010/gfx1100 checks list s0 as the first source.
define amdgpu_ps float @v_fma_f32_vgpr_sgpr_vgpr(float %x, float inreg %y, float %z) {
; GFX6-LABEL: v_fma_f32_vgpr_sgpr_vgpr:
; GFX6: ; %bb.0:
; GFX6-NEXT: v_fma_f32 v0, v0, s0, v1
; GFX6-NEXT: ; return to shader part epilog
;
; GFX8-LABEL: v_fma_f32_vgpr_sgpr_vgpr:
; GFX8: ; %bb.0:
; GFX8-NEXT: v_fma_f32 v0, v0, s0, v1
; GFX8-NEXT: ; return to shader part epilog
;
; GFX9-LABEL: v_fma_f32_vgpr_sgpr_vgpr:
; GFX9: ; %bb.0:
; GFX9-NEXT: v_fma_f32 v0, v0, s0, v1
; GFX9-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: v_fma_f32_vgpr_sgpr_vgpr:
; GFX10: ; %bb.0:
; GFX10-NEXT: v_fma_f32 v0, s0, v0, v1
; GFX10-NEXT: ; return to shader part epilog
;
; GFX11-LABEL: v_fma_f32_vgpr_sgpr_vgpr:
; GFX11: ; %bb.0:
; GFX11-NEXT: v_fma_f32 v0, s0, v0, v1
; GFX11-NEXT: ; return to shader part epilog
  %fma = call float @llvm.fma.f32(float %x, float %y, float %z)
  ret float %fma
}

; amdgpu_ps variant with all operands inreg; the checks copy one or two of them into VGPRs first.
define amdgpu_ps float @v_fma_f32_sgpr_sgpr_sgpr(float inreg %x, float inreg %y, float inreg %z) {
; GFX6-LABEL: v_fma_f32_sgpr_sgpr_sgpr:
; GFX6: ; %bb.0:
; GFX6-NEXT: v_mov_b32_e32 v0, s1
; GFX6-NEXT: v_mov_b32_e32 v1, s2
; GFX6-NEXT: v_fma_f32 v0, s0, v0, v1
; GFX6-NEXT: ; return to shader part epilog
;
; GFX8-LABEL: v_fma_f32_sgpr_sgpr_sgpr:
; GFX8: ; %bb.0:
; GFX8-NEXT: v_mov_b32_e32 v0, s1
; GFX8-NEXT: v_mov_b32_e32 v1, s2
; GFX8-NEXT: v_fma_f32 v0, s0, v0, v1
; GFX8-NEXT: ; return to shader part epilog
;
; GFX9-LABEL: v_fma_f32_sgpr_sgpr_sgpr:
; GFX9: ; %bb.0:
; GFX9-NEXT: v_mov_b32_e32 v0, s1
; GFX9-NEXT: v_mov_b32_e32 v1, s2
; GFX9-NEXT: v_fma_f32 v0, s0, v0, v1
; GFX9-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: v_fma_f32_sgpr_sgpr_sgpr:
; GFX10: ; %bb.0:
; GFX10-NEXT: v_mov_b32_e32 v0, s2
; GFX10-NEXT: v_fma_f32 v0, s1, s0, v0
; GFX10-NEXT: ; return to shader part epilog
;
; GFX11-LABEL: v_fma_f32_sgpr_sgpr_sgpr:
; GFX11: ; %bb.0:
; GFX11-NEXT: v_mov_b32_e32 v0, s2
; GFX11-NEXT: v_fma_f32 v0, s1, s0, v0
; GFX11-NEXT: ; return to shader part epilog
  %fma = call float @llvm.fma.f32(float %x, float %y, float %z)
  ret float %fma
}

; f32 fma with the first multiplicand negated.
define float @v_fma_f32_fneg_lhs(float %x, float %y, float %z) {
; GFX6-LABEL: v_fma_f32_fneg_lhs:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT: v_fma_f32 v0, -v0, v1, v2
; GFX6-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_fma_f32_fneg_lhs:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_fma_f32 v0, -v0, v1, v2
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_fma_f32_fneg_lhs:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_fma_f32 v0, -v0, v1, v2
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_fma_f32_fneg_lhs:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_fma_f32 v0, -v0, v1, v2
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_fma_f32_fneg_lhs:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_fma_f32 v0, -v0, v1, v2
; GFX11-NEXT: s_setpc_b64 s[30:31]
  %neg.x = fneg float %x
  %fma = call float @llvm.fma.f32(float %neg.x, float %y, float %z)
  ret float %fma
}

define float @v_fma_f32_fneg_rhs(float %x, float %y, float %z) {
; GFX6-LABEL: v_fma_f32_fneg_rhs:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT: v_fma_f32 v0, v0, -v1, v2
; GFX6-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_fma_f32_fneg_rhs:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_fma_f32 v0, v0, -v1, v2
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL:
v_fma_f32_fneg_rhs: 935; GFX9: ; %bb.0: 936; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 937; GFX9-NEXT: v_fma_f32 v0, v0, -v1, v2 938; GFX9-NEXT: s_setpc_b64 s[30:31] 939; 940; GFX10-LABEL: v_fma_f32_fneg_rhs: 941; GFX10: ; %bb.0: 942; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 943; GFX10-NEXT: v_fma_f32 v0, v0, -v1, v2 944; GFX10-NEXT: s_setpc_b64 s[30:31] 945; 946; GFX11-LABEL: v_fma_f32_fneg_rhs: 947; GFX11: ; %bb.0: 948; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 949; GFX11-NEXT: v_fma_f32 v0, v0, -v1, v2 950; GFX11-NEXT: s_setpc_b64 s[30:31] 951 %neg.y = fneg float %y 952 %fma = call float @llvm.fma.f32(float %x, float %neg.y, float %z) 953 ret float %fma 954} 955 956define float @v_fma_f32_fneg_z(float %x, float %y, float %z) { 957; GFX6-LABEL: v_fma_f32_fneg_z: 958; GFX6: ; %bb.0: 959; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 960; GFX6-NEXT: v_fma_f32 v0, v0, v1, -v2 961; GFX6-NEXT: s_setpc_b64 s[30:31] 962; 963; GFX8-LABEL: v_fma_f32_fneg_z: 964; GFX8: ; %bb.0: 965; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 966; GFX8-NEXT: v_fma_f32 v0, v0, v1, -v2 967; GFX8-NEXT: s_setpc_b64 s[30:31] 968; 969; GFX9-LABEL: v_fma_f32_fneg_z: 970; GFX9: ; %bb.0: 971; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 972; GFX9-NEXT: v_fma_f32 v0, v0, v1, -v2 973; GFX9-NEXT: s_setpc_b64 s[30:31] 974; 975; GFX10-LABEL: v_fma_f32_fneg_z: 976; GFX10: ; %bb.0: 977; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 978; GFX10-NEXT: v_fma_f32 v0, v0, v1, -v2 979; GFX10-NEXT: s_setpc_b64 s[30:31] 980; 981; GFX11-LABEL: v_fma_f32_fneg_z: 982; GFX11: ; %bb.0: 983; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 984; GFX11-NEXT: v_fma_f32 v0, v0, v1, -v2 985; GFX11-NEXT: s_setpc_b64 s[30:31] 986 %neg.z = fneg float %z 987 %fma = call float @llvm.fma.f32(float %x, float %y, float %neg.z) 988 ret float %fma 989} 990 991define amdgpu_ps float @dont_crash_after_fma_mix_select_attempt(float inreg %x, float %y, float %z) { 992; GFX6-LABEL: 
dont_crash_after_fma_mix_select_attempt: 993; GFX6: ; %bb.0: ; %.entry 994; GFX6-NEXT: v_fma_f32 v0, |s0|, v0, v1 995; GFX6-NEXT: ; return to shader part epilog 996; 997; GFX8-LABEL: dont_crash_after_fma_mix_select_attempt: 998; GFX8: ; %bb.0: ; %.entry 999; GFX8-NEXT: v_fma_f32 v0, |s0|, v0, v1 1000; GFX8-NEXT: ; return to shader part epilog 1001; 1002; GFX9-LABEL: dont_crash_after_fma_mix_select_attempt: 1003; GFX9: ; %bb.0: ; %.entry 1004; GFX9-NEXT: v_fma_f32 v0, |s0|, v0, v1 1005; GFX9-NEXT: ; return to shader part epilog 1006; 1007; GFX10-LABEL: dont_crash_after_fma_mix_select_attempt: 1008; GFX10: ; %bb.0: ; %.entry 1009; GFX10-NEXT: v_fma_f32 v0, |s0|, v0, v1 1010; GFX10-NEXT: ; return to shader part epilog 1011; 1012; GFX11-LABEL: dont_crash_after_fma_mix_select_attempt: 1013; GFX11: ; %bb.0: ; %.entry 1014; GFX11-NEXT: v_fma_f32 v0, |s0|, v0, v1 1015; GFX11-NEXT: ; return to shader part epilog 1016.entry: 1017 %fabs.x = call contract float @llvm.fabs.f32(float %x) 1018 %fma = call float @llvm.fma.f32(float %fabs.x, float %y, float %z) 1019 ret float %fma 1020} 1021 1022declare half @llvm.fma.f16(half, half, half) #0 1023declare float @llvm.fma.f32(float, float, float) #0 1024declare double @llvm.fma.f64(double, double, double) #0 1025 1026declare half @llvm.fabs.f16(half) #0 1027declare float @llvm.fabs.f32(float) #0 1028 1029declare <2 x half> @llvm.fma.v2f16(<2 x half>, <2 x half>, <2 x half>) #0 1030declare <2 x float> @llvm.fma.v2f32(<2 x float>, <2 x float>, <2 x float>) #0 1031declare <2 x double> @llvm.fma.v2f64(<2 x double>, <2 x double>, <2 x double>) #0 1032 1033declare <3 x half> @llvm.fma.v3f16(<3 x half>, <3 x half>, <3 x half>) #0 1034declare <4 x half> @llvm.fma.v4f16(<4 x half>, <4 x half>, <4 x half>) #0 1035 1036attributes #0 = { nounwind readnone speculatable willreturn } 1037