1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py 2; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX1100,SDAG-GFX1100 %s 3; RUN: llc -mtriple=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX900,SDAG-GFX900 %s 4; RUN: llc -mtriple=amdgcn -mcpu=gfx906 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX906,SDAG-GFX906 %s 5; RUN: llc -mtriple=amdgcn -mcpu=gfx9-generic -verify-machineinstrs --amdhsa-code-object-version=6 < %s | FileCheck -check-prefixes=GFX9GEN,SDAG-GFX9GEN %s 6; RUN: llc -mtriple=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefixes=VI,SDAG-VI %s 7; RUN: llc -mtriple=amdgcn -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -check-prefixes=CI,SDAG-CI %s 8 9; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX1100,GISEL-GFX1100 %s 10; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX900,GISEL-GFX900 %s 11; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx906 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX906,GISEL-GFX906 %s 12; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx9-generic --amdhsa-code-object-version=6 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9GEN,GISEL-GFX9GEN %s 13; RUN: llc -global-isel -mtriple=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefixes=VI,GISEL-VI %s 14; RUN: llc -global-isel -mtriple=amdgcn -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -check-prefixes=CI,GISEL-CI %s 15 16define float @v_mad_mix_f32_f16lo_f16lo_f16lo(half %src0, half %src1, half %src2) #0 { 17; GFX1100-LABEL: v_mad_mix_f32_f16lo_f16lo_f16lo: 18; GFX1100: ; %bb.0: 19; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 20; GFX1100-NEXT: v_fma_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,1] 21; GFX1100-NEXT: s_setpc_b64 s[30:31] 22; 23; GFX900-LABEL: 
v_mad_mix_f32_f16lo_f16lo_f16lo: 24; GFX900: ; %bb.0: 25; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 26; GFX900-NEXT: v_mad_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,1] 27; GFX900-NEXT: s_setpc_b64 s[30:31] 28; 29; GFX906-LABEL: v_mad_mix_f32_f16lo_f16lo_f16lo: 30; GFX906: ; %bb.0: 31; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 32; GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,1] 33; GFX906-NEXT: s_setpc_b64 s[30:31] 34; 35; GFX9GEN-LABEL: v_mad_mix_f32_f16lo_f16lo_f16lo: 36; GFX9GEN: ; %bb.0: 37; GFX9GEN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 38; GFX9GEN-NEXT: v_cvt_f32_f16_e32 v3, v0 39; GFX9GEN-NEXT: v_cvt_f32_f16_e32 v1, v1 40; GFX9GEN-NEXT: v_cvt_f32_f16_e32 v0, v2 41; GFX9GEN-NEXT: v_mac_f32_e32 v0, v3, v1 42; GFX9GEN-NEXT: s_setpc_b64 s[30:31] 43; 44; VI-LABEL: v_mad_mix_f32_f16lo_f16lo_f16lo: 45; VI: ; %bb.0: 46; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 47; VI-NEXT: v_cvt_f32_f16_e32 v3, v0 48; VI-NEXT: v_cvt_f32_f16_e32 v1, v1 49; VI-NEXT: v_cvt_f32_f16_e32 v0, v2 50; VI-NEXT: v_mac_f32_e32 v0, v3, v1 51; VI-NEXT: s_setpc_b64 s[30:31] 52; 53; SDAG-CI-LABEL: v_mad_mix_f32_f16lo_f16lo_f16lo: 54; SDAG-CI: ; %bb.0: 55; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 56; SDAG-CI-NEXT: v_mad_f32 v0, v0, v1, v2 57; SDAG-CI-NEXT: s_setpc_b64 s[30:31] 58; 59; GISEL-CI-LABEL: v_mad_mix_f32_f16lo_f16lo_f16lo: 60; GISEL-CI: ; %bb.0: 61; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 62; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v3, v0 63; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1 64; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v2 65; GISEL-CI-NEXT: v_mac_f32_e32 v0, v3, v1 66; GISEL-CI-NEXT: s_setpc_b64 s[30:31] 67 %src0.ext = fpext half %src0 to float 68 %src1.ext = fpext half %src1 to float 69 %src2.ext = fpext half %src2 to float 70 %result = tail call float @llvm.fmuladd.f32(float %src0.ext, float %src1.ext, float %src2.ext) 71 ret float %result 72} 73 74define float @v_mad_mix_f32_f16hi_f16hi_f16hi_int(i32 %src0, i32 %src1, 
i32 %src2) #0 { 75; GFX1100-LABEL: v_mad_mix_f32_f16hi_f16hi_f16hi_int: 76; GFX1100: ; %bb.0: 77; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 78; GFX1100-NEXT: v_fma_mix_f32 v0, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1] 79; GFX1100-NEXT: s_setpc_b64 s[30:31] 80; 81; GFX900-LABEL: v_mad_mix_f32_f16hi_f16hi_f16hi_int: 82; GFX900: ; %bb.0: 83; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 84; GFX900-NEXT: v_mad_mix_f32 v0, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1] 85; GFX900-NEXT: s_setpc_b64 s[30:31] 86; 87; GFX906-LABEL: v_mad_mix_f32_f16hi_f16hi_f16hi_int: 88; GFX906: ; %bb.0: 89; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 90; GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1] 91; GFX906-NEXT: s_setpc_b64 s[30:31] 92; 93; GFX9GEN-LABEL: v_mad_mix_f32_f16hi_f16hi_f16hi_int: 94; GFX9GEN: ; %bb.0: 95; GFX9GEN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 96; GFX9GEN-NEXT: v_cvt_f32_f16_sdwa v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 97; GFX9GEN-NEXT: v_cvt_f32_f16_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 98; GFX9GEN-NEXT: v_cvt_f32_f16_sdwa v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 99; GFX9GEN-NEXT: v_mac_f32_e32 v0, v3, v1 100; GFX9GEN-NEXT: s_setpc_b64 s[30:31] 101; 102; VI-LABEL: v_mad_mix_f32_f16hi_f16hi_f16hi_int: 103; VI: ; %bb.0: 104; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 105; VI-NEXT: v_cvt_f32_f16_sdwa v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 106; VI-NEXT: v_cvt_f32_f16_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 107; VI-NEXT: v_cvt_f32_f16_sdwa v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 108; VI-NEXT: v_mac_f32_e32 v0, v3, v1 109; VI-NEXT: s_setpc_b64 s[30:31] 110; 111; CI-LABEL: v_mad_mix_f32_f16hi_f16hi_f16hi_int: 112; CI: ; %bb.0: 113; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 114; CI-NEXT: v_lshrrev_b32_e32 v0, 16, v0 115; CI-NEXT: v_lshrrev_b32_e32 v1, 16, v1 116; CI-NEXT: 
v_lshrrev_b32_e32 v2, 16, v2 117; CI-NEXT: v_cvt_f32_f16_e32 v3, v0 118; CI-NEXT: v_cvt_f32_f16_e32 v1, v1 119; CI-NEXT: v_cvt_f32_f16_e32 v0, v2 120; CI-NEXT: v_mac_f32_e32 v0, v3, v1 121; CI-NEXT: s_setpc_b64 s[30:31] 122 %src0.hi = lshr i32 %src0, 16 123 %src1.hi = lshr i32 %src1, 16 124 %src2.hi = lshr i32 %src2, 16 125 %src0.i16 = trunc i32 %src0.hi to i16 126 %src1.i16 = trunc i32 %src1.hi to i16 127 %src2.i16 = trunc i32 %src2.hi to i16 128 %src0.fp16 = bitcast i16 %src0.i16 to half 129 %src1.fp16 = bitcast i16 %src1.i16 to half 130 %src2.fp16 = bitcast i16 %src2.i16 to half 131 %src0.ext = fpext half %src0.fp16 to float 132 %src1.ext = fpext half %src1.fp16 to float 133 %src2.ext = fpext half %src2.fp16 to float 134 %result = tail call float @llvm.fmuladd.f32(float %src0.ext, float %src1.ext, float %src2.ext) 135 ret float %result 136} 137 138define float @v_mad_mix_f32_f16hi_f16hi_f16hi_elt(<2 x half> %src0, <2 x half> %src1, <2 x half> %src2) #0 { 139; GFX1100-LABEL: v_mad_mix_f32_f16hi_f16hi_f16hi_elt: 140; GFX1100: ; %bb.0: 141; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 142; GFX1100-NEXT: v_fma_mix_f32 v0, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1] 143; GFX1100-NEXT: s_setpc_b64 s[30:31] 144; 145; GFX900-LABEL: v_mad_mix_f32_f16hi_f16hi_f16hi_elt: 146; GFX900: ; %bb.0: 147; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 148; GFX900-NEXT: v_mad_mix_f32 v0, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1] 149; GFX900-NEXT: s_setpc_b64 s[30:31] 150; 151; GFX906-LABEL: v_mad_mix_f32_f16hi_f16hi_f16hi_elt: 152; GFX906: ; %bb.0: 153; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 154; GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1] 155; GFX906-NEXT: s_setpc_b64 s[30:31] 156; 157; GFX9GEN-LABEL: v_mad_mix_f32_f16hi_f16hi_f16hi_elt: 158; GFX9GEN: ; %bb.0: 159; GFX9GEN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 160; GFX9GEN-NEXT: v_cvt_f32_f16_sdwa v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 161; 
GFX9GEN-NEXT: v_cvt_f32_f16_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 162; GFX9GEN-NEXT: v_cvt_f32_f16_sdwa v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 163; GFX9GEN-NEXT: v_mac_f32_e32 v0, v3, v1 164; GFX9GEN-NEXT: s_setpc_b64 s[30:31] 165; 166; VI-LABEL: v_mad_mix_f32_f16hi_f16hi_f16hi_elt: 167; VI: ; %bb.0: 168; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 169; VI-NEXT: v_cvt_f32_f16_sdwa v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 170; VI-NEXT: v_cvt_f32_f16_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 171; VI-NEXT: v_cvt_f32_f16_sdwa v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 172; VI-NEXT: v_mac_f32_e32 v0, v3, v1 173; VI-NEXT: s_setpc_b64 s[30:31] 174; 175; SDAG-CI-LABEL: v_mad_mix_f32_f16hi_f16hi_f16hi_elt: 176; SDAG-CI: ; %bb.0: 177; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 178; SDAG-CI-NEXT: v_mad_f32 v0, v1, v3, v5 179; SDAG-CI-NEXT: s_setpc_b64 s[30:31] 180; 181; GISEL-CI-LABEL: v_mad_mix_f32_f16hi_f16hi_f16hi_elt: 182; GISEL-CI: ; %bb.0: 183; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 184; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1 185; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v2, v3 186; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v5 187; GISEL-CI-NEXT: v_mac_f32_e32 v0, v1, v2 188; GISEL-CI-NEXT: s_setpc_b64 s[30:31] 189 %src0.hi = extractelement <2 x half> %src0, i32 1 190 %src1.hi = extractelement <2 x half> %src1, i32 1 191 %src2.hi = extractelement <2 x half> %src2, i32 1 192 %src0.ext = fpext half %src0.hi to float 193 %src1.ext = fpext half %src1.hi to float 194 %src2.ext = fpext half %src2.hi to float 195 %result = tail call float @llvm.fmuladd.f32(float %src0.ext, float %src1.ext, float %src2.ext) 196 ret float %result 197} 198 199define <2 x float> @v_mad_mix_v2f32(<2 x half> %src0, <2 x half> %src1, <2 x half> %src2) #0 { 200; GFX1100-LABEL: v_mad_mix_v2f32: 201; GFX1100: ; %bb.0: 202; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 203; 
GFX1100-NEXT: v_fma_mix_f32 v3, v0, v1, v2 op_sel_hi:[1,1,1] 204; GFX1100-NEXT: v_fma_mix_f32 v1, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1] 205; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_2) 206; GFX1100-NEXT: v_mov_b32_e32 v0, v3 207; GFX1100-NEXT: s_setpc_b64 s[30:31] 208; 209; SDAG-GFX900-LABEL: v_mad_mix_v2f32: 210; SDAG-GFX900: ; %bb.0: 211; SDAG-GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 212; SDAG-GFX900-NEXT: v_mad_mix_f32 v3, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1] 213; SDAG-GFX900-NEXT: v_mad_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,1] 214; SDAG-GFX900-NEXT: v_mov_b32_e32 v1, v3 215; SDAG-GFX900-NEXT: s_setpc_b64 s[30:31] 216; 217; SDAG-GFX906-LABEL: v_mad_mix_v2f32: 218; SDAG-GFX906: ; %bb.0: 219; SDAG-GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 220; SDAG-GFX906-NEXT: v_fma_mix_f32 v3, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1] 221; SDAG-GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,1] 222; SDAG-GFX906-NEXT: v_mov_b32_e32 v1, v3 223; SDAG-GFX906-NEXT: s_setpc_b64 s[30:31] 224; 225; SDAG-GFX9GEN-LABEL: v_mad_mix_v2f32: 226; SDAG-GFX9GEN: ; %bb.0: 227; SDAG-GFX9GEN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 228; SDAG-GFX9GEN-NEXT: v_cvt_f32_f16_sdwa v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 229; SDAG-GFX9GEN-NEXT: v_cvt_f32_f16_e32 v4, v0 230; SDAG-GFX9GEN-NEXT: v_cvt_f32_f16_sdwa v5, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 231; SDAG-GFX9GEN-NEXT: v_cvt_f32_f16_e32 v6, v1 232; SDAG-GFX9GEN-NEXT: v_cvt_f32_f16_sdwa v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 233; SDAG-GFX9GEN-NEXT: v_cvt_f32_f16_e32 v0, v2 234; SDAG-GFX9GEN-NEXT: v_mac_f32_e32 v1, v3, v5 235; SDAG-GFX9GEN-NEXT: v_mac_f32_e32 v0, v4, v6 236; SDAG-GFX9GEN-NEXT: s_setpc_b64 s[30:31] 237; 238; SDAG-VI-LABEL: v_mad_mix_v2f32: 239; SDAG-VI: ; %bb.0: 240; SDAG-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 241; SDAG-VI-NEXT: v_cvt_f32_f16_sdwa v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 242; 
SDAG-VI-NEXT: v_cvt_f32_f16_e32 v4, v0 243; SDAG-VI-NEXT: v_cvt_f32_f16_sdwa v5, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 244; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v6, v1 245; SDAG-VI-NEXT: v_cvt_f32_f16_sdwa v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 246; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v0, v2 247; SDAG-VI-NEXT: v_mac_f32_e32 v1, v3, v5 248; SDAG-VI-NEXT: v_mac_f32_e32 v0, v4, v6 249; SDAG-VI-NEXT: s_setpc_b64 s[30:31] 250; 251; SDAG-CI-LABEL: v_mad_mix_v2f32: 252; SDAG-CI: ; %bb.0: 253; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 254; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v5, v5 255; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v6, v3 256; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v1, v1 257; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v4, v4 258; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v3, v5 259; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v5, v6 260; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v2, v2 261; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v6, v0 262; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v1, v1 263; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v0, v4 264; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v2, v2 265; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v4, v6 266; SDAG-CI-NEXT: v_mac_f32_e32 v3, v1, v5 267; SDAG-CI-NEXT: v_mov_b32_e32 v1, v3 268; SDAG-CI-NEXT: v_mac_f32_e32 v0, v4, v2 269; SDAG-CI-NEXT: s_setpc_b64 s[30:31] 270; 271; GISEL-GFX900-LABEL: v_mad_mix_v2f32: 272; GISEL-GFX900: ; %bb.0: 273; GISEL-GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 274; GISEL-GFX900-NEXT: v_mad_mix_f32 v3, v0, v1, v2 op_sel_hi:[1,1,1] 275; GISEL-GFX900-NEXT: v_mad_mix_f32 v1, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1] 276; GISEL-GFX900-NEXT: v_mov_b32_e32 v0, v3 277; GISEL-GFX900-NEXT: s_setpc_b64 s[30:31] 278; 279; GISEL-GFX906-LABEL: v_mad_mix_v2f32: 280; GISEL-GFX906: ; %bb.0: 281; GISEL-GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 282; GISEL-GFX906-NEXT: v_fma_mix_f32 v3, v0, v1, v2 op_sel_hi:[1,1,1] 283; GISEL-GFX906-NEXT: v_fma_mix_f32 v1, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1] 284; GISEL-GFX906-NEXT: v_mov_b32_e32 v0, v3 
285; GISEL-GFX906-NEXT: s_setpc_b64 s[30:31] 286; 287; GISEL-GFX9GEN-LABEL: v_mad_mix_v2f32: 288; GISEL-GFX9GEN: ; %bb.0: 289; GISEL-GFX9GEN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 290; GISEL-GFX9GEN-NEXT: v_cvt_f32_f16_e32 v3, v0 291; GISEL-GFX9GEN-NEXT: v_cvt_f32_f16_sdwa v4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 292; GISEL-GFX9GEN-NEXT: v_cvt_f32_f16_e32 v5, v1 293; GISEL-GFX9GEN-NEXT: v_cvt_f32_f16_sdwa v6, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 294; GISEL-GFX9GEN-NEXT: v_cvt_f32_f16_e32 v0, v2 295; GISEL-GFX9GEN-NEXT: v_cvt_f32_f16_sdwa v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 296; GISEL-GFX9GEN-NEXT: v_mac_f32_e32 v0, v3, v5 297; GISEL-GFX9GEN-NEXT: v_mac_f32_e32 v1, v4, v6 298; GISEL-GFX9GEN-NEXT: s_setpc_b64 s[30:31] 299; 300; GISEL-VI-LABEL: v_mad_mix_v2f32: 301; GISEL-VI: ; %bb.0: 302; GISEL-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 303; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v3, v0 304; GISEL-VI-NEXT: v_cvt_f32_f16_sdwa v4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 305; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v5, v1 306; GISEL-VI-NEXT: v_cvt_f32_f16_sdwa v6, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 307; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v0, v2 308; GISEL-VI-NEXT: v_cvt_f32_f16_sdwa v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 309; GISEL-VI-NEXT: v_mac_f32_e32 v0, v3, v5 310; GISEL-VI-NEXT: v_mac_f32_e32 v1, v4, v6 311; GISEL-VI-NEXT: s_setpc_b64 s[30:31] 312; 313; GISEL-CI-LABEL: v_mad_mix_v2f32: 314; GISEL-CI: ; %bb.0: 315; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 316; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v6, v0 317; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v7, v1 318; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v2, v2 319; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v3, v3 320; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v4 321; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v5 322; GISEL-CI-NEXT: v_mac_f32_e32 v0, v6, v2 323; GISEL-CI-NEXT: v_mac_f32_e32 v1, v7, v3 324; GISEL-CI-NEXT: s_setpc_b64 
s[30:31] 325 %src0.ext = fpext <2 x half> %src0 to <2 x float> 326 %src1.ext = fpext <2 x half> %src1 to <2 x float> 327 %src2.ext = fpext <2 x half> %src2 to <2 x float> 328 %result = tail call <2 x float> @llvm.fmuladd.v2f32(<2 x float> %src0.ext, <2 x float> %src1.ext, <2 x float> %src2.ext) 329 ret <2 x float> %result 330} 331 332define <2 x float> @v_mad_mix_v2f32_shuffle(<2 x half> %src0, <2 x half> %src1, <2 x half> %src2) #0 { 333; GFX1100-LABEL: v_mad_mix_v2f32_shuffle: 334; GFX1100: ; %bb.0: 335; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 336; GFX1100-NEXT: v_fma_mix_f32 v3, v0, v1, v2 op_sel:[1,0,1] op_sel_hi:[1,1,1] 337; GFX1100-NEXT: v_fma_mix_f32 v1, v0, v1, v2 op_sel:[0,1,1] op_sel_hi:[1,1,1] 338; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_2) 339; GFX1100-NEXT: v_mov_b32_e32 v0, v3 340; GFX1100-NEXT: s_setpc_b64 s[30:31] 341; 342; GFX900-LABEL: v_mad_mix_v2f32_shuffle: 343; GFX900: ; %bb.0: 344; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 345; GFX900-NEXT: v_mad_mix_f32 v3, v0, v1, v2 op_sel:[1,0,1] op_sel_hi:[1,1,1] 346; GFX900-NEXT: v_mad_mix_f32 v1, v0, v1, v2 op_sel:[0,1,1] op_sel_hi:[1,1,1] 347; GFX900-NEXT: v_mov_b32_e32 v0, v3 348; GFX900-NEXT: s_setpc_b64 s[30:31] 349; 350; GFX906-LABEL: v_mad_mix_v2f32_shuffle: 351; GFX906: ; %bb.0: 352; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 353; GFX906-NEXT: v_fma_mix_f32 v3, v0, v1, v2 op_sel:[1,0,1] op_sel_hi:[1,1,1] 354; GFX906-NEXT: v_fma_mix_f32 v1, v0, v1, v2 op_sel:[0,1,1] op_sel_hi:[1,1,1] 355; GFX906-NEXT: v_mov_b32_e32 v0, v3 356; GFX906-NEXT: s_setpc_b64 s[30:31] 357; 358; GFX9GEN-LABEL: v_mad_mix_v2f32_shuffle: 359; GFX9GEN: ; %bb.0: 360; GFX9GEN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 361; GFX9GEN-NEXT: v_cvt_f32_f16_sdwa v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 362; GFX9GEN-NEXT: v_cvt_f32_f16_e32 v4, v0 363; GFX9GEN-NEXT: v_cvt_f32_f16_e32 v0, v1 364; GFX9GEN-NEXT: v_cvt_f32_f16_sdwa v2, v2 dst_sel:DWORD dst_unused:UNUSED_PAD 
src0_sel:WORD_1 365; GFX9GEN-NEXT: v_cvt_f32_f16_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 366; GFX9GEN-NEXT: v_mad_f32 v0, v3, v0, v2 367; GFX9GEN-NEXT: v_mac_f32_e32 v2, v4, v1 368; GFX9GEN-NEXT: v_mov_b32_e32 v1, v2 369; GFX9GEN-NEXT: s_setpc_b64 s[30:31] 370; 371; VI-LABEL: v_mad_mix_v2f32_shuffle: 372; VI: ; %bb.0: 373; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 374; VI-NEXT: v_cvt_f32_f16_sdwa v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 375; VI-NEXT: v_cvt_f32_f16_e32 v4, v0 376; VI-NEXT: v_cvt_f32_f16_e32 v0, v1 377; VI-NEXT: v_cvt_f32_f16_sdwa v2, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 378; VI-NEXT: v_cvt_f32_f16_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 379; VI-NEXT: v_mad_f32 v0, v3, v0, v2 380; VI-NEXT: v_mac_f32_e32 v2, v4, v1 381; VI-NEXT: v_mov_b32_e32 v1, v2 382; VI-NEXT: s_setpc_b64 s[30:31] 383; 384; SDAG-CI-LABEL: v_mad_mix_v2f32_shuffle: 385; SDAG-CI: ; %bb.0: 386; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 387; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v3, v3 388; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v2, v2 389; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v1, v1 390; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v0, v0 391; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v3, v3 392; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v2, v2 393; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v1, v1 394; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v4, v0 395; SDAG-CI-NEXT: v_mad_f32 v0, v1, v2, v5 396; SDAG-CI-NEXT: v_mad_f32 v1, v4, v3, v5 397; SDAG-CI-NEXT: s_setpc_b64 s[30:31] 398; 399; GISEL-CI-LABEL: v_mad_mix_v2f32_shuffle: 400; GISEL-CI: ; %bb.0: 401; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 402; GISEL-CI-NEXT: v_lshlrev_b32_e32 v1, 16, v1 403; GISEL-CI-NEXT: v_and_b32_e32 v0, 0xffff, v0 404; GISEL-CI-NEXT: v_or_b32_e32 v0, v1, v0 405; GISEL-CI-NEXT: v_lshlrev_b32_e32 v1, 16, v5 406; GISEL-CI-NEXT: v_and_b32_e32 v4, 0xffff, v4 407; GISEL-CI-NEXT: v_or_b32_e32 v1, v1, v4 408; GISEL-CI-NEXT: v_lshrrev_b32_e32 v4, 16, v0 409; GISEL-CI-NEXT: 
v_lshrrev_b32_e32 v1, 16, v1 410; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v4, v4 411; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v5, v0 412; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v2 413; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1 414; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v2, v3 415; GISEL-CI-NEXT: v_mad_f32 v0, v4, v0, v1 416; GISEL-CI-NEXT: v_mac_f32_e32 v1, v5, v2 417; GISEL-CI-NEXT: s_setpc_b64 s[30:31] 418 %src0.shuf = shufflevector <2 x half> %src0, <2 x half> poison, <2 x i32> <i32 1, i32 0> 419 %src1.shuf = shufflevector <2 x half> %src1, <2 x half> poison, <2 x i32> <i32 0, i32 1> 420 %src2.shuf = shufflevector <2 x half> %src2, <2 x half> poison, <2 x i32> <i32 1, i32 1> 421 %src0.ext = fpext <2 x half> %src0.shuf to <2 x float> 422 %src1.ext = fpext <2 x half> %src1.shuf to <2 x float> 423 %src2.ext = fpext <2 x half> %src2.shuf to <2 x float> 424 %result = tail call <2 x float> @llvm.fmuladd.v2f32(<2 x float> %src0.ext, <2 x float> %src1.ext, <2 x float> %src2.ext) 425 ret <2 x float> %result 426} 427 428define float @v_mad_mix_f32_negf16lo_f16lo_f16lo(half %src0, half %src1, half %src2) #0 { 429; GFX1100-LABEL: v_mad_mix_f32_negf16lo_f16lo_f16lo: 430; GFX1100: ; %bb.0: 431; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 432; GFX1100-NEXT: v_fma_mix_f32 v0, -v0, v1, v2 op_sel_hi:[1,1,1] 433; GFX1100-NEXT: s_setpc_b64 s[30:31] 434; 435; GFX900-LABEL: v_mad_mix_f32_negf16lo_f16lo_f16lo: 436; GFX900: ; %bb.0: 437; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 438; GFX900-NEXT: v_mad_mix_f32 v0, -v0, v1, v2 op_sel_hi:[1,1,1] 439; GFX900-NEXT: s_setpc_b64 s[30:31] 440; 441; GFX906-LABEL: v_mad_mix_f32_negf16lo_f16lo_f16lo: 442; GFX906: ; %bb.0: 443; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 444; GFX906-NEXT: v_fma_mix_f32 v0, -v0, v1, v2 op_sel_hi:[1,1,1] 445; GFX906-NEXT: s_setpc_b64 s[30:31] 446; 447; SDAG-GFX9GEN-LABEL: v_mad_mix_f32_negf16lo_f16lo_f16lo: 448; SDAG-GFX9GEN: ; %bb.0: 449; SDAG-GFX9GEN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 450; 
SDAG-GFX9GEN-NEXT: v_cvt_f32_f16_e32 v0, v0 451; SDAG-GFX9GEN-NEXT: v_cvt_f32_f16_e32 v1, v1 452; SDAG-GFX9GEN-NEXT: v_cvt_f32_f16_e32 v2, v2 453; SDAG-GFX9GEN-NEXT: v_mad_f32 v0, -v0, v1, v2 454; SDAG-GFX9GEN-NEXT: s_setpc_b64 s[30:31] 455; 456; SDAG-VI-LABEL: v_mad_mix_f32_negf16lo_f16lo_f16lo: 457; SDAG-VI: ; %bb.0: 458; SDAG-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 459; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v0, v0 460; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v1, v1 461; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v2, v2 462; SDAG-VI-NEXT: v_mad_f32 v0, -v0, v1, v2 463; SDAG-VI-NEXT: s_setpc_b64 s[30:31] 464; 465; SDAG-CI-LABEL: v_mad_mix_f32_negf16lo_f16lo_f16lo: 466; SDAG-CI: ; %bb.0: 467; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 468; SDAG-CI-NEXT: v_mad_f32 v0, -v0, v1, v2 469; SDAG-CI-NEXT: s_setpc_b64 s[30:31] 470; 471; GISEL-GFX9GEN-LABEL: v_mad_mix_f32_negf16lo_f16lo_f16lo: 472; GISEL-GFX9GEN: ; %bb.0: 473; GISEL-GFX9GEN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 474; GISEL-GFX9GEN-NEXT: v_cvt_f32_f16_e64 v3, -v0 475; GISEL-GFX9GEN-NEXT: v_cvt_f32_f16_e32 v1, v1 476; GISEL-GFX9GEN-NEXT: v_cvt_f32_f16_e32 v0, v2 477; GISEL-GFX9GEN-NEXT: v_mac_f32_e32 v0, v3, v1 478; GISEL-GFX9GEN-NEXT: s_setpc_b64 s[30:31] 479; 480; GISEL-VI-LABEL: v_mad_mix_f32_negf16lo_f16lo_f16lo: 481; GISEL-VI: ; %bb.0: 482; GISEL-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 483; GISEL-VI-NEXT: v_cvt_f32_f16_e64 v3, -v0 484; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v1, v1 485; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v0, v2 486; GISEL-VI-NEXT: v_mac_f32_e32 v0, v3, v1 487; GISEL-VI-NEXT: s_setpc_b64 s[30:31] 488; 489; GISEL-CI-LABEL: v_mad_mix_f32_negf16lo_f16lo_f16lo: 490; GISEL-CI: ; %bb.0: 491; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 492; GISEL-CI-NEXT: v_cvt_f32_f16_e64 v3, -v0 493; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1 494; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v2 495; GISEL-CI-NEXT: v_mac_f32_e32 v0, v3, v1 496; GISEL-CI-NEXT: s_setpc_b64 s[30:31] 497 %src0.ext = fpext half 
%src0 to float 498 %src1.ext = fpext half %src1 to float 499 %src2.ext = fpext half %src2 to float 500 %src0.ext.neg = fneg float %src0.ext 501 %result = tail call float @llvm.fmuladd.f32(float %src0.ext.neg, float %src1.ext, float %src2.ext) 502 ret float %result 503} 504 505define float @v_mad_mix_f32_absf16lo_f16lo_f16lo(half %src0, half %src1, half %src2) #0 { 506; GFX1100-LABEL: v_mad_mix_f32_absf16lo_f16lo_f16lo: 507; GFX1100: ; %bb.0: 508; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 509; GFX1100-NEXT: v_fma_mix_f32 v0, |v0|, v1, v2 op_sel_hi:[1,1,1] 510; GFX1100-NEXT: s_setpc_b64 s[30:31] 511; 512; GFX900-LABEL: v_mad_mix_f32_absf16lo_f16lo_f16lo: 513; GFX900: ; %bb.0: 514; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 515; GFX900-NEXT: v_mad_mix_f32 v0, |v0|, v1, v2 op_sel_hi:[1,1,1] 516; GFX900-NEXT: s_setpc_b64 s[30:31] 517; 518; GFX906-LABEL: v_mad_mix_f32_absf16lo_f16lo_f16lo: 519; GFX906: ; %bb.0: 520; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 521; GFX906-NEXT: v_fma_mix_f32 v0, |v0|, v1, v2 op_sel_hi:[1,1,1] 522; GFX906-NEXT: s_setpc_b64 s[30:31] 523; 524; GFX9GEN-LABEL: v_mad_mix_f32_absf16lo_f16lo_f16lo: 525; GFX9GEN: ; %bb.0: 526; GFX9GEN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 527; GFX9GEN-NEXT: v_cvt_f32_f16_e32 v0, v0 528; GFX9GEN-NEXT: v_cvt_f32_f16_e32 v1, v1 529; GFX9GEN-NEXT: v_cvt_f32_f16_e32 v2, v2 530; GFX9GEN-NEXT: v_mad_f32 v0, |v0|, v1, v2 531; GFX9GEN-NEXT: s_setpc_b64 s[30:31] 532; 533; VI-LABEL: v_mad_mix_f32_absf16lo_f16lo_f16lo: 534; VI: ; %bb.0: 535; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 536; VI-NEXT: v_cvt_f32_f16_e32 v0, v0 537; VI-NEXT: v_cvt_f32_f16_e32 v1, v1 538; VI-NEXT: v_cvt_f32_f16_e32 v2, v2 539; VI-NEXT: v_mad_f32 v0, |v0|, v1, v2 540; VI-NEXT: s_setpc_b64 s[30:31] 541; 542; SDAG-CI-LABEL: v_mad_mix_f32_absf16lo_f16lo_f16lo: 543; SDAG-CI: ; %bb.0: 544; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 545; SDAG-CI-NEXT: v_mad_f32 v0, |v0|, v1, v2 546; SDAG-CI-NEXT: 
s_setpc_b64 s[30:31] 547; 548; GISEL-CI-LABEL: v_mad_mix_f32_absf16lo_f16lo_f16lo: 549; GISEL-CI: ; %bb.0: 550; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 551; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v0 552; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1 553; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v2, v2 554; GISEL-CI-NEXT: v_mad_f32 v0, |v0|, v1, v2 555; GISEL-CI-NEXT: s_setpc_b64 s[30:31] 556 %src0.ext = fpext half %src0 to float 557 %src1.ext = fpext half %src1 to float 558 %src2.ext = fpext half %src2 to float 559 %src0.ext.abs = call float @llvm.fabs.f32(float %src0.ext) 560 %result = tail call float @llvm.fmuladd.f32(float %src0.ext.abs, float %src1.ext, float %src2.ext) 561 ret float %result 562} 563 564define float @v_mad_mix_f32_negabsf16lo_f16lo_f16lo(half %src0, half %src1, half %src2) #0 { 565; GFX1100-LABEL: v_mad_mix_f32_negabsf16lo_f16lo_f16lo: 566; GFX1100: ; %bb.0: 567; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 568; GFX1100-NEXT: v_fma_mix_f32 v0, -|v0|, v1, v2 op_sel_hi:[1,1,1] 569; GFX1100-NEXT: s_setpc_b64 s[30:31] 570; 571; GFX900-LABEL: v_mad_mix_f32_negabsf16lo_f16lo_f16lo: 572; GFX900: ; %bb.0: 573; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 574; GFX900-NEXT: v_mad_mix_f32 v0, -|v0|, v1, v2 op_sel_hi:[1,1,1] 575; GFX900-NEXT: s_setpc_b64 s[30:31] 576; 577; GFX906-LABEL: v_mad_mix_f32_negabsf16lo_f16lo_f16lo: 578; GFX906: ; %bb.0: 579; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 580; GFX906-NEXT: v_fma_mix_f32 v0, -|v0|, v1, v2 op_sel_hi:[1,1,1] 581; GFX906-NEXT: s_setpc_b64 s[30:31] 582; 583; GFX9GEN-LABEL: v_mad_mix_f32_negabsf16lo_f16lo_f16lo: 584; GFX9GEN: ; %bb.0: 585; GFX9GEN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 586; GFX9GEN-NEXT: v_cvt_f32_f16_e32 v0, v0 587; GFX9GEN-NEXT: v_cvt_f32_f16_e32 v1, v1 588; GFX9GEN-NEXT: v_cvt_f32_f16_e32 v2, v2 589; GFX9GEN-NEXT: v_mad_f32 v0, -|v0|, v1, v2 590; GFX9GEN-NEXT: s_setpc_b64 s[30:31] 591; 592; VI-LABEL: v_mad_mix_f32_negabsf16lo_f16lo_f16lo: 593; VI: ; 
%bb.0: 594; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 595; VI-NEXT: v_cvt_f32_f16_e32 v0, v0 596; VI-NEXT: v_cvt_f32_f16_e32 v1, v1 597; VI-NEXT: v_cvt_f32_f16_e32 v2, v2 598; VI-NEXT: v_mad_f32 v0, -|v0|, v1, v2 599; VI-NEXT: s_setpc_b64 s[30:31] 600; 601; SDAG-CI-LABEL: v_mad_mix_f32_negabsf16lo_f16lo_f16lo: 602; SDAG-CI: ; %bb.0: 603; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 604; SDAG-CI-NEXT: v_mad_f32 v0, -|v0|, v1, v2 605; SDAG-CI-NEXT: s_setpc_b64 s[30:31] 606; 607; GISEL-CI-LABEL: v_mad_mix_f32_negabsf16lo_f16lo_f16lo: 608; GISEL-CI: ; %bb.0: 609; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 610; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v0 611; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1 612; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v2, v2 613; GISEL-CI-NEXT: v_mad_f32 v0, -|v0|, v1, v2 614; GISEL-CI-NEXT: s_setpc_b64 s[30:31] 615 %src0.ext = fpext half %src0 to float 616 %src1.ext = fpext half %src1 to float 617 %src2.ext = fpext half %src2 to float 618 %src0.ext.abs = call float @llvm.fabs.f32(float %src0.ext) 619 %src0.ext.neg.abs = fneg float %src0.ext.abs 620 %result = tail call float @llvm.fmuladd.f32(float %src0.ext.neg.abs, float %src1.ext, float %src2.ext) 621 ret float %result 622} 623 624define float @v_mad_mix_f32_f16lo_f16lo_f32(half %src0, half %src1, float %src2) #0 { 625; GFX1100-LABEL: v_mad_mix_f32_f16lo_f16lo_f32: 626; GFX1100: ; %bb.0: 627; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 628; GFX1100-NEXT: v_fma_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,0] 629; GFX1100-NEXT: s_setpc_b64 s[30:31] 630; 631; GFX900-LABEL: v_mad_mix_f32_f16lo_f16lo_f32: 632; GFX900: ; %bb.0: 633; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 634; GFX900-NEXT: v_mad_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,0] 635; GFX900-NEXT: s_setpc_b64 s[30:31] 636; 637; GFX906-LABEL: v_mad_mix_f32_f16lo_f16lo_f32: 638; GFX906: ; %bb.0: 639; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 640; GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, v2 
op_sel_hi:[1,1,0] 641; GFX906-NEXT: s_setpc_b64 s[30:31] 642; 643; GFX9GEN-LABEL: v_mad_mix_f32_f16lo_f16lo_f32: 644; GFX9GEN: ; %bb.0: 645; GFX9GEN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 646; GFX9GEN-NEXT: v_cvt_f32_f16_e32 v0, v0 647; GFX9GEN-NEXT: v_cvt_f32_f16_e32 v1, v1 648; GFX9GEN-NEXT: v_mad_f32 v0, v0, v1, v2 649; GFX9GEN-NEXT: s_setpc_b64 s[30:31] 650; 651; VI-LABEL: v_mad_mix_f32_f16lo_f16lo_f32: 652; VI: ; %bb.0: 653; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 654; VI-NEXT: v_cvt_f32_f16_e32 v0, v0 655; VI-NEXT: v_cvt_f32_f16_e32 v1, v1 656; VI-NEXT: v_mad_f32 v0, v0, v1, v2 657; VI-NEXT: s_setpc_b64 s[30:31] 658; 659; SDAG-CI-LABEL: v_mad_mix_f32_f16lo_f16lo_f32: 660; SDAG-CI: ; %bb.0: 661; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 662; SDAG-CI-NEXT: v_mad_f32 v0, v0, v1, v2 663; SDAG-CI-NEXT: s_setpc_b64 s[30:31] 664; 665; GISEL-CI-LABEL: v_mad_mix_f32_f16lo_f16lo_f32: 666; GISEL-CI: ; %bb.0: 667; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 668; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v0 669; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1 670; GISEL-CI-NEXT: v_mad_f32 v0, v0, v1, v2 671; GISEL-CI-NEXT: s_setpc_b64 s[30:31] 672 %src0.ext = fpext half %src0 to float 673 %src1.ext = fpext half %src1 to float 674 %result = tail call float @llvm.fmuladd.f32(float %src0.ext, float %src1.ext, float %src2) 675 ret float %result 676} 677 678define float @v_mad_mix_f32_f16lo_f16lo_negf32(half %src0, half %src1, float %src2) #0 { 679; GFX1100-LABEL: v_mad_mix_f32_f16lo_f16lo_negf32: 680; GFX1100: ; %bb.0: 681; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 682; GFX1100-NEXT: v_fma_mix_f32 v0, v0, v1, -v2 op_sel_hi:[1,1,0] 683; GFX1100-NEXT: s_setpc_b64 s[30:31] 684; 685; GFX900-LABEL: v_mad_mix_f32_f16lo_f16lo_negf32: 686; GFX900: ; %bb.0: 687; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 688; GFX900-NEXT: v_mad_mix_f32 v0, v0, v1, -v2 op_sel_hi:[1,1,0] 689; GFX900-NEXT: s_setpc_b64 s[30:31] 690; 691; GFX906-LABEL: 
v_mad_mix_f32_f16lo_f16lo_negf32: 692; GFX906: ; %bb.0: 693; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 694; GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, -v2 op_sel_hi:[1,1,0] 695; GFX906-NEXT: s_setpc_b64 s[30:31] 696; 697; GFX9GEN-LABEL: v_mad_mix_f32_f16lo_f16lo_negf32: 698; GFX9GEN: ; %bb.0: 699; GFX9GEN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 700; GFX9GEN-NEXT: v_cvt_f32_f16_e32 v0, v0 701; GFX9GEN-NEXT: v_cvt_f32_f16_e32 v1, v1 702; GFX9GEN-NEXT: v_mad_f32 v0, v0, v1, -v2 703; GFX9GEN-NEXT: s_setpc_b64 s[30:31] 704; 705; VI-LABEL: v_mad_mix_f32_f16lo_f16lo_negf32: 706; VI: ; %bb.0: 707; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 708; VI-NEXT: v_cvt_f32_f16_e32 v0, v0 709; VI-NEXT: v_cvt_f32_f16_e32 v1, v1 710; VI-NEXT: v_mad_f32 v0, v0, v1, -v2 711; VI-NEXT: s_setpc_b64 s[30:31] 712; 713; SDAG-CI-LABEL: v_mad_mix_f32_f16lo_f16lo_negf32: 714; SDAG-CI: ; %bb.0: 715; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 716; SDAG-CI-NEXT: v_mad_f32 v0, v0, v1, -v2 717; SDAG-CI-NEXT: s_setpc_b64 s[30:31] 718; 719; GISEL-CI-LABEL: v_mad_mix_f32_f16lo_f16lo_negf32: 720; GISEL-CI: ; %bb.0: 721; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 722; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v0 723; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1 724; GISEL-CI-NEXT: v_mad_f32 v0, v0, v1, -v2 725; GISEL-CI-NEXT: s_setpc_b64 s[30:31] 726 %src0.ext = fpext half %src0 to float 727 %src1.ext = fpext half %src1 to float 728 %src2.neg = fneg float %src2 729 %result = tail call float @llvm.fmuladd.f32(float %src0.ext, float %src1.ext, float %src2.neg) 730 ret float %result 731} 732 733define float @v_mad_mix_f32_f16lo_f16lo_absf32(half %src0, half %src1, float %src2) #0 { 734; GFX1100-LABEL: v_mad_mix_f32_f16lo_f16lo_absf32: 735; GFX1100: ; %bb.0: 736; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 737; GFX1100-NEXT: v_fma_mix_f32 v0, v0, v1, |v2| op_sel_hi:[1,1,0] 738; GFX1100-NEXT: s_setpc_b64 s[30:31] 739; 740; GFX900-LABEL: v_mad_mix_f32_f16lo_f16lo_absf32: 
741; GFX900: ; %bb.0: 742; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 743; GFX900-NEXT: v_mad_mix_f32 v0, v0, v1, |v2| op_sel_hi:[1,1,0] 744; GFX900-NEXT: s_setpc_b64 s[30:31] 745; 746; GFX906-LABEL: v_mad_mix_f32_f16lo_f16lo_absf32: 747; GFX906: ; %bb.0: 748; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 749; GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, |v2| op_sel_hi:[1,1,0] 750; GFX906-NEXT: s_setpc_b64 s[30:31] 751; 752; GFX9GEN-LABEL: v_mad_mix_f32_f16lo_f16lo_absf32: 753; GFX9GEN: ; %bb.0: 754; GFX9GEN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 755; GFX9GEN-NEXT: v_cvt_f32_f16_e32 v0, v0 756; GFX9GEN-NEXT: v_cvt_f32_f16_e32 v1, v1 757; GFX9GEN-NEXT: v_mad_f32 v0, v0, v1, |v2| 758; GFX9GEN-NEXT: s_setpc_b64 s[30:31] 759; 760; VI-LABEL: v_mad_mix_f32_f16lo_f16lo_absf32: 761; VI: ; %bb.0: 762; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 763; VI-NEXT: v_cvt_f32_f16_e32 v0, v0 764; VI-NEXT: v_cvt_f32_f16_e32 v1, v1 765; VI-NEXT: v_mad_f32 v0, v0, v1, |v2| 766; VI-NEXT: s_setpc_b64 s[30:31] 767; 768; SDAG-CI-LABEL: v_mad_mix_f32_f16lo_f16lo_absf32: 769; SDAG-CI: ; %bb.0: 770; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 771; SDAG-CI-NEXT: v_mad_f32 v0, v0, v1, |v2| 772; SDAG-CI-NEXT: s_setpc_b64 s[30:31] 773; 774; GISEL-CI-LABEL: v_mad_mix_f32_f16lo_f16lo_absf32: 775; GISEL-CI: ; %bb.0: 776; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 777; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v0 778; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1 779; GISEL-CI-NEXT: v_mad_f32 v0, v0, v1, |v2| 780; GISEL-CI-NEXT: s_setpc_b64 s[30:31] 781 %src0.ext = fpext half %src0 to float 782 %src1.ext = fpext half %src1 to float 783 %src2.abs = call float @llvm.fabs.f32(float %src2) 784 %result = tail call float @llvm.fmuladd.f32(float %src0.ext, float %src1.ext, float %src2.abs) 785 ret float %result 786} 787 788define float @v_mad_mix_f32_f16lo_f16lo_negabsf32(half %src0, half %src1, float %src2) #0 { 789; GFX1100-LABEL: v_mad_mix_f32_f16lo_f16lo_negabsf32: 790; 
GFX1100: ; %bb.0: 791; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 792; GFX1100-NEXT: v_fma_mix_f32 v0, v0, v1, -|v2| op_sel_hi:[1,1,0] 793; GFX1100-NEXT: s_setpc_b64 s[30:31] 794; 795; GFX900-LABEL: v_mad_mix_f32_f16lo_f16lo_negabsf32: 796; GFX900: ; %bb.0: 797; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 798; GFX900-NEXT: v_mad_mix_f32 v0, v0, v1, -|v2| op_sel_hi:[1,1,0] 799; GFX900-NEXT: s_setpc_b64 s[30:31] 800; 801; GFX906-LABEL: v_mad_mix_f32_f16lo_f16lo_negabsf32: 802; GFX906: ; %bb.0: 803; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 804; GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, -|v2| op_sel_hi:[1,1,0] 805; GFX906-NEXT: s_setpc_b64 s[30:31] 806; 807; GFX9GEN-LABEL: v_mad_mix_f32_f16lo_f16lo_negabsf32: 808; GFX9GEN: ; %bb.0: 809; GFX9GEN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 810; GFX9GEN-NEXT: v_cvt_f32_f16_e32 v0, v0 811; GFX9GEN-NEXT: v_cvt_f32_f16_e32 v1, v1 812; GFX9GEN-NEXT: v_mad_f32 v0, v0, v1, -|v2| 813; GFX9GEN-NEXT: s_setpc_b64 s[30:31] 814; 815; VI-LABEL: v_mad_mix_f32_f16lo_f16lo_negabsf32: 816; VI: ; %bb.0: 817; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 818; VI-NEXT: v_cvt_f32_f16_e32 v0, v0 819; VI-NEXT: v_cvt_f32_f16_e32 v1, v1 820; VI-NEXT: v_mad_f32 v0, v0, v1, -|v2| 821; VI-NEXT: s_setpc_b64 s[30:31] 822; 823; SDAG-CI-LABEL: v_mad_mix_f32_f16lo_f16lo_negabsf32: 824; SDAG-CI: ; %bb.0: 825; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 826; SDAG-CI-NEXT: v_mad_f32 v0, v0, v1, -|v2| 827; SDAG-CI-NEXT: s_setpc_b64 s[30:31] 828; 829; GISEL-CI-LABEL: v_mad_mix_f32_f16lo_f16lo_negabsf32: 830; GISEL-CI: ; %bb.0: 831; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 832; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v0 833; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1 834; GISEL-CI-NEXT: v_mad_f32 v0, v0, v1, -|v2| 835; GISEL-CI-NEXT: s_setpc_b64 s[30:31] 836 %src0.ext = fpext half %src0 to float 837 %src1.ext = fpext half %src1 to float 838 %src2.abs = call float @llvm.fabs.f32(float %src2) 839 %src2.neg.abs 
= fneg float %src2.abs 840 %result = tail call float @llvm.fmuladd.f32(float %src0.ext, float %src1.ext, float %src2.neg.abs) 841 ret float %result 842} 843 844; TODO: Fold inline immediates. Need to be careful because it is an 845; f16 inline immediate that may be converted to f32, not an actual f32 846; inline immediate. 847 848define float @v_mad_mix_f32_f16lo_f16lo_f32imm1(half %src0, half %src1) #0 { 849; SDAG-GFX1100-LABEL: v_mad_mix_f32_f16lo_f16lo_f32imm1: 850; SDAG-GFX1100: ; %bb.0: 851; SDAG-GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 852; SDAG-GFX1100-NEXT: s_mov_b32 s0, 1.0 853; SDAG-GFX1100-NEXT: s_delay_alu instid0(SALU_CYCLE_1) 854; SDAG-GFX1100-NEXT: v_fma_mix_f32 v0, v0, v1, s0 op_sel_hi:[1,1,0] 855; SDAG-GFX1100-NEXT: s_setpc_b64 s[30:31] 856; 857; SDAG-GFX900-LABEL: v_mad_mix_f32_f16lo_f16lo_f32imm1: 858; SDAG-GFX900: ; %bb.0: 859; SDAG-GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 860; SDAG-GFX900-NEXT: s_mov_b32 s4, 1.0 861; SDAG-GFX900-NEXT: v_mad_mix_f32 v0, v0, v1, s4 op_sel_hi:[1,1,0] 862; SDAG-GFX900-NEXT: s_setpc_b64 s[30:31] 863; 864; SDAG-GFX906-LABEL: v_mad_mix_f32_f16lo_f16lo_f32imm1: 865; SDAG-GFX906: ; %bb.0: 866; SDAG-GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 867; SDAG-GFX906-NEXT: s_mov_b32 s4, 1.0 868; SDAG-GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, s4 op_sel_hi:[1,1,0] 869; SDAG-GFX906-NEXT: s_setpc_b64 s[30:31] 870; 871; GFX9GEN-LABEL: v_mad_mix_f32_f16lo_f16lo_f32imm1: 872; GFX9GEN: ; %bb.0: 873; GFX9GEN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 874; GFX9GEN-NEXT: v_cvt_f32_f16_e32 v0, v0 875; GFX9GEN-NEXT: v_cvt_f32_f16_e32 v1, v1 876; GFX9GEN-NEXT: v_mad_f32 v0, v0, v1, 1.0 877; GFX9GEN-NEXT: s_setpc_b64 s[30:31] 878; 879; VI-LABEL: v_mad_mix_f32_f16lo_f16lo_f32imm1: 880; VI: ; %bb.0: 881; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 882; VI-NEXT: v_cvt_f32_f16_e32 v0, v0 883; VI-NEXT: v_cvt_f32_f16_e32 v1, v1 884; VI-NEXT: v_mad_f32 v0, v0, v1, 1.0 885; VI-NEXT: s_setpc_b64 s[30:31] 886; 887; 
SDAG-CI-LABEL: v_mad_mix_f32_f16lo_f16lo_f32imm1: 888; SDAG-CI: ; %bb.0: 889; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 890; SDAG-CI-NEXT: v_mad_f32 v0, v0, v1, 1.0 891; SDAG-CI-NEXT: s_setpc_b64 s[30:31] 892; 893; GISEL-GFX1100-LABEL: v_mad_mix_f32_f16lo_f16lo_f32imm1: 894; GISEL-GFX1100: ; %bb.0: 895; GISEL-GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 896; GISEL-GFX1100-NEXT: v_mov_b32_e32 v2, 1.0 897; GISEL-GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) 898; GISEL-GFX1100-NEXT: v_fma_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,0] 899; GISEL-GFX1100-NEXT: s_setpc_b64 s[30:31] 900; 901; GISEL-GFX900-LABEL: v_mad_mix_f32_f16lo_f16lo_f32imm1: 902; GISEL-GFX900: ; %bb.0: 903; GISEL-GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 904; GISEL-GFX900-NEXT: v_mov_b32_e32 v2, 1.0 905; GISEL-GFX900-NEXT: v_mad_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,0] 906; GISEL-GFX900-NEXT: s_setpc_b64 s[30:31] 907; 908; GISEL-GFX906-LABEL: v_mad_mix_f32_f16lo_f16lo_f32imm1: 909; GISEL-GFX906: ; %bb.0: 910; GISEL-GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 911; GISEL-GFX906-NEXT: v_mov_b32_e32 v2, 1.0 912; GISEL-GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,0] 913; GISEL-GFX906-NEXT: s_setpc_b64 s[30:31] 914; 915; GISEL-CI-LABEL: v_mad_mix_f32_f16lo_f16lo_f32imm1: 916; GISEL-CI: ; %bb.0: 917; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 918; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v0 919; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1 920; GISEL-CI-NEXT: v_mad_f32 v0, v0, v1, 1.0 921; GISEL-CI-NEXT: s_setpc_b64 s[30:31] 922 %src0.ext = fpext half %src0 to float 923 %src1.ext = fpext half %src1 to float 924 %result = tail call float @llvm.fmuladd.f32(float %src0.ext, float %src1.ext, float 1.0) 925 ret float %result 926} 927 928define float @v_mad_mix_f32_f16lo_f16lo_f32imminv2pi(half %src0, half %src1) #0 { 929; SDAG-GFX1100-LABEL: v_mad_mix_f32_f16lo_f16lo_f32imminv2pi: 930; SDAG-GFX1100: ; %bb.0: 931; SDAG-GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) 
lgkmcnt(0) 932; SDAG-GFX1100-NEXT: s_mov_b32 s0, 0.15915494 933; SDAG-GFX1100-NEXT: s_delay_alu instid0(SALU_CYCLE_1) 934; SDAG-GFX1100-NEXT: v_fma_mix_f32 v0, v0, v1, s0 op_sel_hi:[1,1,0] 935; SDAG-GFX1100-NEXT: s_setpc_b64 s[30:31] 936; 937; SDAG-GFX900-LABEL: v_mad_mix_f32_f16lo_f16lo_f32imminv2pi: 938; SDAG-GFX900: ; %bb.0: 939; SDAG-GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 940; SDAG-GFX900-NEXT: s_mov_b32 s4, 0.15915494 941; SDAG-GFX900-NEXT: v_mad_mix_f32 v0, v0, v1, s4 op_sel_hi:[1,1,0] 942; SDAG-GFX900-NEXT: s_setpc_b64 s[30:31] 943; 944; SDAG-GFX906-LABEL: v_mad_mix_f32_f16lo_f16lo_f32imminv2pi: 945; SDAG-GFX906: ; %bb.0: 946; SDAG-GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 947; SDAG-GFX906-NEXT: s_mov_b32 s4, 0.15915494 948; SDAG-GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, s4 op_sel_hi:[1,1,0] 949; SDAG-GFX906-NEXT: s_setpc_b64 s[30:31] 950; 951; GFX9GEN-LABEL: v_mad_mix_f32_f16lo_f16lo_f32imminv2pi: 952; GFX9GEN: ; %bb.0: 953; GFX9GEN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 954; GFX9GEN-NEXT: v_cvt_f32_f16_e32 v0, v0 955; GFX9GEN-NEXT: v_cvt_f32_f16_e32 v1, v1 956; GFX9GEN-NEXT: v_mad_f32 v0, v0, v1, 0.15915494 957; GFX9GEN-NEXT: s_setpc_b64 s[30:31] 958; 959; VI-LABEL: v_mad_mix_f32_f16lo_f16lo_f32imminv2pi: 960; VI: ; %bb.0: 961; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 962; VI-NEXT: v_cvt_f32_f16_e32 v0, v0 963; VI-NEXT: v_cvt_f32_f16_e32 v1, v1 964; VI-NEXT: v_mad_f32 v0, v0, v1, 0.15915494 965; VI-NEXT: s_setpc_b64 s[30:31] 966; 967; SDAG-CI-LABEL: v_mad_mix_f32_f16lo_f16lo_f32imminv2pi: 968; SDAG-CI: ; %bb.0: 969; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 970; SDAG-CI-NEXT: v_madak_f32 v0, v0, v1, 0x3e22f983 971; SDAG-CI-NEXT: s_setpc_b64 s[30:31] 972; 973; GISEL-GFX1100-LABEL: v_mad_mix_f32_f16lo_f16lo_f32imminv2pi: 974; GISEL-GFX1100: ; %bb.0: 975; GISEL-GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 976; GISEL-GFX1100-NEXT: v_mov_b32_e32 v2, 0.15915494 977; GISEL-GFX1100-NEXT: s_delay_alu 
instid0(VALU_DEP_1) 978; GISEL-GFX1100-NEXT: v_fma_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,0] 979; GISEL-GFX1100-NEXT: s_setpc_b64 s[30:31] 980; 981; GISEL-GFX900-LABEL: v_mad_mix_f32_f16lo_f16lo_f32imminv2pi: 982; GISEL-GFX900: ; %bb.0: 983; GISEL-GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 984; GISEL-GFX900-NEXT: v_mov_b32_e32 v2, 0.15915494 985; GISEL-GFX900-NEXT: v_mad_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,0] 986; GISEL-GFX900-NEXT: s_setpc_b64 s[30:31] 987; 988; GISEL-GFX906-LABEL: v_mad_mix_f32_f16lo_f16lo_f32imminv2pi: 989; GISEL-GFX906: ; %bb.0: 990; GISEL-GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 991; GISEL-GFX906-NEXT: v_mov_b32_e32 v2, 0.15915494 992; GISEL-GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,0] 993; GISEL-GFX906-NEXT: s_setpc_b64 s[30:31] 994; 995; GISEL-CI-LABEL: v_mad_mix_f32_f16lo_f16lo_f32imminv2pi: 996; GISEL-CI: ; %bb.0: 997; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 998; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v2, v0 999; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1 1000; GISEL-CI-NEXT: v_mov_b32_e32 v0, 0x3e22f983 1001; GISEL-CI-NEXT: v_mac_f32_e32 v0, v2, v1 1002; GISEL-CI-NEXT: s_setpc_b64 s[30:31] 1003 %src0.ext = fpext half %src0 to float 1004 %src1.ext = fpext half %src1 to float 1005 %result = tail call float @llvm.fmuladd.f32(float %src0.ext, float %src1.ext, float 0x3FC45F3060000000) 1006 ret float %result 1007} 1008 1009; Attempt to break inline immediate folding. If the operand is 1010; interpreted as f32, the inline immediate is really the f16 inline 1011; imm value converted to f32. 
1012; fpext f16 1/2pi = 0x3e230000 1013; f32 1/2pi = 0x3e22f983 1014 1015define float @v_mad_mix_f32_f16lo_f16lo_cvtf16imminv2pi(half %src0, half %src1) #0 { 1016; SDAG-GFX1100-LABEL: v_mad_mix_f32_f16lo_f16lo_cvtf16imminv2pi: 1017; SDAG-GFX1100: ; %bb.0: 1018; SDAG-GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1019; SDAG-GFX1100-NEXT: s_mov_b32 s0, 0x3e230000 1020; SDAG-GFX1100-NEXT: s_delay_alu instid0(SALU_CYCLE_1) 1021; SDAG-GFX1100-NEXT: v_fma_mix_f32 v0, v0, v1, s0 op_sel_hi:[1,1,0] 1022; SDAG-GFX1100-NEXT: s_setpc_b64 s[30:31] 1023; 1024; SDAG-GFX900-LABEL: v_mad_mix_f32_f16lo_f16lo_cvtf16imminv2pi: 1025; SDAG-GFX900: ; %bb.0: 1026; SDAG-GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1027; SDAG-GFX900-NEXT: s_mov_b32 s4, 0x3e230000 1028; SDAG-GFX900-NEXT: v_mad_mix_f32 v0, v0, v1, s4 op_sel_hi:[1,1,0] 1029; SDAG-GFX900-NEXT: s_setpc_b64 s[30:31] 1030; 1031; SDAG-GFX906-LABEL: v_mad_mix_f32_f16lo_f16lo_cvtf16imminv2pi: 1032; SDAG-GFX906: ; %bb.0: 1033; SDAG-GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1034; SDAG-GFX906-NEXT: s_mov_b32 s4, 0x3e230000 1035; SDAG-GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, s4 op_sel_hi:[1,1,0] 1036; SDAG-GFX906-NEXT: s_setpc_b64 s[30:31] 1037; 1038; SDAG-GFX9GEN-LABEL: v_mad_mix_f32_f16lo_f16lo_cvtf16imminv2pi: 1039; SDAG-GFX9GEN: ; %bb.0: 1040; SDAG-GFX9GEN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1041; SDAG-GFX9GEN-NEXT: v_cvt_f32_f16_e32 v0, v0 1042; SDAG-GFX9GEN-NEXT: v_cvt_f32_f16_e32 v1, v1 1043; SDAG-GFX9GEN-NEXT: v_madak_f32 v0, v0, v1, 0x3e230000 1044; SDAG-GFX9GEN-NEXT: s_setpc_b64 s[30:31] 1045; 1046; SDAG-VI-LABEL: v_mad_mix_f32_f16lo_f16lo_cvtf16imminv2pi: 1047; SDAG-VI: ; %bb.0: 1048; SDAG-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1049; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v0, v0 1050; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v1, v1 1051; SDAG-VI-NEXT: v_madak_f32 v0, v0, v1, 0x3e230000 1052; SDAG-VI-NEXT: s_setpc_b64 s[30:31] 1053; 1054; SDAG-CI-LABEL: v_mad_mix_f32_f16lo_f16lo_cvtf16imminv2pi: 
1055; SDAG-CI: ; %bb.0: 1056; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1057; SDAG-CI-NEXT: v_madak_f32 v0, v0, v1, 0x3e230000 1058; SDAG-CI-NEXT: s_setpc_b64 s[30:31] 1059; 1060; GISEL-GFX1100-LABEL: v_mad_mix_f32_f16lo_f16lo_cvtf16imminv2pi: 1061; GISEL-GFX1100: ; %bb.0: 1062; GISEL-GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1063; GISEL-GFX1100-NEXT: v_mov_b32_e32 v2, 0x3e230000 1064; GISEL-GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) 1065; GISEL-GFX1100-NEXT: v_fma_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,0] 1066; GISEL-GFX1100-NEXT: s_setpc_b64 s[30:31] 1067; 1068; GISEL-GFX900-LABEL: v_mad_mix_f32_f16lo_f16lo_cvtf16imminv2pi: 1069; GISEL-GFX900: ; %bb.0: 1070; GISEL-GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1071; GISEL-GFX900-NEXT: v_mov_b32_e32 v2, 0x3e230000 1072; GISEL-GFX900-NEXT: v_mad_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,0] 1073; GISEL-GFX900-NEXT: s_setpc_b64 s[30:31] 1074; 1075; GISEL-GFX906-LABEL: v_mad_mix_f32_f16lo_f16lo_cvtf16imminv2pi: 1076; GISEL-GFX906: ; %bb.0: 1077; GISEL-GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1078; GISEL-GFX906-NEXT: v_mov_b32_e32 v2, 0x3e230000 1079; GISEL-GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,0] 1080; GISEL-GFX906-NEXT: s_setpc_b64 s[30:31] 1081; 1082; GISEL-GFX9GEN-LABEL: v_mad_mix_f32_f16lo_f16lo_cvtf16imminv2pi: 1083; GISEL-GFX9GEN: ; %bb.0: 1084; GISEL-GFX9GEN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1085; GISEL-GFX9GEN-NEXT: v_cvt_f32_f16_e32 v2, v0 1086; GISEL-GFX9GEN-NEXT: v_cvt_f32_f16_e32 v1, v1 1087; GISEL-GFX9GEN-NEXT: v_mov_b32_e32 v0, 0x3e230000 1088; GISEL-GFX9GEN-NEXT: v_mac_f32_e32 v0, v2, v1 1089; GISEL-GFX9GEN-NEXT: s_setpc_b64 s[30:31] 1090; 1091; GISEL-VI-LABEL: v_mad_mix_f32_f16lo_f16lo_cvtf16imminv2pi: 1092; GISEL-VI: ; %bb.0: 1093; GISEL-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1094; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v2, v0 1095; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v1, v1 1096; GISEL-VI-NEXT: v_mov_b32_e32 v0, 0x3e230000 1097; 
GISEL-VI-NEXT: v_mac_f32_e32 v0, v2, v1 1098; GISEL-VI-NEXT: s_setpc_b64 s[30:31] 1099; 1100; GISEL-CI-LABEL: v_mad_mix_f32_f16lo_f16lo_cvtf16imminv2pi: 1101; GISEL-CI: ; %bb.0: 1102; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1103; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v2, v0 1104; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1 1105; GISEL-CI-NEXT: v_mov_b32_e32 v0, 0x3e230000 1106; GISEL-CI-NEXT: v_mac_f32_e32 v0, v2, v1 1107; GISEL-CI-NEXT: s_setpc_b64 s[30:31] 1108 %src0.ext = fpext half %src0 to float 1109 %src1.ext = fpext half %src1 to float 1110 %src2 = fpext half 0xH3118 to float 1111 %result = tail call float @llvm.fmuladd.f32(float %src0.ext, float %src1.ext, float %src2) 1112 ret float %result 1113} 1114 1115 1116define float @v_mad_mix_f32_f16lo_f16lo_cvtf16imm63(half %src0, half %src1) #0 { 1117; SDAG-GFX1100-LABEL: v_mad_mix_f32_f16lo_f16lo_cvtf16imm63: 1118; SDAG-GFX1100: ; %bb.0: 1119; SDAG-GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1120; SDAG-GFX1100-NEXT: s_mov_b32 s0, 0x367c0000 1121; SDAG-GFX1100-NEXT: s_delay_alu instid0(SALU_CYCLE_1) 1122; SDAG-GFX1100-NEXT: v_fma_mix_f32 v0, v0, v1, s0 op_sel_hi:[1,1,0] 1123; SDAG-GFX1100-NEXT: s_setpc_b64 s[30:31] 1124; 1125; SDAG-GFX900-LABEL: v_mad_mix_f32_f16lo_f16lo_cvtf16imm63: 1126; SDAG-GFX900: ; %bb.0: 1127; SDAG-GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1128; SDAG-GFX900-NEXT: s_mov_b32 s4, 0x367c0000 1129; SDAG-GFX900-NEXT: v_mad_mix_f32 v0, v0, v1, s4 op_sel_hi:[1,1,0] 1130; SDAG-GFX900-NEXT: s_setpc_b64 s[30:31] 1131; 1132; SDAG-GFX906-LABEL: v_mad_mix_f32_f16lo_f16lo_cvtf16imm63: 1133; SDAG-GFX906: ; %bb.0: 1134; SDAG-GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1135; SDAG-GFX906-NEXT: s_mov_b32 s4, 0x367c0000 1136; SDAG-GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, s4 op_sel_hi:[1,1,0] 1137; SDAG-GFX906-NEXT: s_setpc_b64 s[30:31] 1138; 1139; SDAG-GFX9GEN-LABEL: v_mad_mix_f32_f16lo_f16lo_cvtf16imm63: 1140; SDAG-GFX9GEN: ; %bb.0: 1141; SDAG-GFX9GEN-NEXT: s_waitcnt vmcnt(0) 
expcnt(0) lgkmcnt(0) 1142; SDAG-GFX9GEN-NEXT: v_cvt_f32_f16_e32 v0, v0 1143; SDAG-GFX9GEN-NEXT: v_cvt_f32_f16_e32 v1, v1 1144; SDAG-GFX9GEN-NEXT: v_madak_f32 v0, v0, v1, 0x367c0000 1145; SDAG-GFX9GEN-NEXT: s_setpc_b64 s[30:31] 1146; 1147; SDAG-VI-LABEL: v_mad_mix_f32_f16lo_f16lo_cvtf16imm63: 1148; SDAG-VI: ; %bb.0: 1149; SDAG-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1150; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v0, v0 1151; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v1, v1 1152; SDAG-VI-NEXT: v_madak_f32 v0, v0, v1, 0x367c0000 1153; SDAG-VI-NEXT: s_setpc_b64 s[30:31] 1154; 1155; SDAG-CI-LABEL: v_mad_mix_f32_f16lo_f16lo_cvtf16imm63: 1156; SDAG-CI: ; %bb.0: 1157; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1158; SDAG-CI-NEXT: v_madak_f32 v0, v0, v1, 0x367c0000 1159; SDAG-CI-NEXT: s_setpc_b64 s[30:31] 1160; 1161; GISEL-GFX1100-LABEL: v_mad_mix_f32_f16lo_f16lo_cvtf16imm63: 1162; GISEL-GFX1100: ; %bb.0: 1163; GISEL-GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1164; GISEL-GFX1100-NEXT: v_mov_b32_e32 v2, 0x367c0000 1165; GISEL-GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) 1166; GISEL-GFX1100-NEXT: v_fma_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,0] 1167; GISEL-GFX1100-NEXT: s_setpc_b64 s[30:31] 1168; 1169; GISEL-GFX900-LABEL: v_mad_mix_f32_f16lo_f16lo_cvtf16imm63: 1170; GISEL-GFX900: ; %bb.0: 1171; GISEL-GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1172; GISEL-GFX900-NEXT: v_mov_b32_e32 v2, 0x367c0000 1173; GISEL-GFX900-NEXT: v_mad_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,0] 1174; GISEL-GFX900-NEXT: s_setpc_b64 s[30:31] 1175; 1176; GISEL-GFX906-LABEL: v_mad_mix_f32_f16lo_f16lo_cvtf16imm63: 1177; GISEL-GFX906: ; %bb.0: 1178; GISEL-GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1179; GISEL-GFX906-NEXT: v_mov_b32_e32 v2, 0x367c0000 1180; GISEL-GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,0] 1181; GISEL-GFX906-NEXT: s_setpc_b64 s[30:31] 1182; 1183; GISEL-GFX9GEN-LABEL: v_mad_mix_f32_f16lo_f16lo_cvtf16imm63: 1184; GISEL-GFX9GEN: ; %bb.0: 1185; 
GISEL-GFX9GEN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1186; GISEL-GFX9GEN-NEXT: v_cvt_f32_f16_e32 v2, v0 1187; GISEL-GFX9GEN-NEXT: v_cvt_f32_f16_e32 v1, v1 1188; GISEL-GFX9GEN-NEXT: v_mov_b32_e32 v0, 0x367c0000 1189; GISEL-GFX9GEN-NEXT: v_mac_f32_e32 v0, v2, v1 1190; GISEL-GFX9GEN-NEXT: s_setpc_b64 s[30:31] 1191; 1192; GISEL-VI-LABEL: v_mad_mix_f32_f16lo_f16lo_cvtf16imm63: 1193; GISEL-VI: ; %bb.0: 1194; GISEL-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1195; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v2, v0 1196; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v1, v1 1197; GISEL-VI-NEXT: v_mov_b32_e32 v0, 0x367c0000 1198; GISEL-VI-NEXT: v_mac_f32_e32 v0, v2, v1 1199; GISEL-VI-NEXT: s_setpc_b64 s[30:31] 1200; 1201; GISEL-CI-LABEL: v_mad_mix_f32_f16lo_f16lo_cvtf16imm63: 1202; GISEL-CI: ; %bb.0: 1203; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1204; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v2, v0 1205; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1 1206; GISEL-CI-NEXT: v_mov_b32_e32 v0, 0x367c0000 1207; GISEL-CI-NEXT: v_mac_f32_e32 v0, v2, v1 1208; GISEL-CI-NEXT: s_setpc_b64 s[30:31] 1209 %src0.ext = fpext half %src0 to float 1210 %src1.ext = fpext half %src1 to float 1211 %src2 = fpext half 0xH003F to float 1212 %result = tail call float @llvm.fmuladd.f32(float %src0.ext, float %src1.ext, float %src2) 1213 ret float %result 1214} 1215 1216define <2 x float> @v_mad_mix_v2f32_f32imm1(<2 x half> %src0, <2 x half> %src1) #0 { 1217; SDAG-GFX1100-LABEL: v_mad_mix_v2f32_f32imm1: 1218; SDAG-GFX1100: ; %bb.0: 1219; SDAG-GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1220; SDAG-GFX1100-NEXT: s_mov_b32 s0, 1.0 1221; SDAG-GFX1100-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) 1222; SDAG-GFX1100-NEXT: v_fma_mix_f32 v2, v0, v1, s0 op_sel_hi:[1,1,0] 1223; SDAG-GFX1100-NEXT: v_fma_mix_f32 v1, v0, v1, s0 op_sel:[1,1,0] op_sel_hi:[1,1,0] 1224; SDAG-GFX1100-NEXT: v_mov_b32_e32 v0, v2 1225; SDAG-GFX1100-NEXT: s_setpc_b64 s[30:31] 1226; 1227; SDAG-GFX900-LABEL: 
v_mad_mix_v2f32_f32imm1: 1228; SDAG-GFX900: ; %bb.0: 1229; SDAG-GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1230; SDAG-GFX900-NEXT: s_mov_b32 s4, 1.0 1231; SDAG-GFX900-NEXT: v_mad_mix_f32 v2, v0, v1, s4 op_sel:[1,1,0] op_sel_hi:[1,1,0] 1232; SDAG-GFX900-NEXT: v_mad_mix_f32 v0, v0, v1, s4 op_sel_hi:[1,1,0] 1233; SDAG-GFX900-NEXT: v_mov_b32_e32 v1, v2 1234; SDAG-GFX900-NEXT: s_setpc_b64 s[30:31] 1235; 1236; SDAG-GFX906-LABEL: v_mad_mix_v2f32_f32imm1: 1237; SDAG-GFX906: ; %bb.0: 1238; SDAG-GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1239; SDAG-GFX906-NEXT: s_mov_b32 s4, 1.0 1240; SDAG-GFX906-NEXT: v_fma_mix_f32 v2, v0, v1, s4 op_sel:[1,1,0] op_sel_hi:[1,1,0] 1241; SDAG-GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, s4 op_sel_hi:[1,1,0] 1242; SDAG-GFX906-NEXT: v_mov_b32_e32 v1, v2 1243; SDAG-GFX906-NEXT: s_setpc_b64 s[30:31] 1244; 1245; SDAG-GFX9GEN-LABEL: v_mad_mix_v2f32_f32imm1: 1246; SDAG-GFX9GEN: ; %bb.0: 1247; SDAG-GFX9GEN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1248; SDAG-GFX9GEN-NEXT: v_cvt_f32_f16_sdwa v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 1249; SDAG-GFX9GEN-NEXT: v_cvt_f32_f16_e32 v0, v0 1250; SDAG-GFX9GEN-NEXT: v_cvt_f32_f16_e32 v3, v1 1251; SDAG-GFX9GEN-NEXT: v_cvt_f32_f16_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 1252; SDAG-GFX9GEN-NEXT: v_mad_f32 v0, v0, v3, 1.0 1253; SDAG-GFX9GEN-NEXT: v_mad_f32 v1, v2, v1, 1.0 1254; SDAG-GFX9GEN-NEXT: s_setpc_b64 s[30:31] 1255; 1256; SDAG-VI-LABEL: v_mad_mix_v2f32_f32imm1: 1257; SDAG-VI: ; %bb.0: 1258; SDAG-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1259; SDAG-VI-NEXT: v_cvt_f32_f16_sdwa v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 1260; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v0, v0 1261; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v3, v1 1262; SDAG-VI-NEXT: v_cvt_f32_f16_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 1263; SDAG-VI-NEXT: v_mad_f32 v0, v0, v3, 1.0 1264; SDAG-VI-NEXT: v_mad_f32 v1, v2, v1, 1.0 1265; SDAG-VI-NEXT: s_setpc_b64 
s[30:31] 1266; 1267; SDAG-CI-LABEL: v_mad_mix_v2f32_f32imm1: 1268; SDAG-CI: ; %bb.0: 1269; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1270; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v3, v3 1271; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v2, v2 1272; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v0, v0 1273; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v1, v1 1274; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v3, v3 1275; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v2, v2 1276; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v0, v0 1277; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v1, v1 1278; SDAG-CI-NEXT: v_mad_f32 v0, v0, v2, 1.0 1279; SDAG-CI-NEXT: v_mad_f32 v1, v1, v3, 1.0 1280; SDAG-CI-NEXT: s_setpc_b64 s[30:31] 1281; 1282; GISEL-GFX1100-LABEL: v_mad_mix_v2f32_f32imm1: 1283; GISEL-GFX1100: ; %bb.0: 1284; GISEL-GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1285; GISEL-GFX1100-NEXT: v_mov_b32_e32 v3, 1.0 1286; GISEL-GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) 1287; GISEL-GFX1100-NEXT: v_fma_mix_f32 v2, v0, v1, v3 op_sel_hi:[1,1,0] 1288; GISEL-GFX1100-NEXT: v_fma_mix_f32 v1, v0, v1, v3 op_sel:[1,1,0] op_sel_hi:[1,1,0] 1289; GISEL-GFX1100-NEXT: v_mov_b32_e32 v0, v2 1290; GISEL-GFX1100-NEXT: s_setpc_b64 s[30:31] 1291; 1292; GISEL-GFX900-LABEL: v_mad_mix_v2f32_f32imm1: 1293; GISEL-GFX900: ; %bb.0: 1294; GISEL-GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1295; GISEL-GFX900-NEXT: v_mov_b32_e32 v3, 1.0 1296; GISEL-GFX900-NEXT: v_mad_mix_f32 v2, v0, v1, v3 op_sel_hi:[1,1,0] 1297; GISEL-GFX900-NEXT: v_mad_mix_f32 v1, v0, v1, v3 op_sel:[1,1,0] op_sel_hi:[1,1,0] 1298; GISEL-GFX900-NEXT: v_mov_b32_e32 v0, v2 1299; GISEL-GFX900-NEXT: s_setpc_b64 s[30:31] 1300; 1301; GISEL-GFX906-LABEL: v_mad_mix_v2f32_f32imm1: 1302; GISEL-GFX906: ; %bb.0: 1303; GISEL-GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1304; GISEL-GFX906-NEXT: v_mov_b32_e32 v3, 1.0 1305; GISEL-GFX906-NEXT: v_fma_mix_f32 v2, v0, v1, v3 op_sel_hi:[1,1,0] 1306; GISEL-GFX906-NEXT: v_fma_mix_f32 v1, v0, v1, v3 op_sel:[1,1,0] op_sel_hi:[1,1,0] 
1307; GISEL-GFX906-NEXT: v_mov_b32_e32 v0, v2 1308; GISEL-GFX906-NEXT: s_setpc_b64 s[30:31] 1309; 1310; GISEL-GFX9GEN-LABEL: v_mad_mix_v2f32_f32imm1: 1311; GISEL-GFX9GEN: ; %bb.0: 1312; GISEL-GFX9GEN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1313; GISEL-GFX9GEN-NEXT: v_cvt_f32_f16_e32 v2, v0 1314; GISEL-GFX9GEN-NEXT: v_cvt_f32_f16_sdwa v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 1315; GISEL-GFX9GEN-NEXT: v_cvt_f32_f16_e32 v0, v1 1316; GISEL-GFX9GEN-NEXT: v_cvt_f32_f16_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 1317; GISEL-GFX9GEN-NEXT: v_mad_f32 v0, v2, v0, 1.0 1318; GISEL-GFX9GEN-NEXT: v_mad_f32 v1, v3, v1, 1.0 1319; GISEL-GFX9GEN-NEXT: s_setpc_b64 s[30:31] 1320; 1321; GISEL-VI-LABEL: v_mad_mix_v2f32_f32imm1: 1322; GISEL-VI: ; %bb.0: 1323; GISEL-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1324; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v2, v0 1325; GISEL-VI-NEXT: v_cvt_f32_f16_sdwa v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 1326; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v0, v1 1327; GISEL-VI-NEXT: v_cvt_f32_f16_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 1328; GISEL-VI-NEXT: v_mad_f32 v0, v2, v0, 1.0 1329; GISEL-VI-NEXT: v_mad_f32 v1, v3, v1, 1.0 1330; GISEL-VI-NEXT: s_setpc_b64 s[30:31] 1331; 1332; GISEL-CI-LABEL: v_mad_mix_v2f32_f32imm1: 1333; GISEL-CI: ; %bb.0: 1334; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1335; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v0 1336; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1 1337; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v2, v2 1338; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v3, v3 1339; GISEL-CI-NEXT: v_mad_f32 v0, v0, v2, 1.0 1340; GISEL-CI-NEXT: v_mad_f32 v1, v1, v3, 1.0 1341; GISEL-CI-NEXT: s_setpc_b64 s[30:31] 1342 %src0.ext = fpext <2 x half> %src0 to <2 x float> 1343 %src1.ext = fpext <2 x half> %src1 to <2 x float> 1344 %result = tail call <2 x float> @llvm.fmuladd.v2f32(<2 x float> %src0.ext, <2 x float> %src1.ext, <2 x float> <float 1.0, float 1.0>) 1345 ret <2 x 
float> %result 1346} 1347 1348define <2 x float> @v_mad_mix_v2f32_cvtf16imminv2pi(<2 x half> %src0, <2 x half> %src1) #0 { 1349; SDAG-GFX1100-LABEL: v_mad_mix_v2f32_cvtf16imminv2pi: 1350; SDAG-GFX1100: ; %bb.0: 1351; SDAG-GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1352; SDAG-GFX1100-NEXT: s_mov_b32 s0, 0x3e230000 1353; SDAG-GFX1100-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) 1354; SDAG-GFX1100-NEXT: v_fma_mix_f32 v2, v0, v1, s0 op_sel_hi:[1,1,0] 1355; SDAG-GFX1100-NEXT: v_fma_mix_f32 v1, v0, v1, s0 op_sel:[1,1,0] op_sel_hi:[1,1,0] 1356; SDAG-GFX1100-NEXT: v_mov_b32_e32 v0, v2 1357; SDAG-GFX1100-NEXT: s_setpc_b64 s[30:31] 1358; 1359; SDAG-GFX900-LABEL: v_mad_mix_v2f32_cvtf16imminv2pi: 1360; SDAG-GFX900: ; %bb.0: 1361; SDAG-GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1362; SDAG-GFX900-NEXT: s_mov_b32 s4, 0x3e230000 1363; SDAG-GFX900-NEXT: v_mad_mix_f32 v2, v0, v1, s4 op_sel:[1,1,0] op_sel_hi:[1,1,0] 1364; SDAG-GFX900-NEXT: v_mad_mix_f32 v0, v0, v1, s4 op_sel_hi:[1,1,0] 1365; SDAG-GFX900-NEXT: v_mov_b32_e32 v1, v2 1366; SDAG-GFX900-NEXT: s_setpc_b64 s[30:31] 1367; 1368; SDAG-GFX906-LABEL: v_mad_mix_v2f32_cvtf16imminv2pi: 1369; SDAG-GFX906: ; %bb.0: 1370; SDAG-GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1371; SDAG-GFX906-NEXT: s_mov_b32 s4, 0x3e230000 1372; SDAG-GFX906-NEXT: v_fma_mix_f32 v2, v0, v1, s4 op_sel:[1,1,0] op_sel_hi:[1,1,0] 1373; SDAG-GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, s4 op_sel_hi:[1,1,0] 1374; SDAG-GFX906-NEXT: v_mov_b32_e32 v1, v2 1375; SDAG-GFX906-NEXT: s_setpc_b64 s[30:31] 1376; 1377; SDAG-GFX9GEN-LABEL: v_mad_mix_v2f32_cvtf16imminv2pi: 1378; SDAG-GFX9GEN: ; %bb.0: 1379; SDAG-GFX9GEN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1380; SDAG-GFX9GEN-NEXT: v_cvt_f32_f16_sdwa v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 1381; SDAG-GFX9GEN-NEXT: v_cvt_f32_f16_e32 v0, v0 1382; SDAG-GFX9GEN-NEXT: v_cvt_f32_f16_e32 v3, v1 1383; SDAG-GFX9GEN-NEXT: v_cvt_f32_f16_sdwa v4, v1 
dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 1384; SDAG-GFX9GEN-NEXT: v_mov_b32_e32 v1, 0x3e230000 1385; SDAG-GFX9GEN-NEXT: v_madak_f32 v0, v0, v3, 0x3e230000 1386; SDAG-GFX9GEN-NEXT: v_mac_f32_e32 v1, v2, v4 1387; SDAG-GFX9GEN-NEXT: s_setpc_b64 s[30:31] 1388; 1389; SDAG-VI-LABEL: v_mad_mix_v2f32_cvtf16imminv2pi: 1390; SDAG-VI: ; %bb.0: 1391; SDAG-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1392; SDAG-VI-NEXT: v_cvt_f32_f16_sdwa v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 1393; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v0, v0 1394; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v3, v1 1395; SDAG-VI-NEXT: v_cvt_f32_f16_sdwa v4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 1396; SDAG-VI-NEXT: v_mov_b32_e32 v1, 0x3e230000 1397; SDAG-VI-NEXT: v_madak_f32 v0, v0, v3, 0x3e230000 1398; SDAG-VI-NEXT: v_mac_f32_e32 v1, v2, v4 1399; SDAG-VI-NEXT: s_setpc_b64 s[30:31] 1400; 1401; SDAG-CI-LABEL: v_mad_mix_v2f32_cvtf16imminv2pi: 1402; SDAG-CI: ; %bb.0: 1403; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1404; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v3, v3 1405; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v2, v2 1406; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v0, v0 1407; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v1, v1 1408; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v3, v3 1409; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v2, v2 1410; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v0, v0 1411; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v4, v1 1412; SDAG-CI-NEXT: v_mov_b32_e32 v1, 0x3e230000 1413; SDAG-CI-NEXT: v_madak_f32 v0, v0, v2, 0x3e230000 1414; SDAG-CI-NEXT: v_mac_f32_e32 v1, v4, v3 1415; SDAG-CI-NEXT: s_setpc_b64 s[30:31] 1416; 1417; GISEL-GFX1100-LABEL: v_mad_mix_v2f32_cvtf16imminv2pi: 1418; GISEL-GFX1100: ; %bb.0: 1419; GISEL-GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1420; GISEL-GFX1100-NEXT: v_mov_b32_e32 v3, 0x3e230000 1421; GISEL-GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) 1422; GISEL-GFX1100-NEXT: v_fma_mix_f32 v2, v0, v1, v3 op_sel_hi:[1,1,0] 1423; GISEL-GFX1100-NEXT: 
v_fma_mix_f32 v1, v0, v1, v3 op_sel:[1,1,0] op_sel_hi:[1,1,0] 1424; GISEL-GFX1100-NEXT: v_mov_b32_e32 v0, v2 1425; GISEL-GFX1100-NEXT: s_setpc_b64 s[30:31] 1426; 1427; GISEL-GFX900-LABEL: v_mad_mix_v2f32_cvtf16imminv2pi: 1428; GISEL-GFX900: ; %bb.0: 1429; GISEL-GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1430; GISEL-GFX900-NEXT: v_mov_b32_e32 v3, 0x3e230000 1431; GISEL-GFX900-NEXT: v_mad_mix_f32 v2, v0, v1, v3 op_sel_hi:[1,1,0] 1432; GISEL-GFX900-NEXT: v_mad_mix_f32 v1, v0, v1, v3 op_sel:[1,1,0] op_sel_hi:[1,1,0] 1433; GISEL-GFX900-NEXT: v_mov_b32_e32 v0, v2 1434; GISEL-GFX900-NEXT: s_setpc_b64 s[30:31] 1435; 1436; GISEL-GFX906-LABEL: v_mad_mix_v2f32_cvtf16imminv2pi: 1437; GISEL-GFX906: ; %bb.0: 1438; GISEL-GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1439; GISEL-GFX906-NEXT: v_mov_b32_e32 v3, 0x3e230000 1440; GISEL-GFX906-NEXT: v_fma_mix_f32 v2, v0, v1, v3 op_sel_hi:[1,1,0] 1441; GISEL-GFX906-NEXT: v_fma_mix_f32 v1, v0, v1, v3 op_sel:[1,1,0] op_sel_hi:[1,1,0] 1442; GISEL-GFX906-NEXT: v_mov_b32_e32 v0, v2 1443; GISEL-GFX906-NEXT: s_setpc_b64 s[30:31] 1444; 1445; GISEL-GFX9GEN-LABEL: v_mad_mix_v2f32_cvtf16imminv2pi: 1446; GISEL-GFX9GEN: ; %bb.0: 1447; GISEL-GFX9GEN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1448; GISEL-GFX9GEN-NEXT: v_cvt_f32_f16_e32 v2, v0 1449; GISEL-GFX9GEN-NEXT: v_cvt_f32_f16_sdwa v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 1450; GISEL-GFX9GEN-NEXT: v_cvt_f32_f16_e32 v0, v1 1451; GISEL-GFX9GEN-NEXT: v_cvt_f32_f16_sdwa v4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 1452; GISEL-GFX9GEN-NEXT: v_mov_b32_e32 v1, 0x3e230000 1453; GISEL-GFX9GEN-NEXT: v_madak_f32 v0, v2, v0, 0x3e230000 1454; GISEL-GFX9GEN-NEXT: v_mac_f32_e32 v1, v3, v4 1455; GISEL-GFX9GEN-NEXT: s_setpc_b64 s[30:31] 1456; 1457; GISEL-VI-LABEL: v_mad_mix_v2f32_cvtf16imminv2pi: 1458; GISEL-VI: ; %bb.0: 1459; GISEL-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1460; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v2, v0 1461; GISEL-VI-NEXT: 
v_cvt_f32_f16_sdwa v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 1462; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v0, v1 1463; GISEL-VI-NEXT: v_cvt_f32_f16_sdwa v4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 1464; GISEL-VI-NEXT: v_mov_b32_e32 v1, 0x3e230000 1465; GISEL-VI-NEXT: v_madak_f32 v0, v2, v0, 0x3e230000 1466; GISEL-VI-NEXT: v_mac_f32_e32 v1, v3, v4 1467; GISEL-VI-NEXT: s_setpc_b64 s[30:31] 1468; 1469; GISEL-CI-LABEL: v_mad_mix_v2f32_cvtf16imminv2pi: 1470; GISEL-CI: ; %bb.0: 1471; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1472; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v0 1473; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v4, v1 1474; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v2, v2 1475; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v3, v3 1476; GISEL-CI-NEXT: v_mov_b32_e32 v1, 0x3e230000 1477; GISEL-CI-NEXT: v_madak_f32 v0, v0, v2, 0x3e230000 1478; GISEL-CI-NEXT: v_mac_f32_e32 v1, v4, v3 1479; GISEL-CI-NEXT: s_setpc_b64 s[30:31] 1480 %src0.ext = fpext <2 x half> %src0 to <2 x float> 1481 %src1.ext = fpext <2 x half> %src1 to <2 x float> 1482 %src2 = fpext <2 x half> <half 0xH3118, half 0xH3118> to <2 x float> 1483 %result = tail call <2 x float> @llvm.fmuladd.v2f32(<2 x float> %src0.ext, <2 x float> %src1.ext, <2 x float> %src2) 1484 ret <2 x float> %result 1485} 1486 1487define <2 x float> @v_mad_mix_v2f32_f32imminv2pi(<2 x half> %src0, <2 x half> %src1) #0 { 1488; SDAG-GFX1100-LABEL: v_mad_mix_v2f32_f32imminv2pi: 1489; SDAG-GFX1100: ; %bb.0: 1490; SDAG-GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1491; SDAG-GFX1100-NEXT: s_mov_b32 s0, 0.15915494 1492; SDAG-GFX1100-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) 1493; SDAG-GFX1100-NEXT: v_fma_mix_f32 v2, v0, v1, s0 op_sel_hi:[1,1,0] 1494; SDAG-GFX1100-NEXT: v_fma_mix_f32 v1, v0, v1, s0 op_sel:[1,1,0] op_sel_hi:[1,1,0] 1495; SDAG-GFX1100-NEXT: v_mov_b32_e32 v0, v2 1496; SDAG-GFX1100-NEXT: s_setpc_b64 s[30:31] 1497; 1498; SDAG-GFX900-LABEL: v_mad_mix_v2f32_f32imminv2pi: 1499; 
SDAG-GFX900: ; %bb.0: 1500; SDAG-GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1501; SDAG-GFX900-NEXT: s_mov_b32 s4, 0.15915494 1502; SDAG-GFX900-NEXT: v_mad_mix_f32 v2, v0, v1, s4 op_sel:[1,1,0] op_sel_hi:[1,1,0] 1503; SDAG-GFX900-NEXT: v_mad_mix_f32 v0, v0, v1, s4 op_sel_hi:[1,1,0] 1504; SDAG-GFX900-NEXT: v_mov_b32_e32 v1, v2 1505; SDAG-GFX900-NEXT: s_setpc_b64 s[30:31] 1506; 1507; SDAG-GFX906-LABEL: v_mad_mix_v2f32_f32imminv2pi: 1508; SDAG-GFX906: ; %bb.0: 1509; SDAG-GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1510; SDAG-GFX906-NEXT: s_mov_b32 s4, 0.15915494 1511; SDAG-GFX906-NEXT: v_fma_mix_f32 v2, v0, v1, s4 op_sel:[1,1,0] op_sel_hi:[1,1,0] 1512; SDAG-GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, s4 op_sel_hi:[1,1,0] 1513; SDAG-GFX906-NEXT: v_mov_b32_e32 v1, v2 1514; SDAG-GFX906-NEXT: s_setpc_b64 s[30:31] 1515; 1516; SDAG-GFX9GEN-LABEL: v_mad_mix_v2f32_f32imminv2pi: 1517; SDAG-GFX9GEN: ; %bb.0: 1518; SDAG-GFX9GEN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1519; SDAG-GFX9GEN-NEXT: v_cvt_f32_f16_sdwa v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 1520; SDAG-GFX9GEN-NEXT: v_cvt_f32_f16_e32 v0, v0 1521; SDAG-GFX9GEN-NEXT: v_cvt_f32_f16_e32 v3, v1 1522; SDAG-GFX9GEN-NEXT: v_cvt_f32_f16_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 1523; SDAG-GFX9GEN-NEXT: v_mad_f32 v0, v0, v3, 0.15915494 1524; SDAG-GFX9GEN-NEXT: v_mad_f32 v1, v2, v1, 0.15915494 1525; SDAG-GFX9GEN-NEXT: s_setpc_b64 s[30:31] 1526; 1527; SDAG-VI-LABEL: v_mad_mix_v2f32_f32imminv2pi: 1528; SDAG-VI: ; %bb.0: 1529; SDAG-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1530; SDAG-VI-NEXT: v_cvt_f32_f16_sdwa v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 1531; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v0, v0 1532; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v3, v1 1533; SDAG-VI-NEXT: v_cvt_f32_f16_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 1534; SDAG-VI-NEXT: v_mad_f32 v0, v0, v3, 0.15915494 1535; SDAG-VI-NEXT: v_mad_f32 v1, v2, v1, 0.15915494 1536; 
SDAG-VI-NEXT: s_setpc_b64 s[30:31] 1537; 1538; SDAG-CI-LABEL: v_mad_mix_v2f32_f32imminv2pi: 1539; SDAG-CI: ; %bb.0: 1540; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1541; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v3, v3 1542; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v2, v2 1543; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v0, v0 1544; SDAG-CI-NEXT: v_cvt_f16_f32_e32 v1, v1 1545; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v3, v3 1546; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v2, v2 1547; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v0, v0 1548; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v4, v1 1549; SDAG-CI-NEXT: v_mov_b32_e32 v1, 0x3e22f983 1550; SDAG-CI-NEXT: v_madak_f32 v0, v0, v2, 0x3e22f983 1551; SDAG-CI-NEXT: v_mac_f32_e32 v1, v4, v3 1552; SDAG-CI-NEXT: s_setpc_b64 s[30:31] 1553; 1554; GISEL-GFX1100-LABEL: v_mad_mix_v2f32_f32imminv2pi: 1555; GISEL-GFX1100: ; %bb.0: 1556; GISEL-GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1557; GISEL-GFX1100-NEXT: v_mov_b32_e32 v3, 0.15915494 1558; GISEL-GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) 1559; GISEL-GFX1100-NEXT: v_fma_mix_f32 v2, v0, v1, v3 op_sel_hi:[1,1,0] 1560; GISEL-GFX1100-NEXT: v_fma_mix_f32 v1, v0, v1, v3 op_sel:[1,1,0] op_sel_hi:[1,1,0] 1561; GISEL-GFX1100-NEXT: v_mov_b32_e32 v0, v2 1562; GISEL-GFX1100-NEXT: s_setpc_b64 s[30:31] 1563; 1564; GISEL-GFX900-LABEL: v_mad_mix_v2f32_f32imminv2pi: 1565; GISEL-GFX900: ; %bb.0: 1566; GISEL-GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1567; GISEL-GFX900-NEXT: v_mov_b32_e32 v3, 0.15915494 1568; GISEL-GFX900-NEXT: v_mad_mix_f32 v2, v0, v1, v3 op_sel_hi:[1,1,0] 1569; GISEL-GFX900-NEXT: v_mad_mix_f32 v1, v0, v1, v3 op_sel:[1,1,0] op_sel_hi:[1,1,0] 1570; GISEL-GFX900-NEXT: v_mov_b32_e32 v0, v2 1571; GISEL-GFX900-NEXT: s_setpc_b64 s[30:31] 1572; 1573; GISEL-GFX906-LABEL: v_mad_mix_v2f32_f32imminv2pi: 1574; GISEL-GFX906: ; %bb.0: 1575; GISEL-GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1576; GISEL-GFX906-NEXT: v_mov_b32_e32 v3, 0.15915494 1577; GISEL-GFX906-NEXT: 
v_fma_mix_f32 v2, v0, v1, v3 op_sel_hi:[1,1,0] 1578; GISEL-GFX906-NEXT: v_fma_mix_f32 v1, v0, v1, v3 op_sel:[1,1,0] op_sel_hi:[1,1,0] 1579; GISEL-GFX906-NEXT: v_mov_b32_e32 v0, v2 1580; GISEL-GFX906-NEXT: s_setpc_b64 s[30:31] 1581; 1582; GISEL-GFX9GEN-LABEL: v_mad_mix_v2f32_f32imminv2pi: 1583; GISEL-GFX9GEN: ; %bb.0: 1584; GISEL-GFX9GEN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1585; GISEL-GFX9GEN-NEXT: v_cvt_f32_f16_e32 v2, v0 1586; GISEL-GFX9GEN-NEXT: v_cvt_f32_f16_sdwa v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 1587; GISEL-GFX9GEN-NEXT: v_cvt_f32_f16_e32 v0, v1 1588; GISEL-GFX9GEN-NEXT: v_cvt_f32_f16_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 1589; GISEL-GFX9GEN-NEXT: v_mad_f32 v0, v2, v0, 0.15915494 1590; GISEL-GFX9GEN-NEXT: v_mad_f32 v1, v3, v1, 0.15915494 1591; GISEL-GFX9GEN-NEXT: s_setpc_b64 s[30:31] 1592; 1593; GISEL-VI-LABEL: v_mad_mix_v2f32_f32imminv2pi: 1594; GISEL-VI: ; %bb.0: 1595; GISEL-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1596; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v2, v0 1597; GISEL-VI-NEXT: v_cvt_f32_f16_sdwa v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 1598; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v0, v1 1599; GISEL-VI-NEXT: v_cvt_f32_f16_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 1600; GISEL-VI-NEXT: v_mad_f32 v0, v2, v0, 0.15915494 1601; GISEL-VI-NEXT: v_mad_f32 v1, v3, v1, 0.15915494 1602; GISEL-VI-NEXT: s_setpc_b64 s[30:31] 1603; 1604; GISEL-CI-LABEL: v_mad_mix_v2f32_f32imminv2pi: 1605; GISEL-CI: ; %bb.0: 1606; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1607; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v0 1608; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v4, v1 1609; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v2, v2 1610; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v3, v3 1611; GISEL-CI-NEXT: v_mov_b32_e32 v1, 0x3e22f983 1612; GISEL-CI-NEXT: v_madak_f32 v0, v0, v2, 0x3e22f983 1613; GISEL-CI-NEXT: v_mac_f32_e32 v1, v4, v3 1614; GISEL-CI-NEXT: s_setpc_b64 s[30:31] 1615 %src0.ext = fpext <2 x half> 
%src0 to <2 x float> 1616 %src1.ext = fpext <2 x half> %src1 to <2 x float> 1617 %src2 = fpext <2 x half> <half 0xH3118, half 0xH3118> to <2 x float> 1618 %result = tail call <2 x float> @llvm.fmuladd.v2f32(<2 x float> %src0.ext, <2 x float> %src1.ext, <2 x float> <float 0x3FC45F3060000000, float 0x3FC45F3060000000>) 1619 ret <2 x float> %result 1620} 1621 1622define float @v_mad_mix_clamp_f32_f16hi_f16hi_f16hi_elt(<2 x half> %src0, <2 x half> %src1, <2 x half> %src2) #0 { 1623; GFX1100-LABEL: v_mad_mix_clamp_f32_f16hi_f16hi_f16hi_elt: 1624; GFX1100: ; %bb.0: 1625; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1626; GFX1100-NEXT: v_fma_mix_f32 v0, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp 1627; GFX1100-NEXT: s_setpc_b64 s[30:31] 1628; 1629; GFX900-LABEL: v_mad_mix_clamp_f32_f16hi_f16hi_f16hi_elt: 1630; GFX900: ; %bb.0: 1631; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1632; GFX900-NEXT: v_mad_mix_f32 v0, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp 1633; GFX900-NEXT: s_setpc_b64 s[30:31] 1634; 1635; GFX906-LABEL: v_mad_mix_clamp_f32_f16hi_f16hi_f16hi_elt: 1636; GFX906: ; %bb.0: 1637; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1638; GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp 1639; GFX906-NEXT: s_setpc_b64 s[30:31] 1640; 1641; GFX9GEN-LABEL: v_mad_mix_clamp_f32_f16hi_f16hi_f16hi_elt: 1642; GFX9GEN: ; %bb.0: 1643; GFX9GEN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1644; GFX9GEN-NEXT: v_cvt_f32_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 1645; GFX9GEN-NEXT: v_cvt_f32_f16_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 1646; GFX9GEN-NEXT: v_cvt_f32_f16_sdwa v2, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 1647; GFX9GEN-NEXT: v_mad_f32 v0, v0, v1, v2 clamp 1648; GFX9GEN-NEXT: s_setpc_b64 s[30:31] 1649; 1650; VI-LABEL: v_mad_mix_clamp_f32_f16hi_f16hi_f16hi_elt: 1651; VI: ; %bb.0: 1652; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1653; 
VI-NEXT: v_cvt_f32_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 1654; VI-NEXT: v_cvt_f32_f16_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 1655; VI-NEXT: v_cvt_f32_f16_sdwa v2, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 1656; VI-NEXT: v_mad_f32 v0, v0, v1, v2 clamp 1657; VI-NEXT: s_setpc_b64 s[30:31] 1658; 1659; SDAG-CI-LABEL: v_mad_mix_clamp_f32_f16hi_f16hi_f16hi_elt: 1660; SDAG-CI: ; %bb.0: 1661; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1662; SDAG-CI-NEXT: v_mad_f32 v0, v1, v3, v5 clamp 1663; SDAG-CI-NEXT: s_setpc_b64 s[30:31] 1664; 1665; GISEL-CI-LABEL: v_mad_mix_clamp_f32_f16hi_f16hi_f16hi_elt: 1666; GISEL-CI: ; %bb.0: 1667; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1668; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v1 1669; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v3 1670; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v2, v5 1671; GISEL-CI-NEXT: v_mad_f32 v0, v0, v1, v2 clamp 1672; GISEL-CI-NEXT: s_setpc_b64 s[30:31] 1673 %src0.hi = extractelement <2 x half> %src0, i32 1 1674 %src1.hi = extractelement <2 x half> %src1, i32 1 1675 %src2.hi = extractelement <2 x half> %src2, i32 1 1676 %src0.ext = fpext half %src0.hi to float 1677 %src1.ext = fpext half %src1.hi to float 1678 %src2.ext = fpext half %src2.hi to float 1679 %result = tail call float @llvm.fmuladd.f32(float %src0.ext, float %src1.ext, float %src2.ext) 1680 %max = call float @llvm.maxnum.f32(float %result, float 0.0) 1681 %clamp = call float @llvm.minnum.f32(float %max, float 1.0) 1682 ret float %clamp 1683} 1684 1685define float @no_mix_simple(float %src0, float %src1, float %src2) #0 { 1686; GFX1100-LABEL: no_mix_simple: 1687; GFX1100: ; %bb.0: 1688; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1689; GFX1100-NEXT: v_fma_f32 v0, v0, v1, v2 1690; GFX1100-NEXT: s_setpc_b64 s[30:31] 1691; 1692; GFX900-LABEL: no_mix_simple: 1693; GFX900: ; %bb.0: 1694; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1695; GFX900-NEXT: v_mad_f32 v0, v0, 
v1, v2 1696; GFX900-NEXT: s_setpc_b64 s[30:31] 1697; 1698; GFX906-LABEL: no_mix_simple: 1699; GFX906: ; %bb.0: 1700; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1701; GFX906-NEXT: v_fma_f32 v0, v0, v1, v2 1702; GFX906-NEXT: s_setpc_b64 s[30:31] 1703; 1704; GFX9GEN-LABEL: no_mix_simple: 1705; GFX9GEN: ; %bb.0: 1706; GFX9GEN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1707; GFX9GEN-NEXT: v_mad_f32 v0, v0, v1, v2 1708; GFX9GEN-NEXT: s_setpc_b64 s[30:31] 1709; 1710; VI-LABEL: no_mix_simple: 1711; VI: ; %bb.0: 1712; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1713; VI-NEXT: v_mad_f32 v0, v0, v1, v2 1714; VI-NEXT: s_setpc_b64 s[30:31] 1715; 1716; CI-LABEL: no_mix_simple: 1717; CI: ; %bb.0: 1718; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1719; CI-NEXT: v_mad_f32 v0, v0, v1, v2 1720; CI-NEXT: s_setpc_b64 s[30:31] 1721 %result = call float @llvm.fmuladd.f32(float %src0, float %src1, float %src2) 1722 ret float %result 1723} 1724 1725define float @no_mix_simple_fabs(float %src0, float %src1, float %src2) #0 { 1726; GFX1100-LABEL: no_mix_simple_fabs: 1727; GFX1100: ; %bb.0: 1728; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1729; GFX1100-NEXT: v_fma_f32 v0, |v0|, v1, v2 1730; GFX1100-NEXT: s_setpc_b64 s[30:31] 1731; 1732; GFX900-LABEL: no_mix_simple_fabs: 1733; GFX900: ; %bb.0: 1734; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1735; GFX900-NEXT: v_mad_f32 v0, |v0|, v1, v2 1736; GFX900-NEXT: s_setpc_b64 s[30:31] 1737; 1738; GFX906-LABEL: no_mix_simple_fabs: 1739; GFX906: ; %bb.0: 1740; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1741; GFX906-NEXT: v_fma_f32 v0, |v0|, v1, v2 1742; GFX906-NEXT: s_setpc_b64 s[30:31] 1743; 1744; GFX9GEN-LABEL: no_mix_simple_fabs: 1745; GFX9GEN: ; %bb.0: 1746; GFX9GEN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1747; GFX9GEN-NEXT: v_mad_f32 v0, |v0|, v1, v2 1748; GFX9GEN-NEXT: s_setpc_b64 s[30:31] 1749; 1750; VI-LABEL: no_mix_simple_fabs: 1751; VI: ; %bb.0: 1752; VI-NEXT: s_waitcnt vmcnt(0) 
expcnt(0) lgkmcnt(0) 1753; VI-NEXT: v_mad_f32 v0, |v0|, v1, v2 1754; VI-NEXT: s_setpc_b64 s[30:31] 1755; 1756; CI-LABEL: no_mix_simple_fabs: 1757; CI: ; %bb.0: 1758; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1759; CI-NEXT: v_mad_f32 v0, |v0|, v1, v2 1760; CI-NEXT: s_setpc_b64 s[30:31] 1761 %src0.fabs = call float @llvm.fabs.f32(float %src0) 1762 %result = call float @llvm.fmuladd.f32(float %src0.fabs, float %src1, float %src2) 1763 ret float %result 1764} 1765 1766; FIXME(DAG): Should be able to select in this case. 1767; All sources are converted from f16, so it doesn't matter 1768; that v_mad_mix_f32 flushes denormals. 1769 1770define float @v_mad_mix_f32_f16lo_f16lo_f16lo_f32_denormals(half %src0, half %src1, half %src2) #1 { 1771; GFX1100-LABEL: v_mad_mix_f32_f16lo_f16lo_f16lo_f32_denormals: 1772; GFX1100: ; %bb.0: 1773; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1774; GFX1100-NEXT: v_fma_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,1] 1775; GFX1100-NEXT: s_setpc_b64 s[30:31] 1776; 1777; GFX900-LABEL: v_mad_mix_f32_f16lo_f16lo_f16lo_f32_denormals: 1778; GFX900: ; %bb.0: 1779; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1780; GFX900-NEXT: v_cvt_f32_f16_e32 v0, v0 1781; GFX900-NEXT: v_cvt_f32_f16_e32 v1, v1 1782; GFX900-NEXT: v_cvt_f32_f16_e32 v2, v2 1783; GFX900-NEXT: v_fma_f32 v0, v0, v1, v2 1784; GFX900-NEXT: s_setpc_b64 s[30:31] 1785; 1786; GFX906-LABEL: v_mad_mix_f32_f16lo_f16lo_f16lo_f32_denormals: 1787; GFX906: ; %bb.0: 1788; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1789; GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,1] 1790; GFX906-NEXT: s_setpc_b64 s[30:31] 1791; 1792; GFX9GEN-LABEL: v_mad_mix_f32_f16lo_f16lo_f16lo_f32_denormals: 1793; GFX9GEN: ; %bb.0: 1794; GFX9GEN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1795; GFX9GEN-NEXT: v_cvt_f32_f16_e32 v0, v0 1796; GFX9GEN-NEXT: v_cvt_f32_f16_e32 v1, v1 1797; GFX9GEN-NEXT: v_cvt_f32_f16_e32 v2, v2 1798; GFX9GEN-NEXT: v_fma_f32 v0, v0, v1, v2 1799; GFX9GEN-NEXT: s_setpc_b64 
s[30:31] 1800; 1801; VI-LABEL: v_mad_mix_f32_f16lo_f16lo_f16lo_f32_denormals: 1802; VI: ; %bb.0: 1803; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1804; VI-NEXT: v_cvt_f32_f16_e32 v0, v0 1805; VI-NEXT: v_cvt_f32_f16_e32 v1, v1 1806; VI-NEXT: v_cvt_f32_f16_e32 v2, v2 1807; VI-NEXT: v_mul_f32_e32 v0, v0, v1 1808; VI-NEXT: v_add_f32_e32 v0, v0, v2 1809; VI-NEXT: s_setpc_b64 s[30:31] 1810; 1811; SDAG-CI-LABEL: v_mad_mix_f32_f16lo_f16lo_f16lo_f32_denormals: 1812; SDAG-CI: ; %bb.0: 1813; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1814; SDAG-CI-NEXT: v_fma_f32 v0, v0, v1, v2 1815; SDAG-CI-NEXT: s_setpc_b64 s[30:31] 1816; 1817; GISEL-CI-LABEL: v_mad_mix_f32_f16lo_f16lo_f16lo_f32_denormals: 1818; GISEL-CI: ; %bb.0: 1819; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1820; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v0 1821; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1 1822; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v2, v2 1823; GISEL-CI-NEXT: v_fma_f32 v0, v0, v1, v2 1824; GISEL-CI-NEXT: s_setpc_b64 s[30:31] 1825 %src0.ext = fpext half %src0 to float 1826 %src1.ext = fpext half %src1 to float 1827 %src2.ext = fpext half %src2 to float 1828 %result = tail call float @llvm.fmuladd.f32(float %src0.ext, float %src1.ext, float %src2.ext) 1829 ret float %result 1830} 1831 1832define float @v_mad_mix_f32_f16lo_f16lo_f32_denormals(half %src0, half %src1, float %src2) #1 { 1833; GFX1100-LABEL: v_mad_mix_f32_f16lo_f16lo_f32_denormals: 1834; GFX1100: ; %bb.0: 1835; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1836; GFX1100-NEXT: v_fma_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,0] 1837; GFX1100-NEXT: s_setpc_b64 s[30:31] 1838; 1839; GFX900-LABEL: v_mad_mix_f32_f16lo_f16lo_f32_denormals: 1840; GFX900: ; %bb.0: 1841; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1842; GFX900-NEXT: v_cvt_f32_f16_e32 v0, v0 1843; GFX900-NEXT: v_cvt_f32_f16_e32 v1, v1 1844; GFX900-NEXT: v_fma_f32 v0, v0, v1, v2 1845; GFX900-NEXT: s_setpc_b64 s[30:31] 1846; 1847; GFX906-LABEL: 
v_mad_mix_f32_f16lo_f16lo_f32_denormals: 1848; GFX906: ; %bb.0: 1849; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1850; GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,0] 1851; GFX906-NEXT: s_setpc_b64 s[30:31] 1852; 1853; GFX9GEN-LABEL: v_mad_mix_f32_f16lo_f16lo_f32_denormals: 1854; GFX9GEN: ; %bb.0: 1855; GFX9GEN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1856; GFX9GEN-NEXT: v_cvt_f32_f16_e32 v0, v0 1857; GFX9GEN-NEXT: v_cvt_f32_f16_e32 v1, v1 1858; GFX9GEN-NEXT: v_fma_f32 v0, v0, v1, v2 1859; GFX9GEN-NEXT: s_setpc_b64 s[30:31] 1860; 1861; VI-LABEL: v_mad_mix_f32_f16lo_f16lo_f32_denormals: 1862; VI: ; %bb.0: 1863; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1864; VI-NEXT: v_cvt_f32_f16_e32 v0, v0 1865; VI-NEXT: v_cvt_f32_f16_e32 v1, v1 1866; VI-NEXT: v_mul_f32_e32 v0, v0, v1 1867; VI-NEXT: v_add_f32_e32 v0, v0, v2 1868; VI-NEXT: s_setpc_b64 s[30:31] 1869; 1870; SDAG-CI-LABEL: v_mad_mix_f32_f16lo_f16lo_f32_denormals: 1871; SDAG-CI: ; %bb.0: 1872; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1873; SDAG-CI-NEXT: v_fma_f32 v0, v0, v1, v2 1874; SDAG-CI-NEXT: s_setpc_b64 s[30:31] 1875; 1876; GISEL-CI-LABEL: v_mad_mix_f32_f16lo_f16lo_f32_denormals: 1877; GISEL-CI: ; %bb.0: 1878; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1879; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v0 1880; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1 1881; GISEL-CI-NEXT: v_fma_f32 v0, v0, v1, v2 1882; GISEL-CI-NEXT: s_setpc_b64 s[30:31] 1883 %src0.ext = fpext half %src0 to float 1884 %src1.ext = fpext half %src1 to float 1885 %result = tail call float @llvm.fmuladd.f32(float %src0.ext, float %src1.ext, float %src2) 1886 ret float %result 1887} 1888 1889define float @v_mad_mix_f32_f16lo_f16lo_f16lo_f32_denormals_fmulfadd(half %src0, half %src1, half %src2) #1 { 1890; GFX1100-LABEL: v_mad_mix_f32_f16lo_f16lo_f16lo_f32_denormals_fmulfadd: 1891; GFX1100: ; %bb.0: 1892; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1893; GFX1100-NEXT: v_cvt_f32_f16_e32 v0, v0 
1894; GFX1100-NEXT: v_cvt_f32_f16_e32 v1, v1 1895; GFX1100-NEXT: v_cvt_f32_f16_e32 v2, v2 1896; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) 1897; GFX1100-NEXT: v_mul_f32_e32 v0, v0, v1 1898; GFX1100-NEXT: v_add_f32_e32 v0, v0, v2 1899; GFX1100-NEXT: s_setpc_b64 s[30:31] 1900; 1901; GFX900-LABEL: v_mad_mix_f32_f16lo_f16lo_f16lo_f32_denormals_fmulfadd: 1902; GFX900: ; %bb.0: 1903; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1904; GFX900-NEXT: v_cvt_f32_f16_e32 v0, v0 1905; GFX900-NEXT: v_cvt_f32_f16_e32 v1, v1 1906; GFX900-NEXT: v_cvt_f32_f16_e32 v2, v2 1907; GFX900-NEXT: v_mul_f32_e32 v0, v0, v1 1908; GFX900-NEXT: v_add_f32_e32 v0, v0, v2 1909; GFX900-NEXT: s_setpc_b64 s[30:31] 1910; 1911; GFX906-LABEL: v_mad_mix_f32_f16lo_f16lo_f16lo_f32_denormals_fmulfadd: 1912; GFX906: ; %bb.0: 1913; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1914; GFX906-NEXT: v_cvt_f32_f16_e32 v0, v0 1915; GFX906-NEXT: v_cvt_f32_f16_e32 v1, v1 1916; GFX906-NEXT: v_cvt_f32_f16_e32 v2, v2 1917; GFX906-NEXT: v_mul_f32_e32 v0, v0, v1 1918; GFX906-NEXT: v_add_f32_e32 v0, v0, v2 1919; GFX906-NEXT: s_setpc_b64 s[30:31] 1920; 1921; GFX9GEN-LABEL: v_mad_mix_f32_f16lo_f16lo_f16lo_f32_denormals_fmulfadd: 1922; GFX9GEN: ; %bb.0: 1923; GFX9GEN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1924; GFX9GEN-NEXT: v_cvt_f32_f16_e32 v0, v0 1925; GFX9GEN-NEXT: v_cvt_f32_f16_e32 v1, v1 1926; GFX9GEN-NEXT: v_cvt_f32_f16_e32 v2, v2 1927; GFX9GEN-NEXT: v_mul_f32_e32 v0, v0, v1 1928; GFX9GEN-NEXT: v_add_f32_e32 v0, v0, v2 1929; GFX9GEN-NEXT: s_setpc_b64 s[30:31] 1930; 1931; VI-LABEL: v_mad_mix_f32_f16lo_f16lo_f16lo_f32_denormals_fmulfadd: 1932; VI: ; %bb.0: 1933; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1934; VI-NEXT: v_cvt_f32_f16_e32 v0, v0 1935; VI-NEXT: v_cvt_f32_f16_e32 v1, v1 1936; VI-NEXT: v_cvt_f32_f16_e32 v2, v2 1937; VI-NEXT: v_mul_f32_e32 v0, v0, v1 1938; VI-NEXT: v_add_f32_e32 v0, v0, v2 1939; VI-NEXT: s_setpc_b64 s[30:31] 1940; 1941; 
SDAG-CI-LABEL: v_mad_mix_f32_f16lo_f16lo_f16lo_f32_denormals_fmulfadd: 1942; SDAG-CI: ; %bb.0: 1943; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1944; SDAG-CI-NEXT: v_mul_f32_e32 v0, v0, v1 1945; SDAG-CI-NEXT: v_add_f32_e32 v0, v0, v2 1946; SDAG-CI-NEXT: s_setpc_b64 s[30:31] 1947; 1948; GISEL-CI-LABEL: v_mad_mix_f32_f16lo_f16lo_f16lo_f32_denormals_fmulfadd: 1949; GISEL-CI: ; %bb.0: 1950; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1951; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v0 1952; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1 1953; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v2, v2 1954; GISEL-CI-NEXT: v_mul_f32_e32 v0, v0, v1 1955; GISEL-CI-NEXT: v_add_f32_e32 v0, v0, v2 1956; GISEL-CI-NEXT: s_setpc_b64 s[30:31] 1957 %src0.ext = fpext half %src0 to float 1958 %src1.ext = fpext half %src1 to float 1959 %src2.ext = fpext half %src2 to float 1960 %mul = fmul float %src0.ext, %src1.ext 1961 %result = fadd float %mul, %src2.ext 1962 ret float %result 1963} 1964 1965define float @v_mad_mix_f32_f16lo_f16lo_f32_denormals_fmulfadd(half %src0, half %src1, float %src2) #1 { 1966; GFX1100-LABEL: v_mad_mix_f32_f16lo_f16lo_f32_denormals_fmulfadd: 1967; GFX1100: ; %bb.0: 1968; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1969; GFX1100-NEXT: v_cvt_f32_f16_e32 v0, v0 1970; GFX1100-NEXT: v_cvt_f32_f16_e32 v1, v1 1971; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 1972; GFX1100-NEXT: v_mul_f32_e32 v0, v0, v1 1973; GFX1100-NEXT: v_add_f32_e32 v0, v0, v2 1974; GFX1100-NEXT: s_setpc_b64 s[30:31] 1975; 1976; GFX900-LABEL: v_mad_mix_f32_f16lo_f16lo_f32_denormals_fmulfadd: 1977; GFX900: ; %bb.0: 1978; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1979; GFX900-NEXT: v_cvt_f32_f16_e32 v0, v0 1980; GFX900-NEXT: v_cvt_f32_f16_e32 v1, v1 1981; GFX900-NEXT: v_mul_f32_e32 v0, v0, v1 1982; GFX900-NEXT: v_add_f32_e32 v0, v0, v2 1983; GFX900-NEXT: s_setpc_b64 s[30:31] 1984; 1985; GFX906-LABEL: 
v_mad_mix_f32_f16lo_f16lo_f32_denormals_fmulfadd: 1986; GFX906: ; %bb.0: 1987; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1988; GFX906-NEXT: v_cvt_f32_f16_e32 v0, v0 1989; GFX906-NEXT: v_cvt_f32_f16_e32 v1, v1 1990; GFX906-NEXT: v_mul_f32_e32 v0, v0, v1 1991; GFX906-NEXT: v_add_f32_e32 v0, v0, v2 1992; GFX906-NEXT: s_setpc_b64 s[30:31] 1993; 1994; GFX9GEN-LABEL: v_mad_mix_f32_f16lo_f16lo_f32_denormals_fmulfadd: 1995; GFX9GEN: ; %bb.0: 1996; GFX9GEN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1997; GFX9GEN-NEXT: v_cvt_f32_f16_e32 v0, v0 1998; GFX9GEN-NEXT: v_cvt_f32_f16_e32 v1, v1 1999; GFX9GEN-NEXT: v_mul_f32_e32 v0, v0, v1 2000; GFX9GEN-NEXT: v_add_f32_e32 v0, v0, v2 2001; GFX9GEN-NEXT: s_setpc_b64 s[30:31] 2002; 2003; VI-LABEL: v_mad_mix_f32_f16lo_f16lo_f32_denormals_fmulfadd: 2004; VI: ; %bb.0: 2005; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2006; VI-NEXT: v_cvt_f32_f16_e32 v0, v0 2007; VI-NEXT: v_cvt_f32_f16_e32 v1, v1 2008; VI-NEXT: v_mul_f32_e32 v0, v0, v1 2009; VI-NEXT: v_add_f32_e32 v0, v0, v2 2010; VI-NEXT: s_setpc_b64 s[30:31] 2011; 2012; SDAG-CI-LABEL: v_mad_mix_f32_f16lo_f16lo_f32_denormals_fmulfadd: 2013; SDAG-CI: ; %bb.0: 2014; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2015; SDAG-CI-NEXT: v_mul_f32_e32 v0, v0, v1 2016; SDAG-CI-NEXT: v_add_f32_e32 v0, v0, v2 2017; SDAG-CI-NEXT: s_setpc_b64 s[30:31] 2018; 2019; GISEL-CI-LABEL: v_mad_mix_f32_f16lo_f16lo_f32_denormals_fmulfadd: 2020; GISEL-CI: ; %bb.0: 2021; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2022; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v0 2023; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1 2024; GISEL-CI-NEXT: v_mul_f32_e32 v0, v0, v1 2025; GISEL-CI-NEXT: v_add_f32_e32 v0, v0, v2 2026; GISEL-CI-NEXT: s_setpc_b64 s[30:31] 2027 %src0.ext = fpext half %src0 to float 2028 %src1.ext = fpext half %src1 to float 2029 %mul = fmul float %src0.ext, %src1.ext 2030 %result = fadd float %mul, %src2 2031 ret float %result 2032} 2033 2034define float 
@v_mad_mix_f32_f16lo_f16lo_f16lo_f32_flush_fmulfadd(half %src0, half %src1, half %src2) #0 { 2035; GFX1100-LABEL: v_mad_mix_f32_f16lo_f16lo_f16lo_f32_flush_fmulfadd: 2036; GFX1100: ; %bb.0: 2037; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2038; GFX1100-NEXT: v_fma_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,1] 2039; GFX1100-NEXT: s_setpc_b64 s[30:31] 2040; 2041; GFX900-LABEL: v_mad_mix_f32_f16lo_f16lo_f16lo_f32_flush_fmulfadd: 2042; GFX900: ; %bb.0: 2043; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2044; GFX900-NEXT: v_mad_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,1] 2045; GFX900-NEXT: s_setpc_b64 s[30:31] 2046; 2047; GFX906-LABEL: v_mad_mix_f32_f16lo_f16lo_f16lo_f32_flush_fmulfadd: 2048; GFX906: ; %bb.0: 2049; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2050; GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,1] 2051; GFX906-NEXT: s_setpc_b64 s[30:31] 2052; 2053; GFX9GEN-LABEL: v_mad_mix_f32_f16lo_f16lo_f16lo_f32_flush_fmulfadd: 2054; GFX9GEN: ; %bb.0: 2055; GFX9GEN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2056; GFX9GEN-NEXT: v_cvt_f32_f16_e32 v3, v0 2057; GFX9GEN-NEXT: v_cvt_f32_f16_e32 v1, v1 2058; GFX9GEN-NEXT: v_cvt_f32_f16_e32 v0, v2 2059; GFX9GEN-NEXT: v_mac_f32_e32 v0, v3, v1 2060; GFX9GEN-NEXT: s_setpc_b64 s[30:31] 2061; 2062; VI-LABEL: v_mad_mix_f32_f16lo_f16lo_f16lo_f32_flush_fmulfadd: 2063; VI: ; %bb.0: 2064; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2065; VI-NEXT: v_cvt_f32_f16_e32 v3, v0 2066; VI-NEXT: v_cvt_f32_f16_e32 v1, v1 2067; VI-NEXT: v_cvt_f32_f16_e32 v0, v2 2068; VI-NEXT: v_mac_f32_e32 v0, v3, v1 2069; VI-NEXT: s_setpc_b64 s[30:31] 2070; 2071; SDAG-CI-LABEL: v_mad_mix_f32_f16lo_f16lo_f16lo_f32_flush_fmulfadd: 2072; SDAG-CI: ; %bb.0: 2073; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2074; SDAG-CI-NEXT: v_mad_f32 v0, v0, v1, v2 2075; SDAG-CI-NEXT: s_setpc_b64 s[30:31] 2076; 2077; GISEL-CI-LABEL: v_mad_mix_f32_f16lo_f16lo_f16lo_f32_flush_fmulfadd: 2078; GISEL-CI: ; %bb.0: 2079; GISEL-CI-NEXT: 
s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2080; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v3, v0 2081; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1 2082; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v2 2083; GISEL-CI-NEXT: v_mac_f32_e32 v0, v3, v1 2084; GISEL-CI-NEXT: s_setpc_b64 s[30:31] 2085 %src0.ext = fpext half %src0 to float 2086 %src1.ext = fpext half %src1 to float 2087 %src2.ext = fpext half %src2 to float 2088 %mul = fmul contract float %src0.ext, %src1.ext 2089 %result = fadd contract float %mul, %src2.ext 2090 ret float %result 2091} 2092 2093define float @v_mad_mix_f32_f16lo_f16lo_f32_flush_fmulfadd(half %src0, half %src1, float %src2) #0 { 2094; GFX1100-LABEL: v_mad_mix_f32_f16lo_f16lo_f32_flush_fmulfadd: 2095; GFX1100: ; %bb.0: 2096; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2097; GFX1100-NEXT: v_fma_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,0] 2098; GFX1100-NEXT: s_setpc_b64 s[30:31] 2099; 2100; GFX900-LABEL: v_mad_mix_f32_f16lo_f16lo_f32_flush_fmulfadd: 2101; GFX900: ; %bb.0: 2102; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2103; GFX900-NEXT: v_mad_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,0] 2104; GFX900-NEXT: s_setpc_b64 s[30:31] 2105; 2106; GFX906-LABEL: v_mad_mix_f32_f16lo_f16lo_f32_flush_fmulfadd: 2107; GFX906: ; %bb.0: 2108; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2109; GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,0] 2110; GFX906-NEXT: s_setpc_b64 s[30:31] 2111; 2112; GFX9GEN-LABEL: v_mad_mix_f32_f16lo_f16lo_f32_flush_fmulfadd: 2113; GFX9GEN: ; %bb.0: 2114; GFX9GEN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2115; GFX9GEN-NEXT: v_cvt_f32_f16_e32 v0, v0 2116; GFX9GEN-NEXT: v_cvt_f32_f16_e32 v1, v1 2117; GFX9GEN-NEXT: v_mad_f32 v0, v0, v1, v2 2118; GFX9GEN-NEXT: s_setpc_b64 s[30:31] 2119; 2120; VI-LABEL: v_mad_mix_f32_f16lo_f16lo_f32_flush_fmulfadd: 2121; VI: ; %bb.0: 2122; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2123; VI-NEXT: v_cvt_f32_f16_e32 v0, v0 2124; VI-NEXT: v_cvt_f32_f16_e32 v1, v1 2125; VI-NEXT: v_mad_f32 
v0, v0, v1, v2 2126; VI-NEXT: s_setpc_b64 s[30:31] 2127; 2128; SDAG-CI-LABEL: v_mad_mix_f32_f16lo_f16lo_f32_flush_fmulfadd: 2129; SDAG-CI: ; %bb.0: 2130; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2131; SDAG-CI-NEXT: v_mad_f32 v0, v0, v1, v2 2132; SDAG-CI-NEXT: s_setpc_b64 s[30:31] 2133; 2134; GISEL-CI-LABEL: v_mad_mix_f32_f16lo_f16lo_f32_flush_fmulfadd: 2135; GISEL-CI: ; %bb.0: 2136; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2137; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v0 2138; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1 2139; GISEL-CI-NEXT: v_mad_f32 v0, v0, v1, v2 2140; GISEL-CI-NEXT: s_setpc_b64 s[30:31] 2141 %src0.ext = fpext half %src0 to float 2142 %src1.ext = fpext half %src1 to float 2143 %mul = fmul contract float %src0.ext, %src1.ext 2144 %result = fadd contract float %mul, %src2 2145 ret float %result 2146} 2147 2148define float @v_mad_mix_f32_negprecvtf16lo_f16lo_f16lo(i32 %src0.arg, half %src1, half %src2) #0 { 2149; GFX1100-LABEL: v_mad_mix_f32_negprecvtf16lo_f16lo_f16lo: 2150; GFX1100: ; %bb.0: 2151; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2152; GFX1100-NEXT: v_fma_mix_f32 v0, -v0, v1, v2 op_sel_hi:[1,1,1] 2153; GFX1100-NEXT: s_setpc_b64 s[30:31] 2154; 2155; GFX900-LABEL: v_mad_mix_f32_negprecvtf16lo_f16lo_f16lo: 2156; GFX900: ; %bb.0: 2157; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2158; GFX900-NEXT: v_mad_mix_f32 v0, -v0, v1, v2 op_sel_hi:[1,1,1] 2159; GFX900-NEXT: s_setpc_b64 s[30:31] 2160; 2161; GFX906-LABEL: v_mad_mix_f32_negprecvtf16lo_f16lo_f16lo: 2162; GFX906: ; %bb.0: 2163; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2164; GFX906-NEXT: v_fma_mix_f32 v0, -v0, v1, v2 op_sel_hi:[1,1,1] 2165; GFX906-NEXT: s_setpc_b64 s[30:31] 2166; 2167; SDAG-GFX9GEN-LABEL: v_mad_mix_f32_negprecvtf16lo_f16lo_f16lo: 2168; SDAG-GFX9GEN: ; %bb.0: 2169; SDAG-GFX9GEN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2170; SDAG-GFX9GEN-NEXT: v_cvt_f32_f16_e32 v0, v0 2171; SDAG-GFX9GEN-NEXT: v_cvt_f32_f16_e32 v1, v1 
2172; SDAG-GFX9GEN-NEXT: v_cvt_f32_f16_e32 v2, v2 2173; SDAG-GFX9GEN-NEXT: v_mad_f32 v0, -v0, v1, v2 2174; SDAG-GFX9GEN-NEXT: s_setpc_b64 s[30:31] 2175; 2176; SDAG-VI-LABEL: v_mad_mix_f32_negprecvtf16lo_f16lo_f16lo: 2177; SDAG-VI: ; %bb.0: 2178; SDAG-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2179; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v0, v0 2180; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v1, v1 2181; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v2, v2 2182; SDAG-VI-NEXT: v_mad_f32 v0, -v0, v1, v2 2183; SDAG-VI-NEXT: s_setpc_b64 s[30:31] 2184; 2185; SDAG-CI-LABEL: v_mad_mix_f32_negprecvtf16lo_f16lo_f16lo: 2186; SDAG-CI: ; %bb.0: 2187; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2188; SDAG-CI-NEXT: v_cvt_f32_f16_e32 v0, v0 2189; SDAG-CI-NEXT: v_mad_f32 v0, -v0, v1, v2 2190; SDAG-CI-NEXT: s_setpc_b64 s[30:31] 2191; 2192; GISEL-GFX9GEN-LABEL: v_mad_mix_f32_negprecvtf16lo_f16lo_f16lo: 2193; GISEL-GFX9GEN: ; %bb.0: 2194; GISEL-GFX9GEN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2195; GISEL-GFX9GEN-NEXT: v_cvt_f32_f16_e64 v3, -v0 2196; GISEL-GFX9GEN-NEXT: v_cvt_f32_f16_e32 v1, v1 2197; GISEL-GFX9GEN-NEXT: v_cvt_f32_f16_e32 v0, v2 2198; GISEL-GFX9GEN-NEXT: v_mac_f32_e32 v0, v3, v1 2199; GISEL-GFX9GEN-NEXT: s_setpc_b64 s[30:31] 2200; 2201; GISEL-VI-LABEL: v_mad_mix_f32_negprecvtf16lo_f16lo_f16lo: 2202; GISEL-VI: ; %bb.0: 2203; GISEL-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2204; GISEL-VI-NEXT: v_cvt_f32_f16_e64 v3, -v0 2205; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v1, v1 2206; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v0, v2 2207; GISEL-VI-NEXT: v_mac_f32_e32 v0, v3, v1 2208; GISEL-VI-NEXT: s_setpc_b64 s[30:31] 2209; 2210; GISEL-CI-LABEL: v_mad_mix_f32_negprecvtf16lo_f16lo_f16lo: 2211; GISEL-CI: ; %bb.0: 2212; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2213; GISEL-CI-NEXT: v_cvt_f32_f16_e64 v3, -v0 2214; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1 2215; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v2 2216; GISEL-CI-NEXT: v_mac_f32_e32 v0, v3, v1 2217; GISEL-CI-NEXT: s_setpc_b64 s[30:31] 
2218 %src0.arg.bc = bitcast i32 %src0.arg to <2 x half> 2219 %src0 = extractelement <2 x half> %src0.arg.bc, i32 0 2220 %src0.neg = fneg half %src0 2221 %src0.ext = fpext half %src0.neg to float 2222 %src1.ext = fpext half %src1 to float 2223 %src2.ext = fpext half %src2 to float 2224; %src0.ext.neg = fneg float %src0.ext 2225 %result = tail call float @llvm.fmuladd.f32(float %src0.ext, float %src1.ext, float %src2.ext) 2226 ret float %result 2227} 2228 2229; Make sure we don't fold the pre-cvt fneg (applied to the half before the fpext) into the operand when a fabs is already applied to the extended float 2230 2231define float @v_mad_mix_f32_precvtnegf16hi_abs_f16lo_f16lo(i32 %src0.arg, half %src1, half %src2) #0 { 2232; GFX1100-LABEL: v_mad_mix_f32_precvtnegf16hi_abs_f16lo_f16lo: 2233; GFX1100: ; %bb.0: 2234; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2235; GFX1100-NEXT: v_lshrrev_b32_e32 v0, 16, v0 2236; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 2237; GFX1100-NEXT: v_xor_b32_e32 v0, 0x8000, v0 2238; GFX1100-NEXT: v_fma_mix_f32 v0, |v0|, v1, v2 op_sel_hi:[1,1,1] 2239; GFX1100-NEXT: s_setpc_b64 s[30:31] 2240; 2241; GFX900-LABEL: v_mad_mix_f32_precvtnegf16hi_abs_f16lo_f16lo: 2242; GFX900: ; %bb.0: 2243; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2244; GFX900-NEXT: s_mov_b32 s4, 0x8000 2245; GFX900-NEXT: v_xor_b32_sdwa v0, s4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 2246; GFX900-NEXT: v_mad_mix_f32 v0, |v0|, v1, v2 op_sel_hi:[1,1,1] 2247; GFX900-NEXT: s_setpc_b64 s[30:31] 2248; 2249; GFX906-LABEL: v_mad_mix_f32_precvtnegf16hi_abs_f16lo_f16lo: 2250; GFX906: ; %bb.0: 2251; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2252; GFX906-NEXT: s_mov_b32 s4, 0x8000 2253; GFX906-NEXT: v_xor_b32_sdwa v0, s4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 2254; GFX906-NEXT: v_fma_mix_f32 v0, |v0|, v1, v2 op_sel_hi:[1,1,1] 2255; GFX906-NEXT: s_setpc_b64 s[30:31] 2256; 2257; GFX9GEN-LABEL: v_mad_mix_f32_precvtnegf16hi_abs_f16lo_f16lo: 2258;
GFX9GEN: ; %bb.0: 2259; GFX9GEN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2260; GFX9GEN-NEXT: v_cvt_f32_f16_sdwa v0, -v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 2261; GFX9GEN-NEXT: v_cvt_f32_f16_e32 v1, v1 2262; GFX9GEN-NEXT: v_cvt_f32_f16_e32 v2, v2 2263; GFX9GEN-NEXT: v_mad_f32 v0, |v0|, v1, v2 2264; GFX9GEN-NEXT: s_setpc_b64 s[30:31] 2265; 2266; VI-LABEL: v_mad_mix_f32_precvtnegf16hi_abs_f16lo_f16lo: 2267; VI: ; %bb.0: 2268; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2269; VI-NEXT: v_cvt_f32_f16_sdwa v0, -v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 2270; VI-NEXT: v_cvt_f32_f16_e32 v1, v1 2271; VI-NEXT: v_cvt_f32_f16_e32 v2, v2 2272; VI-NEXT: v_mad_f32 v0, |v0|, v1, v2 2273; VI-NEXT: s_setpc_b64 s[30:31] 2274; 2275; SDAG-CI-LABEL: v_mad_mix_f32_precvtnegf16hi_abs_f16lo_f16lo: 2276; SDAG-CI: ; %bb.0: 2277; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2278; SDAG-CI-NEXT: v_lshrrev_b32_e32 v0, 16, v0 2279; SDAG-CI-NEXT: v_cvt_f32_f16_e64 v0, |v0| 2280; SDAG-CI-NEXT: v_mad_f32 v0, v0, v1, v2 2281; SDAG-CI-NEXT: s_setpc_b64 s[30:31] 2282; 2283; GISEL-CI-LABEL: v_mad_mix_f32_precvtnegf16hi_abs_f16lo_f16lo: 2284; GISEL-CI: ; %bb.0: 2285; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2286; GISEL-CI-NEXT: v_lshrrev_b32_e32 v0, 16, v0 2287; GISEL-CI-NEXT: v_cvt_f32_f16_e64 v0, -v0 2288; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1 2289; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v2, v2 2290; GISEL-CI-NEXT: v_mad_f32 v0, |v0|, v1, v2 2291; GISEL-CI-NEXT: s_setpc_b64 s[30:31] 2292 %src0.arg.bc = bitcast i32 %src0.arg to <2 x half> 2293 %src0 = extractelement <2 x half> %src0.arg.bc, i32 1 2294 %src0.neg = fneg half %src0 2295 %src0.ext = fpext half %src0.neg to float 2296 %src0.ext.abs = call float @llvm.fabs.f32(float %src0.ext) 2297 %src1.ext = fpext half %src1 to float 2298 %src2.ext = fpext half %src2 to float 2299 %result = tail call float @llvm.fmuladd.f32(float %src0.ext.abs, float %src1.ext, float %src2.ext) 2300 ret float %result 
2301} 2302 2303define float @v_mad_mix_f32_precvtabsf16hi_f16lo_f16lo(i32 %src0.arg, half %src1, half %src2) #0 { 2304; GFX1100-LABEL: v_mad_mix_f32_precvtabsf16hi_f16lo_f16lo: 2305; GFX1100: ; %bb.0: 2306; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2307; GFX1100-NEXT: v_fma_mix_f32 v0, |v0|, v1, v2 op_sel:[1,0,0] op_sel_hi:[1,1,1] 2308; GFX1100-NEXT: s_setpc_b64 s[30:31] 2309; 2310; GFX900-LABEL: v_mad_mix_f32_precvtabsf16hi_f16lo_f16lo: 2311; GFX900: ; %bb.0: 2312; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2313; GFX900-NEXT: v_mad_mix_f32 v0, |v0|, v1, v2 op_sel:[1,0,0] op_sel_hi:[1,1,1] 2314; GFX900-NEXT: s_setpc_b64 s[30:31] 2315; 2316; GFX906-LABEL: v_mad_mix_f32_precvtabsf16hi_f16lo_f16lo: 2317; GFX906: ; %bb.0: 2318; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2319; GFX906-NEXT: v_fma_mix_f32 v0, |v0|, v1, v2 op_sel:[1,0,0] op_sel_hi:[1,1,1] 2320; GFX906-NEXT: s_setpc_b64 s[30:31] 2321; 2322; GFX9GEN-LABEL: v_mad_mix_f32_precvtabsf16hi_f16lo_f16lo: 2323; GFX9GEN: ; %bb.0: 2324; GFX9GEN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2325; GFX9GEN-NEXT: v_cvt_f32_f16_sdwa v3, |v0| dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 2326; GFX9GEN-NEXT: v_cvt_f32_f16_e32 v1, v1 2327; GFX9GEN-NEXT: v_cvt_f32_f16_e32 v0, v2 2328; GFX9GEN-NEXT: v_mac_f32_e32 v0, v3, v1 2329; GFX9GEN-NEXT: s_setpc_b64 s[30:31] 2330; 2331; VI-LABEL: v_mad_mix_f32_precvtabsf16hi_f16lo_f16lo: 2332; VI: ; %bb.0: 2333; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2334; VI-NEXT: v_cvt_f32_f16_sdwa v3, |v0| dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 2335; VI-NEXT: v_cvt_f32_f16_e32 v1, v1 2336; VI-NEXT: v_cvt_f32_f16_e32 v0, v2 2337; VI-NEXT: v_mac_f32_e32 v0, v3, v1 2338; VI-NEXT: s_setpc_b64 s[30:31] 2339; 2340; SDAG-CI-LABEL: v_mad_mix_f32_precvtabsf16hi_f16lo_f16lo: 2341; SDAG-CI: ; %bb.0: 2342; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2343; SDAG-CI-NEXT: v_lshrrev_b32_e32 v0, 16, v0 2344; SDAG-CI-NEXT: v_cvt_f32_f16_e64 v0, |v0| 
2345; SDAG-CI-NEXT: v_mad_f32 v0, v0, v1, v2 2346; SDAG-CI-NEXT: s_setpc_b64 s[30:31] 2347; 2348; GISEL-CI-LABEL: v_mad_mix_f32_precvtabsf16hi_f16lo_f16lo: 2349; GISEL-CI: ; %bb.0: 2350; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2351; GISEL-CI-NEXT: v_lshrrev_b32_e32 v0, 16, v0 2352; GISEL-CI-NEXT: v_cvt_f32_f16_e64 v3, |v0| 2353; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1 2354; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v2 2355; GISEL-CI-NEXT: v_mac_f32_e32 v0, v3, v1 2356; GISEL-CI-NEXT: s_setpc_b64 s[30:31] 2357 %src0.arg.bc = bitcast i32 %src0.arg to <2 x half> 2358 %src0 = extractelement <2 x half> %src0.arg.bc, i32 1 2359 %src0.abs = call half @llvm.fabs.f16(half %src0) 2360 %src0.ext = fpext half %src0.abs to float 2361 %src1.ext = fpext half %src1 to float 2362 %src2.ext = fpext half %src2 to float 2363 %result = tail call float @llvm.fmuladd.f32(float %src0.ext, float %src1.ext, float %src2.ext) 2364 ret float %result 2365} 2366 2367define float @v_mad_mix_f32_preextractfneg_f16hi_f16lo_f16lo(i32 %src0.arg, half %src1, half %src2) #0 { 2368; GFX1100-LABEL: v_mad_mix_f32_preextractfneg_f16hi_f16lo_f16lo: 2369; GFX1100: ; %bb.0: 2370; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2371; GFX1100-NEXT: v_fma_mix_f32 v0, -v0, v1, v2 op_sel:[1,0,0] op_sel_hi:[1,1,1] 2372; GFX1100-NEXT: s_setpc_b64 s[30:31] 2373; 2374; GFX900-LABEL: v_mad_mix_f32_preextractfneg_f16hi_f16lo_f16lo: 2375; GFX900: ; %bb.0: 2376; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2377; GFX900-NEXT: v_mad_mix_f32 v0, -v0, v1, v2 op_sel:[1,0,0] op_sel_hi:[1,1,1] 2378; GFX900-NEXT: s_setpc_b64 s[30:31] 2379; 2380; GFX906-LABEL: v_mad_mix_f32_preextractfneg_f16hi_f16lo_f16lo: 2381; GFX906: ; %bb.0: 2382; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2383; GFX906-NEXT: v_fma_mix_f32 v0, -v0, v1, v2 op_sel:[1,0,0] op_sel_hi:[1,1,1] 2384; GFX906-NEXT: s_setpc_b64 s[30:31] 2385; 2386; SDAG-GFX9GEN-LABEL: v_mad_mix_f32_preextractfneg_f16hi_f16lo_f16lo: 2387; SDAG-GFX9GEN: ; 
%bb.0: 2388; SDAG-GFX9GEN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2389; SDAG-GFX9GEN-NEXT: v_cvt_f32_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 2390; SDAG-GFX9GEN-NEXT: v_cvt_f32_f16_e32 v1, v1 2391; SDAG-GFX9GEN-NEXT: v_cvt_f32_f16_e32 v2, v2 2392; SDAG-GFX9GEN-NEXT: v_mad_f32 v0, -v0, v1, v2 2393; SDAG-GFX9GEN-NEXT: s_setpc_b64 s[30:31] 2394; 2395; SDAG-VI-LABEL: v_mad_mix_f32_preextractfneg_f16hi_f16lo_f16lo: 2396; SDAG-VI: ; %bb.0: 2397; SDAG-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2398; SDAG-VI-NEXT: v_cvt_f32_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 2399; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v1, v1 2400; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v2, v2 2401; SDAG-VI-NEXT: v_mad_f32 v0, -v0, v1, v2 2402; SDAG-VI-NEXT: s_setpc_b64 s[30:31] 2403; 2404; SDAG-CI-LABEL: v_mad_mix_f32_preextractfneg_f16hi_f16lo_f16lo: 2405; SDAG-CI: ; %bb.0: 2406; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2407; SDAG-CI-NEXT: v_lshrrev_b32_e32 v0, 16, v0 2408; SDAG-CI-NEXT: v_cvt_f32_f16_e64 v0, -v0 2409; SDAG-CI-NEXT: v_mad_f32 v0, v0, v1, v2 2410; SDAG-CI-NEXT: s_setpc_b64 s[30:31] 2411; 2412; GISEL-GFX9GEN-LABEL: v_mad_mix_f32_preextractfneg_f16hi_f16lo_f16lo: 2413; GISEL-GFX9GEN: ; %bb.0: 2414; GISEL-GFX9GEN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2415; GISEL-GFX9GEN-NEXT: v_xor_b32_e32 v0, 0x80008000, v0 2416; GISEL-GFX9GEN-NEXT: v_cvt_f32_f16_sdwa v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 2417; GISEL-GFX9GEN-NEXT: v_cvt_f32_f16_e32 v1, v1 2418; GISEL-GFX9GEN-NEXT: v_cvt_f32_f16_e32 v0, v2 2419; GISEL-GFX9GEN-NEXT: v_mac_f32_e32 v0, v3, v1 2420; GISEL-GFX9GEN-NEXT: s_setpc_b64 s[30:31] 2421; 2422; GISEL-VI-LABEL: v_mad_mix_f32_preextractfneg_f16hi_f16lo_f16lo: 2423; GISEL-VI: ; %bb.0: 2424; GISEL-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2425; GISEL-VI-NEXT: v_xor_b32_e32 v0, 0x80008000, v0 2426; GISEL-VI-NEXT: v_cvt_f32_f16_sdwa v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD 
src0_sel:WORD_1 2427; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v1, v1 2428; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v0, v2 2429; GISEL-VI-NEXT: v_mac_f32_e32 v0, v3, v1 2430; GISEL-VI-NEXT: s_setpc_b64 s[30:31] 2431; 2432; GISEL-CI-LABEL: v_mad_mix_f32_preextractfneg_f16hi_f16lo_f16lo: 2433; GISEL-CI: ; %bb.0: 2434; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2435; GISEL-CI-NEXT: v_xor_b32_e32 v0, 0x80008000, v0 2436; GISEL-CI-NEXT: v_lshrrev_b32_e32 v0, 16, v0 2437; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v3, v0 2438; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1 2439; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v2 2440; GISEL-CI-NEXT: v_mac_f32_e32 v0, v3, v1 2441; GISEL-CI-NEXT: s_setpc_b64 s[30:31] 2442 %src0.arg.bc = bitcast i32 %src0.arg to <2 x half> 2443 %fneg = fneg <2 x half> %src0.arg.bc 2444 %src0 = extractelement <2 x half> %fneg, i32 1 2445 %src0.ext = fpext half %src0 to float 2446 %src1.ext = fpext half %src1 to float 2447 %src2.ext = fpext half %src2 to float 2448 %result = tail call float @llvm.fmuladd.f32(float %src0.ext, float %src1.ext, float %src2.ext) 2449 ret float %result 2450} 2451 2452define float @v_mad_mix_f32_preextractfabs_f16hi_f16lo_f16lo(i32 %src0.arg, half %src1, half %src2) #0 { 2453; GFX1100-LABEL: v_mad_mix_f32_preextractfabs_f16hi_f16lo_f16lo: 2454; GFX1100: ; %bb.0: 2455; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2456; GFX1100-NEXT: v_fma_mix_f32 v0, |v0|, v1, v2 op_sel:[1,0,0] op_sel_hi:[1,1,1] 2457; GFX1100-NEXT: s_setpc_b64 s[30:31] 2458; 2459; GFX900-LABEL: v_mad_mix_f32_preextractfabs_f16hi_f16lo_f16lo: 2460; GFX900: ; %bb.0: 2461; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2462; GFX900-NEXT: v_mad_mix_f32 v0, |v0|, v1, v2 op_sel:[1,0,0] op_sel_hi:[1,1,1] 2463; GFX900-NEXT: s_setpc_b64 s[30:31] 2464; 2465; GFX906-LABEL: v_mad_mix_f32_preextractfabs_f16hi_f16lo_f16lo: 2466; GFX906: ; %bb.0: 2467; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2468; GFX906-NEXT: v_fma_mix_f32 v0, |v0|, v1, v2 op_sel:[1,0,0] 
op_sel_hi:[1,1,1] 2469; GFX906-NEXT: s_setpc_b64 s[30:31] 2470; 2471; SDAG-GFX9GEN-LABEL: v_mad_mix_f32_preextractfabs_f16hi_f16lo_f16lo: 2472; SDAG-GFX9GEN: ; %bb.0: 2473; SDAG-GFX9GEN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2474; SDAG-GFX9GEN-NEXT: v_cvt_f32_f16_sdwa v3, |v0| dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 2475; SDAG-GFX9GEN-NEXT: v_cvt_f32_f16_e32 v1, v1 2476; SDAG-GFX9GEN-NEXT: v_cvt_f32_f16_e32 v0, v2 2477; SDAG-GFX9GEN-NEXT: v_mac_f32_e32 v0, v3, v1 2478; SDAG-GFX9GEN-NEXT: s_setpc_b64 s[30:31] 2479; 2480; SDAG-VI-LABEL: v_mad_mix_f32_preextractfabs_f16hi_f16lo_f16lo: 2481; SDAG-VI: ; %bb.0: 2482; SDAG-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2483; SDAG-VI-NEXT: v_cvt_f32_f16_sdwa v3, |v0| dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 2484; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v1, v1 2485; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v0, v2 2486; SDAG-VI-NEXT: v_mac_f32_e32 v0, v3, v1 2487; SDAG-VI-NEXT: s_setpc_b64 s[30:31] 2488; 2489; SDAG-CI-LABEL: v_mad_mix_f32_preextractfabs_f16hi_f16lo_f16lo: 2490; SDAG-CI: ; %bb.0: 2491; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2492; SDAG-CI-NEXT: v_lshrrev_b32_e32 v0, 16, v0 2493; SDAG-CI-NEXT: v_cvt_f32_f16_e64 v0, |v0| 2494; SDAG-CI-NEXT: v_mad_f32 v0, v0, v1, v2 2495; SDAG-CI-NEXT: s_setpc_b64 s[30:31] 2496; 2497; GISEL-GFX9GEN-LABEL: v_mad_mix_f32_preextractfabs_f16hi_f16lo_f16lo: 2498; GISEL-GFX9GEN: ; %bb.0: 2499; GISEL-GFX9GEN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2500; GISEL-GFX9GEN-NEXT: v_and_b32_e32 v0, 0x7fff7fff, v0 2501; GISEL-GFX9GEN-NEXT: v_cvt_f32_f16_sdwa v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 2502; GISEL-GFX9GEN-NEXT: v_cvt_f32_f16_e32 v1, v1 2503; GISEL-GFX9GEN-NEXT: v_cvt_f32_f16_e32 v0, v2 2504; GISEL-GFX9GEN-NEXT: v_mac_f32_e32 v0, v3, v1 2505; GISEL-GFX9GEN-NEXT: s_setpc_b64 s[30:31] 2506; 2507; GISEL-VI-LABEL: v_mad_mix_f32_preextractfabs_f16hi_f16lo_f16lo: 2508; GISEL-VI: ; %bb.0: 2509; GISEL-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) 
lgkmcnt(0) 2510; GISEL-VI-NEXT: v_and_b32_e32 v0, 0x7fff7fff, v0 2511; GISEL-VI-NEXT: v_cvt_f32_f16_sdwa v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 2512; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v1, v1 2513; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v0, v2 2514; GISEL-VI-NEXT: v_mac_f32_e32 v0, v3, v1 2515; GISEL-VI-NEXT: s_setpc_b64 s[30:31] 2516; 2517; GISEL-CI-LABEL: v_mad_mix_f32_preextractfabs_f16hi_f16lo_f16lo: 2518; GISEL-CI: ; %bb.0: 2519; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2520; GISEL-CI-NEXT: v_and_b32_e32 v0, 0x7fff7fff, v0 2521; GISEL-CI-NEXT: v_lshrrev_b32_e32 v0, 16, v0 2522; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v3, v0 2523; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1 2524; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v2 2525; GISEL-CI-NEXT: v_mac_f32_e32 v0, v3, v1 2526; GISEL-CI-NEXT: s_setpc_b64 s[30:31] 2527 %src0.arg.bc = bitcast i32 %src0.arg to <2 x half> 2528 %fabs = call <2 x half> @llvm.fabs.v2f16(<2 x half> %src0.arg.bc) 2529 %src0 = extractelement <2 x half> %fabs, i32 1 2530 %src0.ext = fpext half %src0 to float 2531 %src1.ext = fpext half %src1 to float 2532 %src2.ext = fpext half %src2 to float 2533 %result = tail call float @llvm.fmuladd.f32(float %src0.ext, float %src1.ext, float %src2.ext) 2534 ret float %result 2535} 2536 2537define float @v_mad_mix_f32_preextractfabsfneg_f16hi_f16lo_f16lo(i32 %src0.arg, half %src1, half %src2) #0 { 2538; GFX1100-LABEL: v_mad_mix_f32_preextractfabsfneg_f16hi_f16lo_f16lo: 2539; GFX1100: ; %bb.0: 2540; GFX1100-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2541; GFX1100-NEXT: v_fma_mix_f32 v0, -|v0|, v1, v2 op_sel:[1,0,0] op_sel_hi:[1,1,1] 2542; GFX1100-NEXT: s_setpc_b64 s[30:31] 2543; 2544; GFX900-LABEL: v_mad_mix_f32_preextractfabsfneg_f16hi_f16lo_f16lo: 2545; GFX900: ; %bb.0: 2546; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2547; GFX900-NEXT: v_mad_mix_f32 v0, -|v0|, v1, v2 op_sel:[1,0,0] op_sel_hi:[1,1,1] 2548; GFX900-NEXT: s_setpc_b64 s[30:31] 2549; 2550; GFX906-LABEL: 
v_mad_mix_f32_preextractfabsfneg_f16hi_f16lo_f16lo: 2551; GFX906: ; %bb.0: 2552; GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2553; GFX906-NEXT: v_fma_mix_f32 v0, -|v0|, v1, v2 op_sel:[1,0,0] op_sel_hi:[1,1,1] 2554; GFX906-NEXT: s_setpc_b64 s[30:31] 2555; 2556; SDAG-GFX9GEN-LABEL: v_mad_mix_f32_preextractfabsfneg_f16hi_f16lo_f16lo: 2557; SDAG-GFX9GEN: ; %bb.0: 2558; SDAG-GFX9GEN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2559; SDAG-GFX9GEN-NEXT: v_cvt_f32_f16_sdwa v0, |v0| dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 2560; SDAG-GFX9GEN-NEXT: v_cvt_f32_f16_e32 v1, v1 2561; SDAG-GFX9GEN-NEXT: v_cvt_f32_f16_e32 v2, v2 2562; SDAG-GFX9GEN-NEXT: v_mad_f32 v0, -v0, v1, v2 2563; SDAG-GFX9GEN-NEXT: s_setpc_b64 s[30:31] 2564; 2565; SDAG-VI-LABEL: v_mad_mix_f32_preextractfabsfneg_f16hi_f16lo_f16lo: 2566; SDAG-VI: ; %bb.0: 2567; SDAG-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2568; SDAG-VI-NEXT: v_cvt_f32_f16_sdwa v0, |v0| dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 2569; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v1, v1 2570; SDAG-VI-NEXT: v_cvt_f32_f16_e32 v2, v2 2571; SDAG-VI-NEXT: v_mad_f32 v0, -v0, v1, v2 2572; SDAG-VI-NEXT: s_setpc_b64 s[30:31] 2573; 2574; SDAG-CI-LABEL: v_mad_mix_f32_preextractfabsfneg_f16hi_f16lo_f16lo: 2575; SDAG-CI: ; %bb.0: 2576; SDAG-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2577; SDAG-CI-NEXT: v_lshrrev_b32_e32 v0, 16, v0 2578; SDAG-CI-NEXT: v_cvt_f32_f16_e64 v0, -|v0| 2579; SDAG-CI-NEXT: v_mad_f32 v0, v0, v1, v2 2580; SDAG-CI-NEXT: s_setpc_b64 s[30:31] 2581; 2582; GISEL-GFX9GEN-LABEL: v_mad_mix_f32_preextractfabsfneg_f16hi_f16lo_f16lo: 2583; GISEL-GFX9GEN: ; %bb.0: 2584; GISEL-GFX9GEN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2585; GISEL-GFX9GEN-NEXT: v_or_b32_e32 v0, 0x80008000, v0 2586; GISEL-GFX9GEN-NEXT: v_cvt_f32_f16_sdwa v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 2587; GISEL-GFX9GEN-NEXT: v_cvt_f32_f16_e32 v1, v1 2588; GISEL-GFX9GEN-NEXT: v_cvt_f32_f16_e32 v0, v2 2589; GISEL-GFX9GEN-NEXT: 
v_mac_f32_e32 v0, v3, v1 2590; GISEL-GFX9GEN-NEXT: s_setpc_b64 s[30:31] 2591; 2592; GISEL-VI-LABEL: v_mad_mix_f32_preextractfabsfneg_f16hi_f16lo_f16lo: 2593; GISEL-VI: ; %bb.0: 2594; GISEL-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2595; GISEL-VI-NEXT: v_or_b32_e32 v0, 0x80008000, v0 2596; GISEL-VI-NEXT: v_cvt_f32_f16_sdwa v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 2597; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v1, v1 2598; GISEL-VI-NEXT: v_cvt_f32_f16_e32 v0, v2 2599; GISEL-VI-NEXT: v_mac_f32_e32 v0, v3, v1 2600; GISEL-VI-NEXT: s_setpc_b64 s[30:31] 2601; 2602; GISEL-CI-LABEL: v_mad_mix_f32_preextractfabsfneg_f16hi_f16lo_f16lo: 2603; GISEL-CI: ; %bb.0: 2604; GISEL-CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2605; GISEL-CI-NEXT: v_or_b32_e32 v0, 0x80008000, v0 2606; GISEL-CI-NEXT: v_lshrrev_b32_e32 v0, 16, v0 2607; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v3, v0 2608; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v1, v1 2609; GISEL-CI-NEXT: v_cvt_f32_f16_e32 v0, v2 2610; GISEL-CI-NEXT: v_mac_f32_e32 v0, v3, v1 2611; GISEL-CI-NEXT: s_setpc_b64 s[30:31] 2612 %src0.arg.bc = bitcast i32 %src0.arg to <2 x half> 2613 %fabs = call <2 x half> @llvm.fabs.v2f16(<2 x half> %src0.arg.bc) 2614 %fneg.fabs = fneg <2 x half> %fabs 2615 %src0 = extractelement <2 x half> %fneg.fabs, i32 1 2616 %src0.ext = fpext half %src0 to float 2617 %src1.ext = fpext half %src1 to float 2618 %src2.ext = fpext half %src2 to float 2619 %result = tail call float @llvm.fmuladd.f32(float %src0.ext, float %src1.ext, float %src2.ext) 2620 ret float %result 2621} 2622 2623declare half @llvm.fabs.f16(half) #2 2624declare <2 x half> @llvm.fabs.v2f16(<2 x half>) #2 2625declare float @llvm.fabs.f32(float) #2 2626declare float @llvm.minnum.f32(float, float) #2 2627declare float @llvm.maxnum.f32(float, float) #2 2628declare float @llvm.fmuladd.f32(float, float, float) #2 2629declare <2 x float> @llvm.fmuladd.v2f32(<2 x float>, <2 x float>, <2 x float>) #2 2630 2631attributes #0 = { nounwind 
"denormal-fp-math-f32"="preserve-sign,preserve-sign" } 2632attributes #1 = { nounwind "denormal-fp-math-f32"="ieee,ieee" } 2633attributes #2 = { nounwind readnone speculatable } 2634