; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -global-isel=0 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9,GFX9-SDAG %s
; RUN: llc -global-isel=1 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9,GFX9-GISEL %s

; RUN: llc -global-isel=0 -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji < %s | FileCheck -check-prefixes=GCN,GFX8,GFX8-SDAG %s
; RUN: llc -global-isel=1 -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji < %s | FileCheck -check-prefixes=GCN,GFX8,GFX8-GISEL %s

; RUN: llc -global-isel=0 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 < %s | FileCheck -check-prefixes=GFX10PLUS,GFX10,GFX10-SDAG %s
; RUN: llc -global-isel=1 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 < %s | FileCheck -check-prefixes=GFX10PLUS,GFX10,GFX10-GISEL %s

; RUN: llc -global-isel=0 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX10PLUS,GFX11,GFX11-SDAG %s
; RUN: llc -global-isel=1 -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX10PLUS,GFX11,GFX11-GISEL %s

; Checks the code generated for llvm.experimental.constrained.fmul on half,
; <2 x half>, <3 x half> and <4 x half> operands. Each -mcpu above is run once
; with -global-isel=0 (the *-SDAG prefixes) and once with -global-isel=1 (the
; *-GISEL prefixes).

; FIXME: promotion not handled without f16 insts

; Scalar half multiply, one function per exception-behavior metadata value
; (fpexcept.strict, fpexcept.ignore, fpexcept.maytrap).
define half @v_constained_fmul_f16_fpexcept_strict(half %x, half %y) #0 {
; GCN-LABEL: v_constained_fmul_f16_fpexcept_strict:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_mul_f16_e32 v0, v0, v1
; GCN-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10PLUS-LABEL: v_constained_fmul_f16_fpexcept_strict:
; GFX10PLUS:       ; %bb.0:
; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10PLUS-NEXT:    v_mul_f16_e32 v0, v0, v1
; GFX10PLUS-NEXT:    s_setpc_b64 s[30:31]
  %val = call half @llvm.experimental.constrained.fmul.f16(half %x, half %y, metadata !"round.tonearest", metadata !"fpexcept.strict")
  ret half %val
}

define half @v_constained_fmul_f16_fpexcept_ignore(half %x, half %y) #0 {
; GCN-LABEL: v_constained_fmul_f16_fpexcept_ignore:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_mul_f16_e32 v0, v0, v1
; GCN-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10PLUS-LABEL: v_constained_fmul_f16_fpexcept_ignore:
; GFX10PLUS:       ; %bb.0:
; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10PLUS-NEXT:    v_mul_f16_e32 v0, v0, v1
; GFX10PLUS-NEXT:    s_setpc_b64 s[30:31]
  %val = call half @llvm.experimental.constrained.fmul.f16(half %x, half %y, metadata !"round.tonearest", metadata !"fpexcept.ignore")
  ret half %val
}

define half @v_constained_fmul_f16_fpexcept_maytrap(half %x, half %y) #0 {
; GCN-LABEL: v_constained_fmul_f16_fpexcept_maytrap:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_mul_f16_e32 v0, v0, v1
; GCN-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10PLUS-LABEL: v_constained_fmul_f16_fpexcept_maytrap:
; GFX10PLUS:       ; %bb.0:
; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10PLUS-NEXT:    v_mul_f16_e32 v0, v0, v1
; GFX10PLUS-NEXT:    s_setpc_b64 s[30:31]
  %val = call half @llvm.experimental.constrained.fmul.f16(half %x, half %y, metadata !"round.tonearest", metadata !"fpexcept.maytrap")
  ret half %val
}

; <2 x half> multiply, one function per exception-behavior metadata value.
define <2 x half> @v_constained_fmul_v2f16_fpexcept_strict(<2 x half> %x, <2 x half> %y) #0 {
; GFX9-LABEL: v_constained_fmul_v2f16_fpexcept_strict:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_pk_mul_f16 v0, v0, v1
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-SDAG-LABEL: v_constained_fmul_v2f16_fpexcept_strict:
; GFX8-SDAG:       ; %bb.0:
; GFX8-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-SDAG-NEXT:    v_mul_f16_sdwa v2, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-SDAG-NEXT:    v_mul_f16_e32 v0, v0, v1
; GFX8-SDAG-NEXT:    v_or_b32_e32 v0, v0, v2
; GFX8-SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-GISEL-LABEL: v_constained_fmul_v2f16_fpexcept_strict:
; GFX8-GISEL:       ; %bb.0:
; GFX8-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-GISEL-NEXT:    v_mul_f16_e32 v2, v0, v1
; GFX8-GISEL-NEXT:    v_mul_f16_sdwa v0, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-GISEL-NEXT:    v_or_b32_e32 v0, v2, v0
; GFX8-GISEL-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10PLUS-LABEL: v_constained_fmul_v2f16_fpexcept_strict:
; GFX10PLUS:       ; %bb.0:
; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10PLUS-NEXT:    v_pk_mul_f16 v0, v0, v1
; GFX10PLUS-NEXT:    s_setpc_b64 s[30:31]
  %val = call <2 x half> @llvm.experimental.constrained.fmul.v2f16(<2 x half> %x, <2 x half> %y, metadata !"round.tonearest", metadata !"fpexcept.strict")
  ret <2 x half> %val
}

define <2 x half> @v_constained_fmul_v2f16_fpexcept_ignore(<2 x half> %x, <2 x half> %y) #0 {
; GFX9-LABEL: v_constained_fmul_v2f16_fpexcept_ignore:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_pk_mul_f16 v0, v0, v1
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-SDAG-LABEL: v_constained_fmul_v2f16_fpexcept_ignore:
; GFX8-SDAG:       ; %bb.0:
; GFX8-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-SDAG-NEXT:    v_mul_f16_sdwa v2, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-SDAG-NEXT:    v_mul_f16_e32 v0, v0, v1
; GFX8-SDAG-NEXT:    v_or_b32_e32 v0, v0, v2
; GFX8-SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-GISEL-LABEL: v_constained_fmul_v2f16_fpexcept_ignore:
; GFX8-GISEL:       ; %bb.0:
; GFX8-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-GISEL-NEXT:    v_mul_f16_e32 v2, v0, v1
; GFX8-GISEL-NEXT:    v_mul_f16_sdwa v0, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-GISEL-NEXT:    v_or_b32_e32 v0, v2, v0
; GFX8-GISEL-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10PLUS-LABEL: v_constained_fmul_v2f16_fpexcept_ignore:
; GFX10PLUS:       ; %bb.0:
; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10PLUS-NEXT:    v_pk_mul_f16 v0, v0, v1
; GFX10PLUS-NEXT:    s_setpc_b64 s[30:31]
  %val = call <2 x half> @llvm.experimental.constrained.fmul.v2f16(<2 x half> %x, <2 x half> %y, metadata !"round.tonearest", metadata !"fpexcept.ignore")
  ret <2 x half> %val
}

define <2 x half> @v_constained_fmul_v2f16_fpexcept_maytrap(<2 x half> %x, <2 x half> %y) #0 {
; GFX9-LABEL: v_constained_fmul_v2f16_fpexcept_maytrap:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_pk_mul_f16 v0, v0, v1
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-SDAG-LABEL: v_constained_fmul_v2f16_fpexcept_maytrap:
; GFX8-SDAG:       ; %bb.0:
; GFX8-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-SDAG-NEXT:    v_mul_f16_sdwa v2, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-SDAG-NEXT:    v_mul_f16_e32 v0, v0, v1
; GFX8-SDAG-NEXT:    v_or_b32_e32 v0, v0, v2
; GFX8-SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-GISEL-LABEL: v_constained_fmul_v2f16_fpexcept_maytrap:
; GFX8-GISEL:       ; %bb.0:
; GFX8-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-GISEL-NEXT:    v_mul_f16_e32 v2, v0, v1
; GFX8-GISEL-NEXT:    v_mul_f16_sdwa v0, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-GISEL-NEXT:    v_or_b32_e32 v0, v2, v0
; GFX8-GISEL-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10PLUS-LABEL: v_constained_fmul_v2f16_fpexcept_maytrap:
; GFX10PLUS:       ; %bb.0:
; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10PLUS-NEXT:    v_pk_mul_f16 v0, v0, v1
; GFX10PLUS-NEXT:    s_setpc_b64 s[30:31]
  %val = call <2 x half> @llvm.experimental.constrained.fmul.v2f16(<2 x half> %x, <2 x half> %y, metadata !"round.tonearest", metadata !"fpexcept.maytrap")
  ret <2 x half> %val
}

; <3 x half> multiply with fpexcept.strict.
define <3 x half> @v_constained_fmul_v3f16_fpexcept_strict(<3 x half> %x, <3 x half> %y) #0 {
; GFX9-SDAG-LABEL: v_constained_fmul_v3f16_fpexcept_strict:
; GFX9-SDAG:       ; %bb.0:
; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-SDAG-NEXT:    v_pk_mul_f16 v0, v0, v2
; GFX9-SDAG-NEXT:    v_mul_f16_e32 v1, v1, v3
; GFX9-SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-GISEL-LABEL: v_constained_fmul_v3f16_fpexcept_strict:
; GFX9-GISEL:       ; %bb.0:
; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-GISEL-NEXT:    v_pk_mul_f16 v0, v0, v2
; GFX9-GISEL-NEXT:    v_pk_mul_f16 v1, v1, v3
; GFX9-GISEL-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-SDAG-LABEL: v_constained_fmul_v3f16_fpexcept_strict:
; GFX8-SDAG:       ; %bb.0:
; GFX8-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-SDAG-NEXT:    v_mul_f16_sdwa v4, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-SDAG-NEXT:    v_mul_f16_e32 v0, v0, v2
; GFX8-SDAG-NEXT:    v_or_b32_e32 v0, v0, v4
; GFX8-SDAG-NEXT:    v_mul_f16_e32 v1, v1, v3
; GFX8-SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-GISEL-LABEL: v_constained_fmul_v3f16_fpexcept_strict:
; GFX8-GISEL:       ; %bb.0:
; GFX8-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-GISEL-NEXT:    v_mul_f16_e32 v4, v0, v2
; GFX8-GISEL-NEXT:    v_mul_f16_sdwa v0, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-GISEL-NEXT:    v_mul_f16_e32 v1, v1, v3
; GFX8-GISEL-NEXT:    v_or_b32_e32 v0, v4, v0
; GFX8-GISEL-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-SDAG-LABEL: v_constained_fmul_v3f16_fpexcept_strict:
; GFX10-SDAG:       ; %bb.0:
; GFX10-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-SDAG-NEXT:    v_pk_mul_f16 v0, v0, v2
; GFX10-SDAG-NEXT:    v_mul_f16_e32 v1, v1, v3
; GFX10-SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-GISEL-LABEL: v_constained_fmul_v3f16_fpexcept_strict:
; GFX10-GISEL:       ; %bb.0:
; GFX10-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-GISEL-NEXT:    v_pk_mul_f16 v0, v0, v2
; GFX10-GISEL-NEXT:    v_pk_mul_f16 v1, v1, v3
; GFX10-GISEL-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-SDAG-LABEL: v_constained_fmul_v3f16_fpexcept_strict:
; GFX11-SDAG:       ; %bb.0:
; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-SDAG-NEXT:    v_pk_mul_f16 v0, v0, v2
; GFX11-SDAG-NEXT:    v_mul_f16_e32 v1, v1, v3
; GFX11-SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-GISEL-LABEL: v_constained_fmul_v3f16_fpexcept_strict:
; GFX11-GISEL:       ; %bb.0:
; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-GISEL-NEXT:    v_pk_mul_f16 v0, v0, v2
; GFX11-GISEL-NEXT:    v_pk_mul_f16 v1, v1, v3
; GFX11-GISEL-NEXT:    s_setpc_b64 s[30:31]
  %val = call <3 x half> @llvm.experimental.constrained.fmul.v3f16(<3 x half> %x, <3 x half> %y, metadata !"round.tonearest", metadata !"fpexcept.strict")
  ret <3 x half> %val
}

; FIXME: Scalarized
define <4 x half> @v_constained_fmul_v4f16_fpexcept_strict(<4 x half> %x, <4 x half> %y) #0 {
; GFX9-SDAG-LABEL: v_constained_fmul_v4f16_fpexcept_strict:
; GFX9-SDAG:       ; %bb.0:
; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-SDAG-NEXT:    v_mul_f16_sdwa v4, v1, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX9-SDAG-NEXT:    v_mul_f16_sdwa v5, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX9-SDAG-NEXT:    v_mul_f16_e32 v1, v1, v3
; GFX9-SDAG-NEXT:    v_mul_f16_e32 v0, v0, v2
; GFX9-SDAG-NEXT:    s_mov_b32 s4, 0x5040100
; GFX9-SDAG-NEXT:    v_perm_b32 v0, v5, v0, s4
; GFX9-SDAG-NEXT:    v_perm_b32 v1, v4, v1, s4
; GFX9-SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-GISEL-LABEL: v_constained_fmul_v4f16_fpexcept_strict:
; GFX9-GISEL:       ; %bb.0:
; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-GISEL-NEXT:    v_pk_mul_f16 v0, v0, v2
; GFX9-GISEL-NEXT:    v_pk_mul_f16 v1, v1, v3
; GFX9-GISEL-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-SDAG-LABEL: v_constained_fmul_v4f16_fpexcept_strict:
; GFX8-SDAG:       ; %bb.0:
; GFX8-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-SDAG-NEXT:    v_mul_f16_sdwa v4, v1, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-SDAG-NEXT:    v_mul_f16_sdwa v5, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-SDAG-NEXT:    v_mul_f16_e32 v1, v1, v3
; GFX8-SDAG-NEXT:    v_mul_f16_e32 v0, v0, v2
; GFX8-SDAG-NEXT:    v_or_b32_e32 v0, v0, v5
; GFX8-SDAG-NEXT:    v_or_b32_e32 v1, v1, v4
; GFX8-SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-GISEL-LABEL: v_constained_fmul_v4f16_fpexcept_strict:
; GFX8-GISEL:       ; %bb.0:
; GFX8-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-GISEL-NEXT:    v_mul_f16_e32 v4, v0, v2
; GFX8-GISEL-NEXT:    v_mul_f16_sdwa v0, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-GISEL-NEXT:    v_mul_f16_e32 v2, v1, v3
; GFX8-GISEL-NEXT:    v_mul_f16_sdwa v1, v1, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-GISEL-NEXT:    v_or_b32_e32 v0, v4, v0
; GFX8-GISEL-NEXT:    v_or_b32_e32 v1, v2, v1
; GFX8-GISEL-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-SDAG-LABEL: v_constained_fmul_v4f16_fpexcept_strict:
; GFX10-SDAG:       ; %bb.0:
; GFX10-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-SDAG-NEXT:    v_mul_f16_sdwa v4, v1, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX10-SDAG-NEXT:    v_mul_f16_sdwa v5, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX10-SDAG-NEXT:    v_mul_f16_e32 v0, v0, v2
; GFX10-SDAG-NEXT:    v_mul_f16_e32 v1, v1, v3
; GFX10-SDAG-NEXT:    v_perm_b32 v0, v5, v0, 0x5040100
; GFX10-SDAG-NEXT:    v_perm_b32 v1, v4, v1, 0x5040100
; GFX10-SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-GISEL-LABEL: v_constained_fmul_v4f16_fpexcept_strict:
; GFX10-GISEL:       ; %bb.0:
; GFX10-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-GISEL-NEXT:    v_pk_mul_f16 v0, v0, v2
; GFX10-GISEL-NEXT:    v_pk_mul_f16 v1, v1, v3
; GFX10-GISEL-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-SDAG-LABEL: v_constained_fmul_v4f16_fpexcept_strict:
; GFX11-SDAG:       ; %bb.0:
; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-SDAG-NEXT:    v_lshrrev_b32_e32 v4, 16, v3
; GFX11-SDAG-NEXT:    v_lshrrev_b32_e32 v5, 16, v2
; GFX11-SDAG-NEXT:    v_lshrrev_b32_e32 v6, 16, v0
; GFX11-SDAG-NEXT:    v_lshrrev_b32_e32 v7, 16, v1
; GFX11-SDAG-NEXT:    v_mul_f16_e32 v1, v1, v3
; GFX11-SDAG-NEXT:    v_mul_f16_e32 v0, v0, v2
; GFX11-SDAG-NEXT:    v_mul_f16_e32 v2, v6, v5
; GFX11-SDAG-NEXT:    v_mul_f16_e32 v3, v7, v4
; GFX11-SDAG-NEXT:    v_perm_b32 v0, v2, v0, 0x5040100
; GFX11-SDAG-NEXT:    v_perm_b32 v1, v3, v1, 0x5040100
; GFX11-SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-GISEL-LABEL: v_constained_fmul_v4f16_fpexcept_strict:
; GFX11-GISEL:       ; %bb.0:
; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-GISEL-NEXT:    v_pk_mul_f16 v0, v0, v2
; GFX11-GISEL-NEXT:    v_pk_mul_f16 v1, v1, v3
; GFX11-GISEL-NEXT:    s_setpc_b64 s[30:31]
  %val = call <4 x half> @llvm.experimental.constrained.fmul.v4f16(<4 x half> %x, <4 x half> %y, metadata !"round.tonearest", metadata !"fpexcept.strict")
  ret <4 x half> %val
}

; amdgpu_ps functions taking their operands as inreg arguments, so the checks
; below see the inputs in s2/s3 rather than in VGPRs.
define amdgpu_ps half @s_constained_fmul_f16_fpexcept_strict(half inreg %x, half inreg %y) #0 {
; GCN-LABEL: s_constained_fmul_f16_fpexcept_strict:
; GCN:       ; %bb.0:
; GCN-NEXT:    v_mov_b32_e32 v0, s3
; GCN-NEXT:    v_mul_f16_e32 v0, s2, v0
; GCN-NEXT:    ; return to shader part epilog
;
; GFX10PLUS-LABEL: s_constained_fmul_f16_fpexcept_strict:
; GFX10PLUS:       ; %bb.0:
; GFX10PLUS-NEXT:    v_mul_f16_e64 v0, s2, s3
; GFX10PLUS-NEXT:    ; return to shader part epilog
  %val = call half @llvm.experimental.constrained.fmul.f16(half %x, half %y, metadata !"round.tonearest", metadata !"fpexcept.strict")
  ret half %val
}

define amdgpu_ps <2 x half> @s_constained_fmul_v2f16_fpexcept_strict(<2 x half> inreg %x, <2 x half> inreg %y) #0 {
; GFX9-LABEL: s_constained_fmul_v2f16_fpexcept_strict:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    v_mov_b32_e32 v0, s3
; GFX9-NEXT:    v_pk_mul_f16 v0, s2, v0
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX8-SDAG-LABEL: s_constained_fmul_v2f16_fpexcept_strict:
; GFX8-SDAG:       ; %bb.0:
; GFX8-SDAG-NEXT:    s_lshr_b32 s0, s3, 16
; GFX8-SDAG-NEXT:    s_lshr_b32 s1, s2, 16
; GFX8-SDAG-NEXT:    v_mov_b32_e32 v0, s0
; GFX8-SDAG-NEXT:    v_mov_b32_e32 v1, s1
; GFX8-SDAG-NEXT:    v_mul_f16_sdwa v0, v1, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
; GFX8-SDAG-NEXT:    v_mov_b32_e32 v1, s3
; GFX8-SDAG-NEXT:    v_mul_f16_e32 v1, s2, v1
; GFX8-SDAG-NEXT:    v_or_b32_e32 v0, v1, v0
; GFX8-SDAG-NEXT:    ; return to shader part epilog
;
; GFX8-GISEL-LABEL: s_constained_fmul_v2f16_fpexcept_strict:
; GFX8-GISEL:       ; %bb.0:
; GFX8-GISEL-NEXT:    s_lshr_b32 s0, s2, 16
; GFX8-GISEL-NEXT:    s_lshr_b32 s1, s3, 16
; GFX8-GISEL-NEXT:    v_mov_b32_e32 v0, s3
; GFX8-GISEL-NEXT:    v_mov_b32_e32 v1, s1
; GFX8-GISEL-NEXT:    v_mov_b32_e32 v2, s0
; GFX8-GISEL-NEXT:    v_mul_f16_e32 v0, s2, v0
; GFX8-GISEL-NEXT:    v_mul_f16_sdwa v1, v2, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
; GFX8-GISEL-NEXT:    v_or_b32_e32 v0, v0, v1
; GFX8-GISEL-NEXT:    ; return to shader part epilog
;
; GFX10PLUS-LABEL: s_constained_fmul_v2f16_fpexcept_strict:
; GFX10PLUS:       ; %bb.0:
; GFX10PLUS-NEXT:    v_pk_mul_f16 v0, s2, s3
; GFX10PLUS-NEXT:    ; return to shader part epilog
  %val = call <2 x half> @llvm.experimental.constrained.fmul.v2f16(<2 x half> %x, <2 x half> %y, metadata !"round.tonearest", metadata !"fpexcept.strict")
  ret <2 x half> %val
}

declare half @llvm.experimental.constrained.fmul.f16(half, half, metadata, metadata) #1
declare <2 x half> @llvm.experimental.constrained.fmul.v2f16(<2 x half>, <2 x half>, metadata, metadata) #1
declare <3 x half> @llvm.experimental.constrained.fmul.v3f16(<3 x half>, <3 x half>, metadata, metadata) #1
declare <4 x half> @llvm.experimental.constrained.fmul.v4f16(<4 x half>, <4 x half>, metadata, metadata) #1

attributes #0 = { strictfp }
attributes #1 = { inaccessiblememonly nounwind willreturn }
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
; GFX10: {{.*}}
; GFX11: {{.*}}
; GFX8: {{.*}}