; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
; FIXME: Enable f16 promotion
; The two tahiti invocations below are spelled XUN rather than with the usual
; lit keyword, and no GFX6 assertions exist in this file. Presumably they stay
; disabled until the f16 promotion mentioned in the FIXME is available
; (TODO confirm).
; XUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=tahiti < %s | FileCheck -check-prefixes=GCN,GFX6,GFX6-SDAG %s
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=tonga < %s | FileCheck -check-prefixes=GCN,GFX8,GFX8-SDAG %s
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9,GFX9-SDAG %s
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1100 < %s | FileCheck -check-prefixes=GCN,GFX11,GFX11-SDAG %s

; XUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=tahiti < %s | FileCheck -check-prefixes=GCN,GFX6,GFX6-GISEL %s
; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=tonga < %s | FileCheck -check-prefixes=GCN,GFX8,GFX8-GISEL %s
; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9,GFX9-GISEL %s
; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1100 < %s | FileCheck -check-prefixes=GCN,GFX11,GFX11-GISEL %s

; Disabled scalar variant taking an i16 exponent. It is kept commented out;
; the matching intrinsic declaration is still present near the end of the file.
; define half @test_ldexp_f16_i16(ptr addrspace(1) %out, half %a, i16 %b) #0 {
;   %result = call half @llvm.experimental.constrained.ldexp.f16.i16(half %a, i16 %b, metadata !"round.dynamic", metadata !"fpexcept.strict")
;   ret half %result
; }

; Scalar case: constrained ldexp of a half by an i32 exponent. The %out
; argument is not referenced by the body. In every expected sequence below the
; exponent goes through v_med3_i32 with the constants 0x7fff and
; 0x8000 / 0xffff8000 (as printed) before it is fed to v_ldexp_f16.
define half @test_ldexp_f16_i32(ptr addrspace(1) %out, half %a, i32 %b) #0 {
; GFX8-SDAG-LABEL: test_ldexp_f16_i32:
; GFX8-SDAG:       ; %bb.0:
; GFX8-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-SDAG-NEXT:    s_movk_i32 s4, 0x8000
; GFX8-SDAG-NEXT:    v_mov_b32_e32 v0, 0x7fff
; GFX8-SDAG-NEXT:    v_med3_i32 v0, v3, s4, v0
; GFX8-SDAG-NEXT:    v_ldexp_f16_e32 v0, v2, v0
; GFX8-SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-SDAG-LABEL: test_ldexp_f16_i32:
; GFX9-SDAG:       ; %bb.0:
; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-SDAG-NEXT:    s_movk_i32 s4, 0x8000
; GFX9-SDAG-NEXT:    v_mov_b32_e32 v0, 0x7fff
; GFX9-SDAG-NEXT:    v_med3_i32 v0, v3, s4, v0
; GFX9-SDAG-NEXT:    v_ldexp_f16_e32 v0, v2, v0
; GFX9-SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-SDAG-LABEL: test_ldexp_f16_i32:
; GFX11-SDAG:       ; %bb.0:
; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-SDAG-NEXT:    s_movk_i32 s0, 0x8000
; GFX11-SDAG-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-SDAG-NEXT:    v_med3_i32 v0, v3, s0, 0x7fff
; GFX11-SDAG-NEXT:    v_ldexp_f16_e32 v0, v2, v0
; GFX11-SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-GISEL-LABEL: test_ldexp_f16_i32:
; GFX8-GISEL:       ; %bb.0:
; GFX8-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-GISEL-NEXT:    v_mov_b32_e32 v0, 0xffff8000
; GFX8-GISEL-NEXT:    v_mov_b32_e32 v1, 0x7fff
; GFX8-GISEL-NEXT:    v_med3_i32 v0, v3, v0, v1
; GFX8-GISEL-NEXT:    v_ldexp_f16_e32 v0, v2, v0
; GFX8-GISEL-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-GISEL-LABEL: test_ldexp_f16_i32:
; GFX9-GISEL:       ; %bb.0:
; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-GISEL-NEXT:    v_mov_b32_e32 v0, 0xffff8000
; GFX9-GISEL-NEXT:    v_mov_b32_e32 v1, 0x7fff
; GFX9-GISEL-NEXT:    v_med3_i32 v0, v3, v0, v1
; GFX9-GISEL-NEXT:    v_ldexp_f16_e32 v0, v2, v0
; GFX9-GISEL-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-GISEL-LABEL: test_ldexp_f16_i32:
; GFX11-GISEL:       ; %bb.0:
; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-GISEL-NEXT:    v_mov_b32_e32 v0, 0x7fff
; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-GISEL-NEXT:    v_med3_i32 v0, 0xffff8000, v3, v0
; GFX11-GISEL-NEXT:    v_ldexp_f16_e32 v0, v2, v0
; GFX11-GISEL-NEXT:    s_setpc_b64 s[30:31]
  %result = call half @llvm.experimental.constrained.ldexp.f16.i32(half %a, i32 %b, metadata !"round.dynamic", metadata !"fpexcept.strict")
  ret half %result
}

; Disabled <2 x half> variant taking <2 x i16> exponents. It is kept commented
; out; the matching intrinsic declaration is still present near the end of the
; file.
; define <2 x half> @test_ldexp_v2f16_v2i16(ptr addrspace(1) %out, <2 x half> %a, <2 x i16> %b) #0 {
;   %result = call <2 x half> @llvm.experimental.constrained.ldexp.v2f16.v2i16(<2 x half> %a, <2 x i16> %b, metadata !"round.dynamic", metadata !"fpexcept.strict")
;   ret <2 x half> %result
; }

; Two-element vector case. The expected sequences apply v_med3_i32 and
; v_ldexp_f16 once per element and then recombine the two 16-bit results into
; one register (v_or_b32, v_perm_b32 or v_lshl_or_b32 depending on the run).
; The %out argument is not referenced by the body.
define <2 x half> @test_ldexp_v2f16_v2i32(ptr addrspace(1) %out, <2 x half> %a, <2 x i32> %b) #0 {
; GFX8-SDAG-LABEL: test_ldexp_v2f16_v2i32:
; GFX8-SDAG:       ; %bb.0:
; GFX8-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-SDAG-NEXT:    s_movk_i32 s4, 0x8000
; GFX8-SDAG-NEXT:    v_mov_b32_e32 v0, 0x7fff
; GFX8-SDAG-NEXT:    v_med3_i32 v1, v3, s4, v0
; GFX8-SDAG-NEXT:    v_med3_i32 v0, v4, s4, v0
; GFX8-SDAG-NEXT:    v_ldexp_f16_e32 v1, v2, v1
; GFX8-SDAG-NEXT:    v_ldexp_f16_sdwa v0, v2, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; GFX8-SDAG-NEXT:    v_or_b32_e32 v0, v1, v0
; GFX8-SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-SDAG-LABEL: test_ldexp_v2f16_v2i32:
; GFX9-SDAG:       ; %bb.0:
; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-SDAG-NEXT:    s_movk_i32 s4, 0x8000
; GFX9-SDAG-NEXT:    v_mov_b32_e32 v0, 0x7fff
; GFX9-SDAG-NEXT:    v_med3_i32 v1, v3, s4, v0
; GFX9-SDAG-NEXT:    v_med3_i32 v0, v4, s4, v0
; GFX9-SDAG-NEXT:    v_ldexp_f16_e32 v1, v2, v1
; GFX9-SDAG-NEXT:    v_ldexp_f16_sdwa v0, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; GFX9-SDAG-NEXT:    s_mov_b32 s4, 0x5040100
; GFX9-SDAG-NEXT:    v_perm_b32 v0, v0, v1, s4
; GFX9-SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-SDAG-LABEL: test_ldexp_v2f16_v2i32:
; GFX11-SDAG:       ; %bb.0:
; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-SDAG-NEXT:    s_movk_i32 s0, 0x8000
; GFX11-SDAG-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(VALU_DEP_3)
; GFX11-SDAG-NEXT:    v_med3_i32 v0, v3, s0, 0x7fff
; GFX11-SDAG-NEXT:    v_med3_i32 v1, v4, s0, 0x7fff
; GFX11-SDAG-NEXT:    v_lshrrev_b32_e32 v3, 16, v2
; GFX11-SDAG-NEXT:    v_ldexp_f16_e32 v0, v2, v0
; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-SDAG-NEXT:    v_ldexp_f16_e32 v1, v3, v1
; GFX11-SDAG-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
; GFX11-SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-GISEL-LABEL: test_ldexp_v2f16_v2i32:
; GFX8-GISEL:       ; %bb.0:
; GFX8-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-GISEL-NEXT:    v_mov_b32_e32 v0, 0xffff8000
; GFX8-GISEL-NEXT:    v_mov_b32_e32 v1, 0x7fff
; GFX8-GISEL-NEXT:    v_med3_i32 v3, v3, v0, v1
; GFX8-GISEL-NEXT:    v_med3_i32 v0, v4, v0, v1
; GFX8-GISEL-NEXT:    v_ldexp_f16_e32 v3, v2, v3
; GFX8-GISEL-NEXT:    v_ldexp_f16_sdwa v0, v2, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; GFX8-GISEL-NEXT:    v_or_b32_e32 v0, v3, v0
; GFX8-GISEL-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-GISEL-LABEL: test_ldexp_v2f16_v2i32:
; GFX9-GISEL:       ; %bb.0:
; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-GISEL-NEXT:    v_mov_b32_e32 v0, 0xffff8000
; GFX9-GISEL-NEXT:    v_mov_b32_e32 v1, 0x7fff
; GFX9-GISEL-NEXT:    v_med3_i32 v3, v3, v0, v1
; GFX9-GISEL-NEXT:    v_med3_i32 v0, v4, v0, v1
; GFX9-GISEL-NEXT:    v_ldexp_f16_e32 v3, v2, v3
; GFX9-GISEL-NEXT:    v_ldexp_f16_sdwa v0, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; GFX9-GISEL-NEXT:    v_lshl_or_b32 v0, v0, 16, v3
; GFX9-GISEL-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-GISEL-LABEL: test_ldexp_v2f16_v2i32:
; GFX11-GISEL:       ; %bb.0:
; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-GISEL-NEXT:    v_mov_b32_e32 v0, 0x7fff
; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_3)
; GFX11-GISEL-NEXT:    v_med3_i32 v1, 0xffff8000, v3, v0
; GFX11-GISEL-NEXT:    v_lshrrev_b32_e32 v3, 16, v2
; GFX11-GISEL-NEXT:    v_med3_i32 v0, 0xffff8000, v4, v0
; GFX11-GISEL-NEXT:    v_ldexp_f16_e32 v1, v2, v1
; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-GISEL-NEXT:    v_ldexp_f16_e32 v0, v3, v0
; GFX11-GISEL-NEXT:    v_and_b32_e32 v1, 0xffff, v1
; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX11-GISEL-NEXT:    v_lshl_or_b32 v0, v0, 16, v1
; GFX11-GISEL-NEXT:    s_setpc_b64 s[30:31]
  %result = call <2 x half> @llvm.experimental.constrained.ldexp.v2f16.v2i32(<2 x half> %a, <2 x i32> %b, metadata !"round.dynamic", metadata !"fpexcept.strict")
  ret <2 x half> %result
}

; Three-element vector case. The expected sequences pack the first two results
; into v0 and leave the third result in v1. The %out argument is not
; referenced by the body.
define <3 x half> @test_ldexp_v3f16_v3i32(ptr addrspace(1) %out, <3 x half> %a, <3 x i32> %b) #0 {
; GFX8-SDAG-LABEL: test_ldexp_v3f16_v3i32:
; GFX8-SDAG:       ; %bb.0:
; GFX8-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-SDAG-NEXT:    s_movk_i32 s4, 0x8000
; GFX8-SDAG-NEXT:    v_mov_b32_e32 v1, 0x7fff
; GFX8-SDAG-NEXT:    v_med3_i32 v0, v4, s4, v1
; GFX8-SDAG-NEXT:    v_med3_i32 v4, v5, s4, v1
; GFX8-SDAG-NEXT:    v_ldexp_f16_e32 v0, v2, v0
; GFX8-SDAG-NEXT:    v_ldexp_f16_sdwa v2, v2, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; GFX8-SDAG-NEXT:    v_med3_i32 v1, v6, s4, v1
; GFX8-SDAG-NEXT:    v_or_b32_e32 v0, v0, v2
; GFX8-SDAG-NEXT:    v_ldexp_f16_e32 v1, v3, v1
; GFX8-SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-SDAG-LABEL: test_ldexp_v3f16_v3i32:
; GFX9-SDAG:       ; %bb.0:
; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-SDAG-NEXT:    s_movk_i32 s4, 0x8000
; GFX9-SDAG-NEXT:    v_mov_b32_e32 v1, 0x7fff
; GFX9-SDAG-NEXT:    v_med3_i32 v0, v4, s4, v1
; GFX9-SDAG-NEXT:    v_med3_i32 v4, v5, s4, v1
; GFX9-SDAG-NEXT:    v_ldexp_f16_e32 v0, v2, v0
; GFX9-SDAG-NEXT:    v_ldexp_f16_sdwa v2, v2, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; GFX9-SDAG-NEXT:    s_mov_b32 s5, 0x5040100
; GFX9-SDAG-NEXT:    v_med3_i32 v1, v6, s4, v1
; GFX9-SDAG-NEXT:    v_perm_b32 v0, v2, v0, s5
; GFX9-SDAG-NEXT:    v_ldexp_f16_e32 v1, v3, v1
; GFX9-SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-SDAG-LABEL: test_ldexp_v3f16_v3i32:
; GFX11-SDAG:       ; %bb.0:
; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-SDAG-NEXT:    s_movk_i32 s0, 0x8000
; GFX11-SDAG-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(VALU_DEP_3)
; GFX11-SDAG-NEXT:    v_med3_i32 v0, v4, s0, 0x7fff
; GFX11-SDAG-NEXT:    v_med3_i32 v1, v5, s0, 0x7fff
; GFX11-SDAG-NEXT:    v_lshrrev_b32_e32 v4, 16, v2
; GFX11-SDAG-NEXT:    v_ldexp_f16_e32 v0, v2, v0
; GFX11-SDAG-NEXT:    v_med3_i32 v2, v6, s0, 0x7fff
; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-SDAG-NEXT:    v_ldexp_f16_e32 v1, v4, v1
; GFX11-SDAG-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_3)
; GFX11-SDAG-NEXT:    v_ldexp_f16_e32 v1, v3, v2
; GFX11-SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-GISEL-LABEL: test_ldexp_v3f16_v3i32:
; GFX8-GISEL:       ; %bb.0:
; GFX8-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-GISEL-NEXT:    v_mov_b32_e32 v0, 0xffff8000
; GFX8-GISEL-NEXT:    v_mov_b32_e32 v1, 0x7fff
; GFX8-GISEL-NEXT:    v_med3_i32 v4, v4, v0, v1
; GFX8-GISEL-NEXT:    v_med3_i32 v5, v5, v0, v1
; GFX8-GISEL-NEXT:    v_ldexp_f16_e32 v4, v2, v4
; GFX8-GISEL-NEXT:    v_ldexp_f16_sdwa v2, v2, v5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; GFX8-GISEL-NEXT:    v_med3_i32 v0, v6, v0, v1
; GFX8-GISEL-NEXT:    v_ldexp_f16_e32 v1, v3, v0
; GFX8-GISEL-NEXT:    v_or_b32_e32 v0, v4, v2
; GFX8-GISEL-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-GISEL-LABEL: test_ldexp_v3f16_v3i32:
; GFX9-GISEL:       ; %bb.0:
; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-GISEL-NEXT:    v_mov_b32_e32 v0, 0xffff8000
; GFX9-GISEL-NEXT:    v_mov_b32_e32 v1, 0x7fff
; GFX9-GISEL-NEXT:    v_med3_i32 v4, v4, v0, v1
; GFX9-GISEL-NEXT:    v_med3_i32 v5, v5, v0, v1
; GFX9-GISEL-NEXT:    v_ldexp_f16_e32 v4, v2, v4
; GFX9-GISEL-NEXT:    v_ldexp_f16_sdwa v2, v2, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; GFX9-GISEL-NEXT:    v_med3_i32 v0, v6, v0, v1
; GFX9-GISEL-NEXT:    v_ldexp_f16_e32 v1, v3, v0
; GFX9-GISEL-NEXT:    v_lshl_or_b32 v0, v2, 16, v4
; GFX9-GISEL-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-GISEL-LABEL: test_ldexp_v3f16_v3i32:
; GFX11-GISEL:       ; %bb.0:
; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-GISEL-NEXT:    v_mov_b32_e32 v0, 0x7fff
; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_3)
; GFX11-GISEL-NEXT:    v_med3_i32 v1, 0xffff8000, v4, v0
; GFX11-GISEL-NEXT:    v_lshrrev_b32_e32 v4, 16, v2
; GFX11-GISEL-NEXT:    v_med3_i32 v5, 0xffff8000, v5, v0
; GFX11-GISEL-NEXT:    v_ldexp_f16_e32 v1, v2, v1
; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_3)
; GFX11-GISEL-NEXT:    v_ldexp_f16_e32 v2, v4, v5
; GFX11-GISEL-NEXT:    v_med3_i32 v4, 0xffff8000, v6, v0
; GFX11-GISEL-NEXT:    v_and_b32_e32 v1, 0xffff, v1
; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_3)
; GFX11-GISEL-NEXT:    v_lshl_or_b32 v0, v2, 16, v1
; GFX11-GISEL-NEXT:    v_ldexp_f16_e32 v1, v3, v4
; GFX11-GISEL-NEXT:    s_setpc_b64 s[30:31]
  %result = call <3 x half> @llvm.experimental.constrained.ldexp.v3f16.v3i32(<3 x half> %a, <3 x i32> %b, metadata !"round.dynamic", metadata !"fpexcept.strict")
  ret <3 x half> %result
}

; Four-element vector case. The expected sequences produce four per-element
; v_ldexp_f16 results and pack them pairwise into v0 and v1. The %out argument
; is not referenced by the body.
define <4 x half> @test_ldexp_v4f16_v4i32(ptr addrspace(1) %out, <4 x half> %a, <4 x i32> %b) #0 {
; GFX8-SDAG-LABEL: test_ldexp_v4f16_v4i32:
; GFX8-SDAG:       ; %bb.0:
; GFX8-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-SDAG-NEXT:    s_movk_i32 s4, 0x8000
; GFX8-SDAG-NEXT:    v_mov_b32_e32 v0, 0x7fff
; GFX8-SDAG-NEXT:    v_med3_i32 v1, v7, s4, v0
; GFX8-SDAG-NEXT:    v_med3_i32 v6, v6, s4, v0
; GFX8-SDAG-NEXT:    v_med3_i32 v5, v5, s4, v0
; GFX8-SDAG-NEXT:    v_med3_i32 v0, v4, s4, v0
; GFX8-SDAG-NEXT:    v_ldexp_f16_sdwa v1, v3, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; GFX8-SDAG-NEXT:    v_ldexp_f16_e32 v3, v3, v6
; GFX8-SDAG-NEXT:    v_ldexp_f16_sdwa v5, v2, v5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; GFX8-SDAG-NEXT:    v_ldexp_f16_e32 v0, v2, v0
; GFX8-SDAG-NEXT:    v_or_b32_e32 v0, v0, v5
; GFX8-SDAG-NEXT:    v_or_b32_e32 v1, v3, v1
; GFX8-SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-SDAG-LABEL: test_ldexp_v4f16_v4i32:
; GFX9-SDAG:       ; %bb.0:
; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-SDAG-NEXT:    s_movk_i32 s4, 0x8000
; GFX9-SDAG-NEXT:    v_mov_b32_e32 v0, 0x7fff
; GFX9-SDAG-NEXT:    v_med3_i32 v1, v6, s4, v0
; GFX9-SDAG-NEXT:    v_med3_i32 v6, v7, s4, v0
; GFX9-SDAG-NEXT:    v_med3_i32 v4, v4, s4, v0
; GFX9-SDAG-NEXT:    v_med3_i32 v0, v5, s4, v0
; GFX9-SDAG-NEXT:    v_ldexp_f16_e32 v1, v3, v1
; GFX9-SDAG-NEXT:    v_ldexp_f16_sdwa v3, v3, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; GFX9-SDAG-NEXT:    v_ldexp_f16_e32 v4, v2, v4
; GFX9-SDAG-NEXT:    v_ldexp_f16_sdwa v0, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; GFX9-SDAG-NEXT:    s_mov_b32 s4, 0x5040100
; GFX9-SDAG-NEXT:    v_perm_b32 v0, v0, v4, s4
; GFX9-SDAG-NEXT:    v_perm_b32 v1, v3, v1, s4
; GFX9-SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-SDAG-LABEL: test_ldexp_v4f16_v4i32:
; GFX11-SDAG:       ; %bb.0:
; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-SDAG-NEXT:    s_movk_i32 s0, 0x8000
; GFX11-SDAG-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-SDAG-NEXT:    v_med3_i32 v0, v6, s0, 0x7fff
; GFX11-SDAG-NEXT:    v_med3_i32 v1, v7, s0, 0x7fff
; GFX11-SDAG-NEXT:    v_med3_i32 v4, v4, s0, 0x7fff
; GFX11-SDAG-NEXT:    v_med3_i32 v5, v5, s0, 0x7fff
; GFX11-SDAG-NEXT:    v_lshrrev_b32_e32 v6, 16, v2
; GFX11-SDAG-NEXT:    v_lshrrev_b32_e32 v7, 16, v3
; GFX11-SDAG-NEXT:    v_ldexp_f16_e32 v3, v3, v0
; GFX11-SDAG-NEXT:    v_ldexp_f16_e32 v0, v2, v4
; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
; GFX11-SDAG-NEXT:    v_ldexp_f16_e32 v2, v6, v5
; GFX11-SDAG-NEXT:    v_ldexp_f16_e32 v1, v7, v1
; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-SDAG-NEXT:    v_perm_b32 v0, v2, v0, 0x5040100
; GFX11-SDAG-NEXT:    v_perm_b32 v1, v1, v3, 0x5040100
; GFX11-SDAG-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-GISEL-LABEL: test_ldexp_v4f16_v4i32:
; GFX8-GISEL:       ; %bb.0:
; GFX8-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-GISEL-NEXT:    v_mov_b32_e32 v0, 0xffff8000
; GFX8-GISEL-NEXT:    v_mov_b32_e32 v1, 0x7fff
; GFX8-GISEL-NEXT:    v_med3_i32 v4, v4, v0, v1
; GFX8-GISEL-NEXT:    v_med3_i32 v5, v5, v0, v1
; GFX8-GISEL-NEXT:    v_ldexp_f16_e32 v4, v2, v4
; GFX8-GISEL-NEXT:    v_ldexp_f16_sdwa v2, v2, v5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; GFX8-GISEL-NEXT:    v_med3_i32 v5, v6, v0, v1
; GFX8-GISEL-NEXT:    v_med3_i32 v0, v7, v0, v1
; GFX8-GISEL-NEXT:    v_ldexp_f16_e32 v5, v3, v5
; GFX8-GISEL-NEXT:    v_ldexp_f16_sdwa v1, v3, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; GFX8-GISEL-NEXT:    v_or_b32_e32 v0, v4, v2
; GFX8-GISEL-NEXT:    v_or_b32_e32 v1, v5, v1
; GFX8-GISEL-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-GISEL-LABEL: test_ldexp_v4f16_v4i32:
; GFX9-GISEL:       ; %bb.0:
; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-GISEL-NEXT:    v_mov_b32_e32 v0, 0xffff8000
; GFX9-GISEL-NEXT:    v_mov_b32_e32 v1, 0x7fff
; GFX9-GISEL-NEXT:    v_med3_i32 v4, v4, v0, v1
; GFX9-GISEL-NEXT:    v_med3_i32 v5, v5, v0, v1
; GFX9-GISEL-NEXT:    v_ldexp_f16_e32 v4, v2, v4
; GFX9-GISEL-NEXT:    v_ldexp_f16_sdwa v2, v2, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; GFX9-GISEL-NEXT:    v_med3_i32 v5, v6, v0, v1
; GFX9-GISEL-NEXT:    v_med3_i32 v0, v7, v0, v1
; GFX9-GISEL-NEXT:    v_ldexp_f16_e32 v5, v3, v5
; GFX9-GISEL-NEXT:    v_ldexp_f16_sdwa v1, v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; GFX9-GISEL-NEXT:    v_lshl_or_b32 v0, v2, 16, v4
; GFX9-GISEL-NEXT:    v_lshl_or_b32 v1, v1, 16, v5
; GFX9-GISEL-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-GISEL-LABEL: test_ldexp_v4f16_v4i32:
; GFX11-GISEL:       ; %bb.0:
; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-GISEL-NEXT:    v_mov_b32_e32 v0, 0x7fff
; GFX11-GISEL-NEXT:    v_lshrrev_b32_e32 v1, 16, v2
; GFX11-GISEL-NEXT:    v_lshrrev_b32_e32 v8, 16, v3
; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_3) | instid1(VALU_DEP_4)
; GFX11-GISEL-NEXT:    v_med3_i32 v4, 0xffff8000, v4, v0
; GFX11-GISEL-NEXT:    v_med3_i32 v6, 0xffff8000, v6, v0
; GFX11-GISEL-NEXT:    v_med3_i32 v5, 0xffff8000, v5, v0
; GFX11-GISEL-NEXT:    v_med3_i32 v0, 0xffff8000, v7, v0
; GFX11-GISEL-NEXT:    v_ldexp_f16_e32 v2, v2, v4
; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
; GFX11-GISEL-NEXT:    v_ldexp_f16_e32 v3, v3, v6
; GFX11-GISEL-NEXT:    v_ldexp_f16_e32 v1, v1, v5
; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
; GFX11-GISEL-NEXT:    v_ldexp_f16_e32 v4, v8, v0
; GFX11-GISEL-NEXT:    v_and_b32_e32 v0, 0xffff, v2
; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-GISEL-NEXT:    v_and_b32_e32 v2, 0xffff, v3
; GFX11-GISEL-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_2)
; GFX11-GISEL-NEXT:    v_lshl_or_b32 v1, v4, 16, v2
; GFX11-GISEL-NEXT:    s_setpc_b64 s[30:31]
  %result = call <4 x half> @llvm.experimental.constrained.ldexp.v4f16.v4i32(<4 x half> %a, <4 x i32> %b, metadata !"round.dynamic", metadata !"fpexcept.strict")
  ret <4 x half> %result
}

; Intrinsic declarations. The f16.i16 and v2f16.v2i16 forms are only referenced
; by the commented-out functions above.
declare half @llvm.experimental.constrained.ldexp.f16.i16(half, i16, metadata, metadata) #1
declare half @llvm.experimental.constrained.ldexp.f16.i32(half, i32, metadata, metadata) #1
declare <2 x half> @llvm.experimental.constrained.ldexp.v2f16.v2i16(<2 x half>, <2 x i16>, metadata, metadata) #1
declare <2 x half> @llvm.experimental.constrained.ldexp.v2f16.v2i32(<2 x half>, <2 x i32>, metadata, metadata) #1
declare <3 x half> @llvm.experimental.constrained.ldexp.v3f16.v3i32(<3 x half>, <3 x i32>, metadata, metadata) #1
declare <4 x half> @llvm.experimental.constrained.ldexp.v4f16.v4i32(<4 x half>, <4 x i32>, metadata, metadata) #1

; Attribute group 0 is applied to every test function above; group 1 is applied
; to the intrinsic declarations.
attributes #0 = { strictfp }
attributes #1 = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: readwrite) }
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
; GCN: {{.*}}
; GFX11: {{.*}}
; GFX8: {{.*}}
; GFX9: {{.*}}