; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti < %s | FileCheck -check-prefixes=GCN,SI %s
; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx803 < %s | FileCheck -check-prefixes=GCN,GFX89,GFX8 %s
; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX89,GFX9 %s
; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 < %s | FileCheck -check-prefixes=GCN,GFX1011,GFX10 %s
; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GCN,GFX1011,GFX11 %s

; Codegen tests for llvm.experimental.constrained.fpext with "fpexcept.strict"
; in strictfp functions: half->float, float->double and half->double, for
; scalar, 2-element and 3-element vectors, plus fneg on the input or result.

; Scalar half -> float, operand passed as an argument.
define float @v_constrained_fpext_f16_to_f32_fpexcept_strict(half %arg) #0 {
; SI-LABEL: v_constrained_fpext_f16_to_f32_fpexcept_strict:
; SI:       ; %bb.0:
; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-NEXT:    v_cvt_f16_f32_e32 v0, v0
; SI-NEXT:    v_and_b32_e32 v0, 0xffff, v0
; SI-NEXT:    v_cvt_f32_f16_e32 v0, v0
; SI-NEXT:    s_setpc_b64 s[30:31]
;
; GFX89-LABEL: v_constrained_fpext_f16_to_f32_fpexcept_strict:
; GFX89:       ; %bb.0:
; GFX89-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX89-NEXT:    v_cvt_f32_f16_e32 v0, v0
; GFX89-NEXT:    s_setpc_b64 s[30:31]
;
; GFX1011-LABEL: v_constrained_fpext_f16_to_f32_fpexcept_strict:
; GFX1011:       ; %bb.0:
; GFX1011-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1011-NEXT:    v_cvt_f32_f16_e32 v0, v0
; GFX1011-NEXT:    s_setpc_b64 s[30:31]
  %result = call float @llvm.experimental.constrained.fpext.f32.f16(half %arg, metadata !"fpexcept.strict")
  ret float %result
}

; <2 x half> -> <2 x float>.
define <2 x float> @v_constrained_fpext_v2f16_to_v2f32_fpexcept_strict(<2 x half> %arg) #0 {
; SI-LABEL: v_constrained_fpext_v2f16_to_v2f32_fpexcept_strict:
; SI:       ; %bb.0:
; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-NEXT:    v_cvt_f16_f32_e32 v0, v0
; SI-NEXT:    v_cvt_f16_f32_e32 v1, v1
; SI-NEXT:    v_and_b32_e32 v0, 0xffff, v0
; SI-NEXT:    v_and_b32_e32 v1, 0xffff, v1
; SI-NEXT:    v_cvt_f32_f16_e32 v0, v0
; SI-NEXT:    v_cvt_f32_f16_e32 v1, v1
; SI-NEXT:    s_setpc_b64 s[30:31]
;
; GFX89-LABEL: v_constrained_fpext_v2f16_to_v2f32_fpexcept_strict:
; GFX89:       ; %bb.0:
; GFX89-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX89-NEXT:    v_cvt_f32_f16_e32 v2, v0
; GFX89-NEXT:    v_cvt_f32_f16_sdwa v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX89-NEXT:    v_mov_b32_e32 v0, v2
; GFX89-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_constrained_fpext_v2f16_to_v2f32_fpexcept_strict:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    v_cvt_f32_f16_e32 v2, v0
; GFX10-NEXT:    v_cvt_f32_f16_sdwa v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX10-NEXT:    v_mov_b32_e32 v0, v2
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_constrained_fpext_v2f16_to_v2f32_fpexcept_strict:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    v_lshrrev_b32_e32 v1, 16, v0
; GFX11-NEXT:    v_cvt_f32_f16_e32 v0, v0
; GFX11-NEXT:    v_cvt_f32_f16_e32 v1, v1
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %result = call <2 x float> @llvm.experimental.constrained.fpext.v2f32.v2f16(<2 x half> %arg, metadata !"fpexcept.strict")
  ret <2 x float> %result
}

; <3 x half> -> <3 x float>.
define <3 x float> @v_constrained_fpext_v3f16_to_v3f32_fpexcept_strict(<3 x half> %arg) #0 {
; SI-LABEL: v_constrained_fpext_v3f16_to_v3f32_fpexcept_strict:
; SI:       ; %bb.0:
; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-NEXT:    v_cvt_f16_f32_e32 v0, v0
; SI-NEXT:    v_cvt_f16_f32_e32 v1, v1
; SI-NEXT:    v_cvt_f16_f32_e32 v2, v2
; SI-NEXT:    v_and_b32_e32 v0, 0xffff, v0
; SI-NEXT:    v_and_b32_e32 v1, 0xffff, v1
; SI-NEXT:    v_and_b32_e32 v2, 0xffff, v2
; SI-NEXT:    v_cvt_f32_f16_e32 v0, v0
; SI-NEXT:    v_cvt_f32_f16_e32 v1, v1
; SI-NEXT:    v_cvt_f32_f16_e32 v2, v2
; SI-NEXT:    s_setpc_b64 s[30:31]
;
; GFX89-LABEL: v_constrained_fpext_v3f16_to_v3f32_fpexcept_strict:
; GFX89:       ; %bb.0:
; GFX89-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX89-NEXT:    v_cvt_f32_f16_e32 v4, v0
; GFX89-NEXT:    v_cvt_f32_f16_sdwa v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX89-NEXT:    v_cvt_f32_f16_e32 v2, v1
; GFX89-NEXT:    v_mov_b32_e32 v0, v4
; GFX89-NEXT:    v_mov_b32_e32 v1, v3
; GFX89-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_constrained_fpext_v3f16_to_v3f32_fpexcept_strict:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    v_cvt_f32_f16_e32 v4, v0
; GFX10-NEXT:    v_cvt_f32_f16_sdwa v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX10-NEXT:    v_cvt_f32_f16_e32 v2, v1
; GFX10-NEXT:    v_mov_b32_e32 v0, v4
; GFX10-NEXT:    v_mov_b32_e32 v1, v3
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_constrained_fpext_v3f16_to_v3f32_fpexcept_strict:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    v_lshrrev_b32_e32 v2, 16, v0
; GFX11-NEXT:    v_cvt_f32_f16_e32 v0, v0
; GFX11-NEXT:    v_cvt_f32_f16_e32 v3, v2
; GFX11-NEXT:    v_cvt_f32_f16_e32 v2, v1
; GFX11-NEXT:    v_mov_b32_e32 v1, v3
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %result = call <3 x float> @llvm.experimental.constrained.fpext.v3f32.v3f16(<3 x half> %arg, metadata !"fpexcept.strict")
  ret <3 x float> %result
}

; Scalar float -> double.
define double @v_constrained_fpext_f32_to_f64_fpexcept_strict(float %arg) #0 {
; GCN-LABEL: v_constrained_fpext_f32_to_f64_fpexcept_strict:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_cvt_f64_f32_e32 v[0:1], v0
; GCN-NEXT:    s_setpc_b64 s[30:31]
  %result = call double @llvm.experimental.constrained.fpext.f64.f32(float %arg, metadata !"fpexcept.strict")
  ret double %result
}

; <2 x float> -> <2 x double>.
define <2 x double> @v_constrained_fpext_v2f32_to_v2f64_fpexcept_strict(<2 x float> %arg) #0 {
; GCN-LABEL: v_constrained_fpext_v2f32_to_v2f64_fpexcept_strict:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_mov_b32_e32 v2, v1
; GCN-NEXT:    v_cvt_f64_f32_e32 v[0:1], v0
; GCN-NEXT:    v_cvt_f64_f32_e32 v[2:3], v2
; GCN-NEXT:    s_setpc_b64 s[30:31]
  %result = call <2 x double> @llvm.experimental.constrained.fpext.v2f64.v2f32(<2 x float> %arg, metadata !"fpexcept.strict")
  ret <2 x double> %result
}

; <3 x float> -> <3 x double>.
define <3 x double> @v_constrained_fpext_v3f32_to_v3f64_fpexcept_strict(<3 x float> %arg) #0 {
; SI-LABEL: v_constrained_fpext_v3f32_to_v3f64_fpexcept_strict:
; SI:       ; %bb.0:
; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-NEXT:    v_mov_b32_e32 v4, v2
; SI-NEXT:    v_mov_b32_e32 v2, v1
; SI-NEXT:    v_cvt_f64_f32_e32 v[0:1], v0
; SI-NEXT:    v_cvt_f64_f32_e32 v[2:3], v2
; SI-NEXT:    v_cvt_f64_f32_e32 v[4:5], v4
; SI-NEXT:    s_setpc_b64 s[30:31]
;
; GFX89-LABEL: v_constrained_fpext_v3f32_to_v3f64_fpexcept_strict:
; GFX89:       ; %bb.0:
; GFX89-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX89-NEXT:    v_mov_b32_e32 v4, v2
; GFX89-NEXT:    v_mov_b32_e32 v2, v1
; GFX89-NEXT:    v_cvt_f64_f32_e32 v[0:1], v0
; GFX89-NEXT:    v_cvt_f64_f32_e32 v[2:3], v2
; GFX89-NEXT:    v_cvt_f64_f32_e32 v[4:5], v4
; GFX89-NEXT:    s_setpc_b64 s[30:31]
;
; GFX1011-LABEL: v_constrained_fpext_v3f32_to_v3f64_fpexcept_strict:
; GFX1011:       ; %bb.0:
; GFX1011-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1011-NEXT:    v_mov_b32_e32 v4, v2
; GFX1011-NEXT:    v_mov_b32_e32 v2, v1
; GFX1011-NEXT:    v_cvt_f64_f32_e32 v[0:1], v0
; GFX1011-NEXT:    v_cvt_f64_f32_e32 v[4:5], v4
; GFX1011-NEXT:    v_cvt_f64_f32_e32 v[2:3], v2
; GFX1011-NEXT:    s_setpc_b64 s[30:31]
  %result = call <3 x double> @llvm.experimental.constrained.fpext.v3f64.v3f32(<3 x float> %arg, metadata !"fpexcept.strict")
  ret <3 x double> %result
}

; Scalar half -> double.
define double @v_constrained_fpext_f16_to_f64_fpexcept_strict(half %arg) #0 {
; SI-LABEL: v_constrained_fpext_f16_to_f64_fpexcept_strict:
; SI:       ; %bb.0:
; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-NEXT:    v_cvt_f16_f32_e32 v0, v0
; SI-NEXT:    v_and_b32_e32 v0, 0xffff, v0
; SI-NEXT:    v_cvt_f32_f16_e32 v0, v0
; SI-NEXT:    v_cvt_f64_f32_e32 v[0:1], v0
; SI-NEXT:    s_setpc_b64 s[30:31]
;
; GFX89-LABEL: v_constrained_fpext_f16_to_f64_fpexcept_strict:
; GFX89:       ; %bb.0:
; GFX89-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX89-NEXT:    v_cvt_f32_f16_e32 v0, v0
; GFX89-NEXT:    v_cvt_f64_f32_e32 v[0:1], v0
; GFX89-NEXT:    s_setpc_b64 s[30:31]
;
; GFX1011-LABEL: v_constrained_fpext_f16_to_f64_fpexcept_strict:
; GFX1011:       ; %bb.0:
; GFX1011-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1011-NEXT:    v_cvt_f32_f16_e32 v0, v0
; GFX1011-NEXT:    v_cvt_f64_f32_e32 v[0:1], v0
; GFX1011-NEXT:    s_setpc_b64 s[30:31]
  %result = call double @llvm.experimental.constrained.fpext.f64.f16(half %arg, metadata !"fpexcept.strict")
  ret double %result
}

; <2 x half> -> <2 x double>.
define <2 x double> @v_constrained_fpext_v2f16_to_v2f64_fpexcept_strict(<2 x half> %arg) #0 {
; SI-LABEL: v_constrained_fpext_v2f16_to_v2f64_fpexcept_strict:
; SI:       ; %bb.0:
; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-NEXT:    v_cvt_f16_f32_e32 v0, v0
; SI-NEXT:    v_cvt_f16_f32_e32 v1, v1
; SI-NEXT:    v_and_b32_e32 v0, 0xffff, v0
; SI-NEXT:    v_and_b32_e32 v1, 0xffff, v1
; SI-NEXT:    v_cvt_f32_f16_e32 v0, v0
; SI-NEXT:    v_cvt_f32_f16_e32 v2, v1
; SI-NEXT:    v_cvt_f64_f32_e32 v[0:1], v0
; SI-NEXT:    v_cvt_f64_f32_e32 v[2:3], v2
; SI-NEXT:    s_setpc_b64 s[30:31]
;
; GFX89-LABEL: v_constrained_fpext_v2f16_to_v2f64_fpexcept_strict:
; GFX89:       ; %bb.0:
; GFX89-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX89-NEXT:    v_cvt_f32_f16_e32 v1, v0
; GFX89-NEXT:    v_cvt_f32_f16_sdwa v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX89-NEXT:    v_cvt_f64_f32_e32 v[0:1], v1
; GFX89-NEXT:    v_cvt_f64_f32_e32 v[2:3], v2
; GFX89-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_constrained_fpext_v2f16_to_v2f64_fpexcept_strict:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    v_cvt_f32_f16_e32 v1, v0
; GFX10-NEXT:    v_cvt_f32_f16_sdwa v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX10-NEXT:    v_cvt_f64_f32_e32 v[0:1], v1
; GFX10-NEXT:    v_cvt_f64_f32_e32 v[2:3], v2
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_constrained_fpext_v2f16_to_v2f64_fpexcept_strict:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    v_lshrrev_b32_e32 v1, 16, v0
; GFX11-NEXT:    v_cvt_f32_f16_e32 v0, v0
; GFX11-NEXT:    v_cvt_f32_f16_e32 v2, v1
; GFX11-NEXT:    v_cvt_f64_f32_e32 v[0:1], v0
; GFX11-NEXT:    v_cvt_f64_f32_e32 v[2:3], v2
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %result = call <2 x double> @llvm.experimental.constrained.fpext.v2f64.v2f16(<2 x half> %arg, metadata !"fpexcept.strict")
  ret <2 x double> %result
}

; <3 x half> -> <3 x double>.
; NOTE(review): the function name says "v2f64" although the result type is
; <3 x double>; the name is kept unchanged so the symbol and labels stay stable.
define <3 x double> @v_constrained_fpext_v3f16_to_v2f64_fpexcept_strict(<3 x half> %arg) #0 {
; SI-LABEL: v_constrained_fpext_v3f16_to_v2f64_fpexcept_strict:
; SI:       ; %bb.0:
; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-NEXT:    v_cvt_f16_f32_e32 v2, v2
; SI-NEXT:    v_cvt_f16_f32_e32 v0, v0
; SI-NEXT:    v_cvt_f16_f32_e32 v1, v1
; SI-NEXT:    v_and_b32_e32 v2, 0xffff, v2
; SI-NEXT:    v_and_b32_e32 v0, 0xffff, v0
; SI-NEXT:    v_and_b32_e32 v1, 0xffff, v1
; SI-NEXT:    v_cvt_f32_f16_e32 v0, v0
; SI-NEXT:    v_cvt_f32_f16_e32 v3, v1
; SI-NEXT:    v_cvt_f32_f16_e32 v4, v2
; SI-NEXT:    v_cvt_f64_f32_e32 v[0:1], v0
; SI-NEXT:    v_cvt_f64_f32_e32 v[2:3], v3
; SI-NEXT:    v_cvt_f64_f32_e32 v[4:5], v4
; SI-NEXT:    s_setpc_b64 s[30:31]
;
; GFX89-LABEL: v_constrained_fpext_v3f16_to_v2f64_fpexcept_strict:
; GFX89:       ; %bb.0:
; GFX89-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX89-NEXT:    v_cvt_f32_f16_e32 v2, v0
; GFX89-NEXT:    v_cvt_f32_f16_sdwa v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX89-NEXT:    v_cvt_f32_f16_e32 v4, v1
; GFX89-NEXT:    v_cvt_f64_f32_e32 v[0:1], v2
; GFX89-NEXT:    v_cvt_f64_f32_e32 v[2:3], v3
; GFX89-NEXT:    v_cvt_f64_f32_e32 v[4:5], v4
; GFX89-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_constrained_fpext_v3f16_to_v2f64_fpexcept_strict:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    v_cvt_f32_f16_e32 v2, v0
; GFX10-NEXT:    v_cvt_f32_f16_sdwa v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX10-NEXT:    v_cvt_f32_f16_e32 v4, v1
; GFX10-NEXT:    v_cvt_f64_f32_e32 v[0:1], v2
; GFX10-NEXT:    v_cvt_f64_f32_e32 v[2:3], v3
; GFX10-NEXT:    v_cvt_f64_f32_e32 v[4:5], v4
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_constrained_fpext_v3f16_to_v2f64_fpexcept_strict:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    v_lshrrev_b32_e32 v2, 16, v0
; GFX11-NEXT:    v_cvt_f32_f16_e32 v0, v0
; GFX11-NEXT:    v_cvt_f32_f16_e32 v3, v1
; GFX11-NEXT:    v_cvt_f32_f16_e32 v2, v2
; GFX11-NEXT:    v_cvt_f64_f32_e32 v[0:1], v0
; GFX11-NEXT:    v_cvt_f64_f32_e32 v[4:5], v3
; GFX11-NEXT:    v_cvt_f64_f32_e32 v[2:3], v2
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %result = call <3 x double> @llvm.experimental.constrained.fpext.v3f64.v3f16(<3 x half> %arg, metadata !"fpexcept.strict")
  ret <3 x double> %result
}

; fneg applied to the result of a half -> float strict fpext.
define float @v_constrained_fneg_fpext_f16_to_f32_fpexcept_strict(half %arg) #0 {
; SI-LABEL: v_constrained_fneg_fpext_f16_to_f32_fpexcept_strict:
; SI:       ; %bb.0:
; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-NEXT:    v_cvt_f16_f32_e32 v0, v0
; SI-NEXT:    v_and_b32_e32 v0, 0xffff, v0
; SI-NEXT:    v_cvt_f32_f16_e32 v0, v0
; SI-NEXT:    v_xor_b32_e32 v0, 0x80000000, v0
; SI-NEXT:    s_setpc_b64 s[30:31]
;
; GFX89-LABEL: v_constrained_fneg_fpext_f16_to_f32_fpexcept_strict:
; GFX89:       ; %bb.0:
; GFX89-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX89-NEXT:    v_cvt_f32_f16_e32 v0, v0
; GFX89-NEXT:    v_xor_b32_e32 v0, 0x80000000, v0
; GFX89-NEXT:    s_setpc_b64 s[30:31]
;
; GFX1011-LABEL: v_constrained_fneg_fpext_f16_to_f32_fpexcept_strict:
; GFX1011:       ; %bb.0:
; GFX1011-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1011-NEXT:    v_cvt_f32_f16_e32 v0, v0
; GFX1011-NEXT:    v_xor_b32_e32 v0, 0x80000000, v0
; GFX1011-NEXT:    s_setpc_b64 s[30:31]
  %result = call float @llvm.experimental.constrained.fpext.f32.f16(half %arg, metadata !"fpexcept.strict")
  %neg.result = fneg float %result
  ret float %neg.result
}

; fneg applied to the half input of a half -> float strict fpext.
define float @v_constrained_fpext_fneg_f16_to_f32_fpexcept_strict(half %arg) #0 {
; SI-LABEL: v_constrained_fpext_fneg_f16_to_f32_fpexcept_strict:
; SI:       ; %bb.0:
; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-NEXT:    v_cvt_f16_f32_e32 v0, v0
; SI-NEXT:    v_and_b32_e32 v0, 0xffff, v0
; SI-NEXT:    v_cvt_f32_f16_e32 v0, v0
; SI-NEXT:    v_xor_b32_e32 v0, 0x80000000, v0
; SI-NEXT:    s_setpc_b64 s[30:31]
;
; GFX89-LABEL: v_constrained_fpext_fneg_f16_to_f32_fpexcept_strict:
; GFX89:       ; %bb.0:
; GFX89-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX89-NEXT:    v_cvt_f32_f16_e64 v0, -v0
; GFX89-NEXT:    s_setpc_b64 s[30:31]
;
; GFX1011-LABEL: v_constrained_fpext_fneg_f16_to_f32_fpexcept_strict:
; GFX1011:       ; %bb.0:
; GFX1011-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX1011-NEXT:    v_cvt_f32_f16_e64 v0, -v0
; GFX1011-NEXT:    s_setpc_b64 s[30:31]
  %neg.arg = fneg half %arg
  %result = call float @llvm.experimental.constrained.fpext.f32.f16(half %neg.arg, metadata !"fpexcept.strict")
  ret float %result
}

; fneg applied to the float input of a float -> double strict fpext.
; The call must consume %neg.arg; previously it was passed %arg, which left
; the fneg dead and made this test a duplicate of the plain f32 -> f64 case.
; NOTE(review): the assertions below were updated by hand to the negated-source
; form used by the f16 variant of this test; re-run
; utils/update_llc_test_checks.py to confirm them against llc.
define double @v_constrained_fpext_fneg_f32_to_f64_fpexcept_strict(float %arg) #0 {
; GCN-LABEL: v_constrained_fpext_fneg_f32_to_f64_fpexcept_strict:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_cvt_f64_f32_e64 v[0:1], -v0
; GCN-NEXT:    s_setpc_b64 s[30:31]
  %neg.arg = fneg float %arg
  %result = call double @llvm.experimental.constrained.fpext.f64.f32(float %neg.arg, metadata !"fpexcept.strict")
  ret double %result
}

; fneg applied to the double result of a float -> double strict fpext.
define double @v_constrained_fneg_fpext_f32_to_f64_fpexcept_strict(float %arg) #0 {
; GCN-LABEL: v_constrained_fneg_fpext_f32_to_f64_fpexcept_strict:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_cvt_f64_f32_e32 v[0:1], v0
; GCN-NEXT:    v_xor_b32_e32 v1, 0x80000000, v1
; GCN-NEXT:    s_setpc_b64 s[30:31]
  %result = call double @llvm.experimental.constrained.fpext.f64.f32(float %arg, metadata !"fpexcept.strict")
  %neg.result = fneg double %result
  ret double %neg.result
}

; half -> float where the operand is loaded from addrspace(1) memory instead of
; being passed as a function argument.
define float @v_constrained_fpext_f16_to_f32_noabi(ptr addrspace(1) %ptr) #0 {
; SI-LABEL: v_constrained_fpext_f16_to_f32_noabi:
; SI:       ; %bb.0:
; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-NEXT:    s_mov_b32 s6, 0
; SI-NEXT:    s_mov_b32 s7, 0xf000
; SI-NEXT:    s_mov_b32 s4, s6
; SI-NEXT:    s_mov_b32 s5, s6
; SI-NEXT:    buffer_load_ushort v0, v[0:1], s[4:7], 0 addr64
; SI-NEXT:    s_waitcnt vmcnt(0)
; SI-NEXT:    v_cvt_f32_f16_e32 v0, v0
; SI-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_constrained_fpext_f16_to_f32_noabi:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    flat_load_ushort v0, v[0:1]
; GFX8-NEXT:    s_waitcnt vmcnt(0)
; GFX8-NEXT:    v_cvt_f32_f16_e32 v0, v0
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_constrained_fpext_f16_to_f32_noabi:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    global_load_ushort v0, v[0:1], off
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    v_cvt_f32_f16_e32 v0, v0
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_constrained_fpext_f16_to_f32_noabi:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    global_load_ushort v0, v[0:1], off
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    v_cvt_f32_f16_e32 v0, v0
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_constrained_fpext_f16_to_f32_noabi:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    global_load_u16 v0, v[0:1], off
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    v_cvt_f32_f16_e32 v0, v0
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %val = load half, ptr addrspace(1) %ptr
  %result = call float @llvm.experimental.constrained.fpext.f32.f16(half %val, metadata !"fpexcept.strict")
  ret float %result
}

; <2 x half> -> <2 x float> where the operand is loaded from addrspace(1)
; memory instead of being passed as a function argument.
define <2 x float> @v_constrained_fpext_v2f16_to_v2f32_noabi(ptr addrspace(1) %ptr) #0 {
; SI-LABEL: v_constrained_fpext_v2f16_to_v2f32_noabi:
; SI:       ; %bb.0:
; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-NEXT:    s_mov_b32 s6, 0
; SI-NEXT:    s_mov_b32 s7, 0xf000
; SI-NEXT:    s_mov_b32 s4, s6
; SI-NEXT:    s_mov_b32 s5, s6
; SI-NEXT:    buffer_load_dword v1, v[0:1], s[4:7], 0 addr64
; SI-NEXT:    s_waitcnt vmcnt(0)
; SI-NEXT:    v_cvt_f32_f16_e32 v0, v1
; SI-NEXT:    v_lshrrev_b32_e32 v1, 16, v1
; SI-NEXT:    v_cvt_f32_f16_e32 v1, v1
; SI-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_constrained_fpext_v2f16_to_v2f32_noabi:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    flat_load_dword v1, v[0:1]
; GFX8-NEXT:    s_waitcnt vmcnt(0)
; GFX8-NEXT:    v_cvt_f32_f16_e32 v0, v1
; GFX8-NEXT:    v_cvt_f32_f16_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_constrained_fpext_v2f16_to_v2f32_noabi:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    global_load_dword v1, v[0:1], off
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    v_cvt_f32_f16_e32 v0, v1
; GFX9-NEXT:    v_cvt_f32_f16_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_constrained_fpext_v2f16_to_v2f32_noabi:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    global_load_dword v1, v[0:1], off
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    v_cvt_f32_f16_e32 v0, v1
; GFX10-NEXT:    v_cvt_f32_f16_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_constrained_fpext_v2f16_to_v2f32_noabi:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    global_load_b32 v0, v[0:1], off
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    v_lshrrev_b32_e32 v1, 16, v0
; GFX11-NEXT:    v_cvt_f32_f16_e32 v0, v0
; GFX11-NEXT:    v_cvt_f32_f16_e32 v1, v1
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %val = load <2 x half>, ptr addrspace(1) %ptr
  %result = call <2 x float> @llvm.experimental.constrained.fpext.v2f32.v2f16(<2 x half> %val, metadata !"fpexcept.strict")
  ret <2 x float> %result
}

declare double @llvm.experimental.constrained.fpext.f64.f32(float, metadata) #1
declare <2 x double> @llvm.experimental.constrained.fpext.v2f64.v2f32(<2 x float>, metadata) #1
declare <3 x double> @llvm.experimental.constrained.fpext.v3f64.v3f32(<3 x float>, metadata) #1

declare double @llvm.experimental.constrained.fpext.f64.f16(half, metadata) #1
declare <2 x double> @llvm.experimental.constrained.fpext.v2f64.v2f16(<2 x half>, metadata) #1
declare <3 x double> @llvm.experimental.constrained.fpext.v3f64.v3f16(<3 x half>, metadata) #1

declare float @llvm.experimental.constrained.fpext.f32.f16(half, metadata) #1
declare <2 x float> @llvm.experimental.constrained.fpext.v2f32.v2f16(<2 x half>, metadata) #1
declare <3 x float> @llvm.experimental.constrained.fpext.v3f32.v3f16(<3 x half>, metadata) #1

attributes #0 = { strictfp }
attributes #1 = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: readwrite) }