; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512fp16 -mattr=+avx512vl -O3 | FileCheck %s --check-prefixes=CHECK
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512fp16 -mattr=+avx512vl -O3 | FileCheck %s --check-prefixes=CHECK

; Strict-FP (constrained intrinsic) lowering tests for half-precision values
; held in 128-bit vectors. Every function carries the strictfp attribute and
; every constrained call uses "fpexcept.strict"; the calls that take a rounding
; mode use "round.dynamic". Both run lines share one set of assertions, so the
; return mnemonic is matched with a pattern covering the 32- and 64-bit forms.

; Arithmetic intrinsics.
declare <8 x half> @llvm.experimental.constrained.fadd.v8f16(<8 x half>, <8 x half>, metadata, metadata)
declare <8 x half> @llvm.experimental.constrained.fsub.v8f16(<8 x half>, <8 x half>, metadata, metadata)
declare <8 x half> @llvm.experimental.constrained.fmul.v8f16(<8 x half>, <8 x half>, metadata, metadata)
declare <8 x half> @llvm.experimental.constrained.fdiv.v8f16(<8 x half>, <8 x half>, metadata, metadata)
declare <8 x half> @llvm.experimental.constrained.sqrt.v8f16(<8 x half>, metadata, metadata)
declare <8 x half> @llvm.experimental.constrained.fma.v8f16(<8 x half>, <8 x half>, <8 x half>, metadata, metadata)

; Narrowing conversions (to half).
declare half @llvm.experimental.constrained.fptrunc.f16.f32(float, metadata, metadata)
declare half @llvm.experimental.constrained.fptrunc.f16.f64(double, metadata, metadata)
declare <2 x half> @llvm.experimental.constrained.fptrunc.v2f16.v2f32(<2 x float>, metadata, metadata)
declare <4 x half> @llvm.experimental.constrained.fptrunc.v4f16.v4f32(<4 x float>, metadata, metadata)
declare <2 x half> @llvm.experimental.constrained.fptrunc.v2f16.v2f64(<2 x double>, metadata, metadata)

; Widening conversions (from half).
declare float @llvm.experimental.constrained.fpext.f32.f16(half, metadata)
declare double @llvm.experimental.constrained.fpext.f64.f16(half, metadata)
declare <2 x float> @llvm.experimental.constrained.fpext.v2f32.v2f16(<2 x half>, metadata)
declare <4 x float> @llvm.experimental.constrained.fpext.v4f32.v4f16(<4 x half>, metadata)
declare <2 x double> @llvm.experimental.constrained.fpext.v2f64.v2f16(<2 x half>, metadata)

; Constrained v8f16 addition; a single vaddph is expected.
define <8 x half> @f2(<8 x half> %a, <8 x half> %b) #0 {
; CHECK-LABEL: f2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vaddph %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    ret{{[l|q]}}
  %sum = call <8 x half> @llvm.experimental.constrained.fadd.v8f16(
             <8 x half> %a, <8 x half> %b,
             metadata !"round.dynamic", metadata !"fpexcept.strict") #0
  ret <8 x half> %sum
}

; Constrained v8f16 subtraction; a single vsubph is expected.
define <8 x half> @f4(<8 x half> %a, <8 x half> %b) #0 {
; CHECK-LABEL: f4:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsubph %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    ret{{[l|q]}}
  %diff = call <8 x half> @llvm.experimental.constrained.fsub.v8f16(
              <8 x half> %a, <8 x half> %b,
              metadata !"round.dynamic", metadata !"fpexcept.strict") #0
  ret <8 x half> %diff
}

; Constrained v8f16 multiplication; a single vmulph is expected.
define <8 x half> @f6(<8 x half> %a, <8 x half> %b) #0 {
; CHECK-LABEL: f6:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vmulph %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    ret{{[l|q]}}
  %prod = call <8 x half> @llvm.experimental.constrained.fmul.v8f16(
              <8 x half> %a, <8 x half> %b,
              metadata !"round.dynamic", metadata !"fpexcept.strict") #0
  ret <8 x half> %prod
}

; Constrained v8f16 division; a single vdivph is expected.
define <8 x half> @f8(<8 x half> %a, <8 x half> %b) #0 {
; CHECK-LABEL: f8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vdivph %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    ret{{[l|q]}}
  %quot = call <8 x half> @llvm.experimental.constrained.fdiv.v8f16(
              <8 x half> %a, <8 x half> %b,
              metadata !"round.dynamic", metadata !"fpexcept.strict") #0
  ret <8 x half> %quot
}

; Constrained v8f16 square root; a single vsqrtph is expected.
define <8 x half> @f10(<8 x half> %a) #0 {
; CHECK-LABEL: f10:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsqrtph %xmm0, %xmm0
; CHECK-NEXT:    ret{{[l|q]}}
  %root = call <8 x half> @llvm.experimental.constrained.sqrt.v8f16(
              <8 x half> %a,
              metadata !"round.dynamic", metadata !"fpexcept.strict") #0
  ret <8 x half> %root
}

; Element 0 of a <2 x double> is truncated to half and written into element 0
; of %a1; the expected code is a scalar vcvtsd2sh followed by a vmovsh merge.
define <8 x half> @f11(<2 x double> %a0, <8 x half> %a1) #0 {
; CHECK-LABEL: f11:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvtsd2sh %xmm0, %xmm0, %xmm0
; CHECK-NEXT:    vmovsh %xmm0, %xmm1, %xmm0
; CHECK-NEXT:    ret{{[l|q]}}
  %lane0 = extractelement <2 x double> %a0, i32 0
  %narrow = call half @llvm.experimental.constrained.fptrunc.f16.f64(
                double %lane0,
                metadata !"round.dynamic", metadata !"fpexcept.strict") #0
  %merged = insertelement <8 x half> %a1, half %narrow, i32 0
  ret <8 x half> %merged
}

; Element 0 of %a1 is extended to double and written into element 0 of %a0;
; the expected code is a scalar vcvtsh2sd followed by a vblendps merge.
define <2 x double> @f12(<2 x double> %a0, <8 x half> %a1) #0 {
; CHECK-LABEL: f12:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvtsh2sd %xmm1, %xmm1, %xmm1
; CHECK-NEXT:    vblendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
; CHECK-NEXT:    ret{{[l|q]}}
  %lane0 = extractelement <8 x half> %a1, i32 0
  %wide = call double @llvm.experimental.constrained.fpext.f64.f16(
              half %lane0, metadata !"fpexcept.strict") #0
  %merged = insertelement <2 x double> %a0, double %wide, i32 0
  ret <2 x double> %merged
}

; Constrained v8f16 fused multiply-add; a single vfmadd213ph is expected.
define <8 x half> @f13(<8 x half> %a, <8 x half> %b, <8 x half> %c) #0 {
; CHECK-LABEL: f13:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vfmadd213ph %xmm2, %xmm1, %xmm0
; CHECK-NEXT:    ret{{[l|q]}}
  %fused = call <8 x half> @llvm.experimental.constrained.fma.v8f16(
               <8 x half> %a, <8 x half> %b, <8 x half> %c,
               metadata !"round.dynamic", metadata !"fpexcept.strict") #0
  ret <8 x half> %fused
}

; Constrained <2 x half> to <2 x double> extension; a single vcvtph2pd is
; expected.
define <2 x double> @f15(<2 x half> %a) #0 {
; CHECK-LABEL: f15:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvtph2pd %xmm0, %xmm0
; CHECK-NEXT:    ret{{[l|q]}}
  %wide = call <2 x double> @llvm.experimental.constrained.fpext.v2f64.v2f16(
              <2 x half> %a, metadata !"fpexcept.strict") #0
  ret <2 x double> %wide
}

; Constrained <2 x double> to <2 x half> truncation; a single vcvtpd2ph is
; expected.
define <2 x half> @f16(<2 x double> %a) #0 {
; CHECK-LABEL: f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvtpd2ph %xmm0, %xmm0
; CHECK-NEXT:    ret{{[l|q]}}
  %narrow = call <2 x half> @llvm.experimental.constrained.fptrunc.v2f16.v2f64(
                <2 x double> %a,
                metadata !"round.dynamic", metadata !"fpexcept.strict") #0
  ret <2 x half> %narrow
}

; Element 0 of a <4 x float> is truncated to half and written into element 0
; of %a1; the expected code is a scalar vcvtss2sh followed by a vmovsh merge.
define <8 x half> @f17(<4 x float> %a0, <8 x half> %a1) #0 {
; CHECK-LABEL: f17:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvtss2sh %xmm0, %xmm0, %xmm0
; CHECK-NEXT:    vmovsh %xmm0, %xmm1, %xmm0
; CHECK-NEXT:    ret{{[l|q]}}
  %lane0 = extractelement <4 x float> %a0, i32 0
  %narrow = call half @llvm.experimental.constrained.fptrunc.f16.f32(
                float %lane0,
                metadata !"round.dynamic", metadata !"fpexcept.strict") #0
  %merged = insertelement <8 x half> %a1, half %narrow, i32 0
  ret <8 x half> %merged
}

; Element 0 of %a1 is extended to float and written into element 0 of %a0;
; the expected code is a scalar vcvtsh2ss followed by a vblendps merge.
define <4 x float> @f18(<4 x float> %a0, <8 x half> %a1) #0 {
; CHECK-LABEL: f18:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvtsh2ss %xmm1, %xmm1, %xmm1
; CHECK-NEXT:    vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
; CHECK-NEXT:    ret{{[l|q]}}
  %lane0 = extractelement <8 x half> %a1, i32 0
  %wide = call float @llvm.experimental.constrained.fpext.f32.f16(
              half %lane0, metadata !"fpexcept.strict") #0
  %merged = insertelement <4 x float> %a0, float %wide, i32 0
  ret <4 x float> %merged
}

; Constrained <2 x half> to <2 x float> extension. The expected code clears the
; second 32-bit lane of the input (vxorps + vblendps) before vcvtph2psx.
; NOTE(review): presumably this keeps the unused half lanes from raising FP
; exceptions under strict semantics - confirm against the X86 lowering.
define <2 x float> @f19(<2 x half> %a) #0 {
; CHECK-LABEL: f19:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; CHECK-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3]
; CHECK-NEXT:    vcvtph2psx %xmm0, %xmm0
; CHECK-NEXT:    ret{{[l|q]}}
  %wide = call <2 x float> @llvm.experimental.constrained.fpext.v2f32.v2f16(
              <2 x half> %a, metadata !"fpexcept.strict") #0
  ret <2 x float> %wide
}

; Constrained <4 x half> to <4 x float> extension; a single vcvtph2psx is
; expected.
define <4 x float> @f20(<4 x half> %a) #0 {
; CHECK-LABEL: f20:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvtph2psx %xmm0, %xmm0
; CHECK-NEXT:    ret{{[l|q]}}
  %wide = call <4 x float> @llvm.experimental.constrained.fpext.v4f32.v4f16(
              <4 x half> %a, metadata !"fpexcept.strict") #0
  ret <4 x float> %wide
}

; Constrained <2 x float> to <2 x half> truncation. The expected code zeroes
; the upper 64 bits of the input (vmovq) before vcvtps2phx.
; NOTE(review): presumably this keeps the unused float lanes from raising FP
; exceptions under strict semantics - confirm against the X86 lowering.
define <2 x half> @f21(<2 x float> %a) #0 {
; CHECK-LABEL: f21:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vmovq {{.*#+}} xmm0 = xmm0[0],zero
; CHECK-NEXT:    vcvtps2phx %xmm0, %xmm0
; CHECK-NEXT:    ret{{[l|q]}}
  %narrow = call <2 x half> @llvm.experimental.constrained.fptrunc.v2f16.v2f32(
                <2 x float> %a,
                metadata !"round.dynamic", metadata !"fpexcept.strict") #0
  ret <2 x half> %narrow
}

; Constrained <4 x float> to <4 x half> truncation; a single vcvtps2phx is
; expected.
define <4 x half> @f22(<4 x float> %a) #0 {
; CHECK-LABEL: f22:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvtps2phx %xmm0, %xmm0
; CHECK-NEXT:    ret{{[l|q]}}
  %narrow = call <4 x half> @llvm.experimental.constrained.fptrunc.v4f16.v4f32(
                <4 x float> %a,
                metadata !"round.dynamic", metadata !"fpexcept.strict") #0
  ret <4 x half> %narrow
}

attributes #0 = { strictfp }