; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw -mattr=+avx512fp16 -mattr=+avx512vl | FileCheck %s

declare <32 x half> @llvm.x86.avx512fp16.add.ph.512(<32 x half>, <32 x half>, i32)

define <32 x half> @test_int_x86_avx512fp16_add_ph_512(<32 x half> %x1, <32 x half> %x2) {
; CHECK-LABEL: test_int_x86_avx512fp16_add_ph_512:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vaddph %zmm1, %zmm0, %zmm0
; CHECK-NEXT:    retq
  %res = call <32 x half> @llvm.x86.avx512fp16.add.ph.512(<32 x half> %x1, <32 x half> %x2, i32 4)
  ret <32 x half> %res
}

define <32 x half> @test_int_x86_avx512fp16_mask_add_ph_512(<32 x half> %src, <32 x half> %x1, <32 x half> %x2, i32 %msk, ptr %ptr) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_add_ph_512:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vaddph %zmm2, %zmm1, %zmm0 {%k1}
; CHECK-NEXT:    retq
  %mask = bitcast i32 %msk to <32 x i1>
  %res0 = call <32 x half> @llvm.x86.avx512fp16.add.ph.512(<32 x half> %x1, <32 x half> %x2, i32 4)
  %res = select <32 x i1> %mask, <32 x half> %res0, <32 x half> %src
  ret <32 x half> %res
}

define <32 x half> @test_int_x86_avx512fp16_maskz_add_ph_512(<32 x half> %src, <32 x half> %x1, <32 x half> %x2, i32 %msk, ptr %ptr) {
; CHECK-LABEL: test_int_x86_avx512fp16_maskz_add_ph_512:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vaddph %zmm2, %zmm1, %zmm0 {%k1} {z}
; CHECK-NEXT:    vaddph (%rsi), %zmm1, %zmm1 {%k1} {z}
; CHECK-NEXT:    vaddph %zmm1, %zmm0, %zmm0
; CHECK-NEXT:    retq
  %mask = bitcast i32 %msk to <32 x i1>
  %val = load <32 x half>, ptr %ptr
  %res0 = call <32 x half> @llvm.x86.avx512fp16.add.ph.512(<32 x half> %x1, <32 x half> %x2, i32 4)
  %res1 = select <32 x i1> %mask, <32 x half> %res0, <32 x half> zeroinitializer
  %t2 = call <32 x half> @llvm.x86.avx512fp16.add.ph.512(<32 x half> %x1, <32 x half> %val, i32 4)
  %res2 = select <32 x i1> %mask, <32 x half> %t2, <32 x half> zeroinitializer
  %res3 = fadd <32 x half> %res1, %res2
  ret <32 x half> %res3
}

define <32 x half> @test_int_x86_avx512fp16_add_ph_512_round(<32 x half> %x1, <32 x half> %x2, <32 x half> %src, i32 %msk, ptr %ptr) {
; CHECK-LABEL: test_int_x86_avx512fp16_add_ph_512_round:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vaddph {ru-sae}, %zmm1, %zmm0, %zmm2 {%k1}
; CHECK-NEXT:    vmovaps %zmm2, %zmm0
; CHECK-NEXT:    retq
  %mask = bitcast i32 %msk to <32 x i1>
  %t1 = call <32 x half> @llvm.x86.avx512fp16.add.ph.512(<32 x half> %x1, <32 x half> %x2, i32 10)
  %res = select <32 x i1> %mask, <32 x half> %t1, <32 x half> %src
  ret <32 x half> %res
}

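; Rounding-control note: throughout this file i32 4 is _MM_FROUND_CUR_DIRECTION
; (use the MXCSR rounding mode), i32 8 requests {sae}, and i32 9/10/11 combine
; SAE with round-down/round-up/round-toward-zero, matching the {rd-sae},
; {ru-sae} and {rz-sae} suffixes expected in the CHECK lines.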
declare <32 x half> @llvm.x86.avx512fp16.sub.ph.512(<32 x half>, <32 x half>, i32)

define <32 x half> @test_int_x86_avx512fp16_sub_ph_512(<32 x half> %x1, <32 x half> %x2) {
; CHECK-LABEL: test_int_x86_avx512fp16_sub_ph_512:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsubph %zmm1, %zmm0, %zmm0
; CHECK-NEXT:    retq
  %res = call <32 x half> @llvm.x86.avx512fp16.sub.ph.512(<32 x half> %x1, <32 x half> %x2, i32 4)
  ret <32 x half> %res
}

define <32 x half> @test_int_x86_avx512fp16_mask_sub_ph_512(<32 x half> %src, <32 x half> %x1, <32 x half> %x2, i32 %msk, ptr %ptr) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_sub_ph_512:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vsubph %zmm2, %zmm1, %zmm0 {%k1}
; CHECK-NEXT:    retq
  %mask = bitcast i32 %msk to <32 x i1>
  %res0 = call <32 x half> @llvm.x86.avx512fp16.sub.ph.512(<32 x half> %x1, <32 x half> %x2, i32 4)
  %res = select <32 x i1> %mask, <32 x half> %res0, <32 x half> %src
  ret <32 x half> %res
}

define <32 x half> @test_int_x86_avx512fp16_maskz_sub_ph_512(<32 x half> %src, <32 x half> %x1, <32 x half> %x2, i32 %msk, ptr %ptr) {
; CHECK-LABEL: test_int_x86_avx512fp16_maskz_sub_ph_512:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vsubph %zmm2, %zmm1, %zmm0 {%k1} {z}
; CHECK-NEXT:    vsubph (%rsi), %zmm1, %zmm1
; CHECK-NEXT:    vsubph %zmm1, %zmm0, %zmm0 {%k1}
; CHECK-NEXT:    retq
  %mask = bitcast i32 %msk to <32 x i1>
  %val = load <32 x half>, ptr %ptr
  %res0 = call <32 x half> @llvm.x86.avx512fp16.sub.ph.512(<32 x half> %x1, <32 x half> %x2, i32 4)
  %res1 = select <32 x i1> %mask, <32 x half> %res0, <32 x half> zeroinitializer
  %t2 = call <32 x half> @llvm.x86.avx512fp16.sub.ph.512(<32 x half> %x1, <32 x half> %val, i32 4)
  %res2 = select <32 x i1> %mask, <32 x half> %t2, <32 x half> zeroinitializer
  %res3 = fsub <32 x half> %res1, %res2
  ret <32 x half> %res3
}

define <32 x half> @test_int_x86_avx512fp16_sub_ph_512_round(<32 x half> %x1, <32 x half> %x2, <32 x half> %src, i32 %msk, ptr %ptr) {
; CHECK-LABEL: test_int_x86_avx512fp16_sub_ph_512_round:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vsubph {ru-sae}, %zmm1, %zmm0, %zmm2 {%k1}
; CHECK-NEXT:    vmovaps %zmm2, %zmm0
; CHECK-NEXT:    retq
  %mask = bitcast i32 %msk to <32 x i1>
  %t1 = call <32 x half> @llvm.x86.avx512fp16.sub.ph.512(<32 x half> %x1, <32 x half> %x2, i32 10)
  %res = select <32 x i1> %mask, <32 x half> %t1, <32 x half> %src
  ret <32 x half> %res
}

declare <32 x half> @llvm.x86.avx512fp16.mul.ph.512(<32 x half>, <32 x half>, i32)

define <32 x half> @test_int_x86_avx512fp16_mul_ph_512(<32 x half> %x1, <32 x half> %x2) {
; CHECK-LABEL: test_int_x86_avx512fp16_mul_ph_512:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vmulph %zmm1, %zmm0, %zmm0
; CHECK-NEXT:    retq
  %res = call <32 x half> @llvm.x86.avx512fp16.mul.ph.512(<32 x half> %x1, <32 x half> %x2, i32 4)
  ret <32 x half> %res
}

define <32 x half> @test_int_x86_avx512fp16_mask_mul_ph_512(<32 x half> %src, <32 x half> %x1, <32 x half> %x2, i32 %msk, ptr %ptr) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_mul_ph_512:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vmulph %zmm2, %zmm1, %zmm0 {%k1}
; CHECK-NEXT:    retq
  %mask = bitcast i32 %msk to <32 x i1>
  %res0 = call <32 x half> @llvm.x86.avx512fp16.mul.ph.512(<32 x half> %x1, <32 x half> %x2, i32 4)
  %res = select <32 x i1> %mask, <32 x half> %res0, <32 x half> %src
  ret <32 x half> %res
}

define <32 x half> @test_int_x86_avx512fp16_maskz_mul_ph_512(<32 x half> %src, <32 x half> %x1, <32 x half> %x2, i32 %msk, ptr %ptr) {
; CHECK-LABEL: test_int_x86_avx512fp16_maskz_mul_ph_512:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vmulph %zmm2, %zmm1, %zmm0 {%k1} {z}
; CHECK-NEXT:    vmulph (%rsi), %zmm1, %zmm1 {%k1} {z}
; CHECK-NEXT:    vmulph %zmm1, %zmm0, %zmm0
; CHECK-NEXT:    retq
  %mask = bitcast i32 %msk to <32 x i1>
  %val = load <32 x half>, ptr %ptr
  %res0 = call <32 x half> @llvm.x86.avx512fp16.mul.ph.512(<32 x half> %x1, <32 x half> %x2, i32 4)
  %res1 = select <32 x i1> %mask, <32 x half> %res0, <32 x half> zeroinitializer
  %t2 = call <32 x half> @llvm.x86.avx512fp16.mul.ph.512(<32 x half> %x1, <32 x half> %val, i32 4)
  %res2 = select <32 x i1> %mask, <32 x half> %t2, <32 x half> zeroinitializer
  %res3 = fmul <32 x half> %res1, %res2
  ret <32 x half> %res3
}

define <32 x half> @test_int_x86_avx512fp16_mul_ph_512_round(<32 x half> %x1, <32 x half> %x2, <32 x half> %src, i32 %msk, ptr %ptr) {
; CHECK-LABEL: test_int_x86_avx512fp16_mul_ph_512_round:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vmulph {ru-sae}, %zmm1, %zmm0, %zmm2 {%k1}
; CHECK-NEXT:    vmovaps %zmm2, %zmm0
; CHECK-NEXT:    retq
  %mask = bitcast i32 %msk to <32 x i1>
  %t1 = call <32 x half> @llvm.x86.avx512fp16.mul.ph.512(<32 x half> %x1, <32 x half> %x2, i32 10)
  %res = select <32 x i1> %mask, <32 x half> %t1, <32 x half> %src
  ret <32 x half> %res
}

declare <32 x half> @llvm.x86.avx512fp16.div.ph.512(<32 x half>, <32 x half>, i32)

define <32 x half> @test_int_x86_avx512fp16_div_ph_512(<32 x half> %x1, <32 x half> %x2) {
; CHECK-LABEL: test_int_x86_avx512fp16_div_ph_512:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vdivph %zmm1, %zmm0, %zmm0
; CHECK-NEXT:    retq
  %res = call <32 x half> @llvm.x86.avx512fp16.div.ph.512(<32 x half> %x1, <32 x half> %x2, i32 4)
  ret <32 x half> %res
}

define <32 x half> @test_int_x86_avx512fp16_mask_div_ph_512(<32 x half> %src, <32 x half> %x1, <32 x half> %x2, i32 %msk, ptr %ptr) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_div_ph_512:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vdivph %zmm2, %zmm1, %zmm0 {%k1}
; CHECK-NEXT:    retq
  %mask = bitcast i32 %msk to <32 x i1>
  %res0 = call <32 x half> @llvm.x86.avx512fp16.div.ph.512(<32 x half> %x1, <32 x half> %x2, i32 4)
  %res = select <32 x i1> %mask, <32 x half> %res0, <32 x half> %src
  ret <32 x half> %res
}

define <32 x half> @test_int_x86_avx512fp16_maskz_div_ph_512(<32 x half> %src, <32 x half> %x1, <32 x half> %x2, i32 %msk, ptr %ptr) {
; CHECK-LABEL: test_int_x86_avx512fp16_maskz_div_ph_512:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vdivph %zmm2, %zmm1, %zmm0 {%k1} {z}
; CHECK-NEXT:    vdivph (%rsi), %zmm1, %zmm1 {%k1} {z}
; CHECK-NEXT:    vdivph %zmm1, %zmm0, %zmm0
; CHECK-NEXT:    retq
  %mask = bitcast i32 %msk to <32 x i1>
  %val = load <32 x half>, ptr %ptr
  %res0 = call <32 x half> @llvm.x86.avx512fp16.div.ph.512(<32 x half> %x1, <32 x half> %x2, i32 4)
  %res1 = select <32 x i1> %mask, <32 x half> %res0, <32 x half> zeroinitializer
  %t2 = call <32 x half> @llvm.x86.avx512fp16.div.ph.512(<32 x half> %x1, <32 x half> %val, i32 4)
  %res2 = select <32 x i1> %mask, <32 x half> %t2, <32 x half> zeroinitializer
  %res3 = fdiv <32 x half> %res1, %res2
  ret <32 x half> %res3
}

define <32 x half> @test_int_x86_avx512fp16_div_ph_512_round(<32 x half> %x1, <32 x half> %x2, <32 x half> %src, i32 %msk, ptr %ptr) {
; CHECK-LABEL: test_int_x86_avx512fp16_div_ph_512_round:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vdivph {ru-sae}, %zmm1, %zmm0, %zmm2 {%k1}
; CHECK-NEXT:    vmovaps %zmm2, %zmm0
; CHECK-NEXT:    retq
  %mask = bitcast i32 %msk to <32 x i1>
  %t1 = call <32 x half> @llvm.x86.avx512fp16.div.ph.512(<32 x half> %x1, <32 x half> %x2, i32 10)
  %res = select <32 x i1> %mask, <32 x half> %t1, <32 x half> %src
  ret <32 x half> %res
}

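; The min/max tests below check two paths: a plain fcmp+select pattern that
; should fold to vminph/vmaxph, and the min/max intrinsics with an SAE operand
; (i32 8) that should keep the {sae} form.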
declare <32 x half> @llvm.x86.avx512fp16.min.ph.512(<32 x half>, <32 x half>, i32)

define <32 x half> @test_min_ph(<32 x half> %x1, <32 x half> %x2) {
; CHECK-LABEL: test_min_ph:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vminph %zmm1, %zmm0, %zmm0
; CHECK-NEXT:    retq
  %res0 = fcmp olt <32 x half> %x1, %x2
  %res1 = select <32 x i1> %res0, <32 x half> %x1, <32 x half> %x2
  ret <32 x half> %res1
}

define <32 x half> @test_int_x86_avx512fp16_min_ph_512_sae(<32 x half> %x1, <32 x half> %x2) {
; CHECK-LABEL: test_int_x86_avx512fp16_min_ph_512_sae:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vminph {sae}, %zmm1, %zmm0, %zmm0
; CHECK-NEXT:    retq
  %res0 = call <32 x half> @llvm.x86.avx512fp16.min.ph.512(<32 x half> %x1, <32 x half> %x2, i32 8)
  ret <32 x half> %res0
}

define <32 x half> @test_int_x86_avx512fp16_maskz_min_ph_512_sae(<32 x half> %x1, <32 x half> %x2, i32 %msk) {
; CHECK-LABEL: test_int_x86_avx512fp16_maskz_min_ph_512_sae:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vminph {sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %mask = bitcast i32 %msk to <32 x i1>
  %res0 = call <32 x half> @llvm.x86.avx512fp16.min.ph.512(<32 x half> %x1, <32 x half> %x2, i32 8)
  %res1 = select <32 x i1> %mask, <32 x half> %res0, <32 x half> zeroinitializer
  ret <32 x half> %res1
}

declare <32 x half> @llvm.x86.avx512fp16.max.ph.512(<32 x half>, <32 x half>, i32)

define <32 x half> @test_max_ph(<32 x half> %x1, <32 x half> %x2) {
; CHECK-LABEL: test_max_ph:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vmaxph %zmm1, %zmm0, %zmm0
; CHECK-NEXT:    retq
  %res0 = fcmp ogt <32 x half> %x1, %x2
  %res1 = select <32 x i1> %res0, <32 x half> %x1, <32 x half> %x2
  ret <32 x half> %res1
}

define <32 x half> @test_int_x86_avx512fp16_max_ph_512_sae(<32 x half> %x1, <32 x half> %x2) {
; CHECK-LABEL: test_int_x86_avx512fp16_max_ph_512_sae:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vmaxph {sae}, %zmm1, %zmm0, %zmm0
; CHECK-NEXT:    retq
  %res0 = call <32 x half> @llvm.x86.avx512fp16.max.ph.512(<32 x half> %x1, <32 x half> %x2, i32 8)
  ret <32 x half> %res0
}

define <32 x half> @test_int_x86_avx512fp16_maskz_max_ph_512_sae(<32 x half> %x1, <32 x half> %x2, i32 %msk) {
; CHECK-LABEL: test_int_x86_avx512fp16_maskz_max_ph_512_sae:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vmaxph {sae}, %zmm1, %zmm0, %zmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %mask = bitcast i32 %msk to <32 x i1>
  %res0 = call <32 x half> @llvm.x86.avx512fp16.max.ph.512(<32 x half> %x1, <32 x half> %x2, i32 8)
  %res1 = select <32 x i1> %mask, <32 x half> %res0, <32 x half> zeroinitializer
  ret <32 x half> %res1
}

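; The remaining tests cover the masked FP16 conversion intrinsics, each checked
; in its default (current rounding) form plus, where the group provides them,
; SAE/embedded-rounding, no-mask, zero-mask and folded-load variants.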
declare <8 x double> @llvm.x86.avx512fp16.mask.vcvtph2pd.512(<8 x half>, <8 x double>, i8, i32)

define <8 x double> @test_int_x86_avx512_mask_vcvt_ph2pd(<8 x half> %x0, <8 x double> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_vcvt_ph2pd:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvtph2pd %xmm0, %zmm1 {%k1}
; CHECK-NEXT:    vmovaps %zmm1, %zmm0
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512fp16.mask.vcvtph2pd.512(<8 x half> %x0, <8 x double> %x1, i8 %x2, i32 4)
  ret <8 x double> %res
}

define <8 x double> @test_int_x86_avx512_mask_vcvt_ph2pd_sae(<8 x half> %x0, <8 x double> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_vcvt_ph2pd_sae:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvtph2pd {sae}, %xmm0, %zmm1 {%k1}
; CHECK-NEXT:    vmovaps %zmm1, %zmm0
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512fp16.mask.vcvtph2pd.512(<8 x half> %x0, <8 x double> %x1, i8 %x2, i32 8)
  ret <8 x double> %res
}

define <8 x double> @test_int_x86_avx512_mask_vcvt_ph2pd_nomask(<8 x half> %x0, <8 x double> %x1) {
; CHECK-LABEL: test_int_x86_avx512_mask_vcvt_ph2pd_nomask:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvtph2pd %xmm0, %zmm0
; CHECK-NEXT:    retq
  %res = call <8 x double> @llvm.x86.avx512fp16.mask.vcvtph2pd.512(<8 x half> %x0, <8 x double> %x1, i8 -1, i32 4)
  ret <8 x double> %res
}

define <8 x double> @test_int_x86_avx512_mask_vcvt_ph2pd_load(ptr %px0, <8 x double> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_vcvt_ph2pd_load:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %esi, %k1
; CHECK-NEXT:    vcvtph2pd (%rdi), %zmm0 {%k1}
; CHECK-NEXT:    retq
  %x0 = load <8 x half>, ptr %px0, align 16
  %res = call <8 x double> @llvm.x86.avx512fp16.mask.vcvtph2pd.512(<8 x half> %x0, <8 x double> %x1, i8 %x2, i32 4)
  ret <8 x double> %res
}

declare <8 x half> @llvm.x86.avx512fp16.mask.vcvtpd2ph.512(<8 x double>, <8 x half>, i8, i32)

define <8 x half> @test_int_x86_avx512_mask_vcvt_pd2ph(<8 x double> %x0, <8 x half> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_vcvt_pd2ph:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvtpd2ph %zmm0, %xmm1 {%k1}
; CHECK-NEXT:    vmovaps %xmm1, %xmm0
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
  %res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtpd2ph.512(<8 x double> %x0, <8 x half> %x1, i8 %x2, i32 4)
  ret <8 x half> %res
}

define <8 x half> @test_int_x86_avx512_mask_vcvt_pd2ph_r(<8 x double> %x0, <8 x half> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_vcvt_pd2ph_r:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvtpd2ph {rz-sae}, %zmm0, %xmm1 {%k1}
; CHECK-NEXT:    vmovaps %xmm1, %xmm0
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
  %res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtpd2ph.512(<8 x double> %x0, <8 x half> %x1, i8 %x2, i32 11)
  ret <8 x half> %res
}

define <8 x half> @test_int_x86_avx512_mask_vcvt_pd2ph_load(ptr %px0, <8 x half> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_vcvt_pd2ph_load:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %esi, %k1
; CHECK-NEXT:    vcvtpd2phz (%rdi), %xmm0 {%k1}
; CHECK-NEXT:    retq
  %x0 = load <8 x double>, ptr %px0, align 64
  %res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtpd2ph.512(<8 x double> %x0, <8 x half> %x1, i8 %x2, i32 4)
  ret <8 x half> %res
}

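; Scalar convert-with-round intrinsics (vcvtss2sh, vcvtsd2sh and, further down,
; vcvtsh2ss, vcvtsh2sd): only the low element is converted; the upper elements
; are passed through from the first operand and only the low bit of the i8 mask
; should be consulted.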
declare <8 x half> @llvm.x86.avx512fp16.mask.vcvtss2sh.round(<8 x half>, <4 x float>, <8 x half>, i8, i32)

define <8 x half> @test_int_x86_avx512fp16_mask_cvt_ss2sh_round(<8 x half> %x0, <4 x float> %x1, <8 x half> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvt_ss2sh_round:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvtss2sh %xmm1, %xmm0, %xmm2 {%k1}
; CHECK-NEXT:    vmovaps %xmm2, %xmm0
; CHECK-NEXT:    retq
  %res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtss2sh.round(<8 x half> %x0, <4 x float> %x1, <8 x half> %x2, i8 %x3, i32 4)
  ret <8 x half> %res
}

define <8 x half> @test_int_x86_avx512fp16_mask_cvt_ss2sh_round_r(<8 x half> %x0, <4 x float> %x1, <8 x half> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvt_ss2sh_round_r:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvtss2sh {rz-sae}, %xmm1, %xmm0, %xmm2 {%k1}
; CHECK-NEXT:    vmovaps %xmm2, %xmm0
; CHECK-NEXT:    retq
  %res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtss2sh.round(<8 x half> %x0, <4 x float> %x1, <8 x half> %x2, i8 %x3, i32 11)
  ret <8 x half> %res
}

define <8 x half> @test_int_x86_avx512fp16_mask_cvt_ss2sh_round_nomask(<8 x half> %x0, <4 x float> %x1, <8 x half> %x2) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvt_ss2sh_round_nomask:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvtss2sh %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtss2sh.round(<8 x half> %x0, <4 x float> %x1, <8 x half> %x2, i8 -1, i32 4)
  ret <8 x half> %res
}

define <8 x half> @test_int_x86_avx512fp16_mask_cvt_ss2sh_round_z(<8 x half> %x0, <4 x float> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvt_ss2sh_round_z:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvtss2sh %xmm1, %xmm0, %xmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtss2sh.round(<8 x half> %x0, <4 x float> %x1, <8 x half> zeroinitializer, i8 %x2, i32 4)
  ret <8 x half> %res
}

declare <8 x half> @llvm.x86.avx512fp16.mask.vcvtsd2sh.round(<8 x half>, <2 x double>, <8 x half>, i8, i32)

define <8 x half> @test_int_x86_avx512fp16_mask_cvt_sd2sh_round(<8 x half> %x0, <2 x double> %x1, <8 x half> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvt_sd2sh_round:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvtsd2sh %xmm1, %xmm0, %xmm2 {%k1}
; CHECK-NEXT:    vmovaps %xmm2, %xmm0
; CHECK-NEXT:    retq
  %res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtsd2sh.round(<8 x half> %x0, <2 x double> %x1, <8 x half> %x2, i8 %x3, i32 4)
  ret <8 x half> %res
}

define <8 x half> @test_int_x86_avx512fp16_mask_cvt_sd2sh_round_r(<8 x half> %x0, <2 x double> %x1, <8 x half> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvt_sd2sh_round_r:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvtsd2sh {rz-sae}, %xmm1, %xmm0, %xmm2 {%k1}
; CHECK-NEXT:    vmovaps %xmm2, %xmm0
; CHECK-NEXT:    retq
  %res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtsd2sh.round(<8 x half> %x0, <2 x double> %x1, <8 x half> %x2, i8 %x3, i32 11)
  ret <8 x half> %res
}

define <8 x half> @test_int_x86_avx512fp16_mask_cvt_sd2sh_round_nomask(<8 x half> %x0, <2 x double> %x1, <8 x half> %x2) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvt_sd2sh_round_nomask:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvtsd2sh %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtsd2sh.round(<8 x half> %x0, <2 x double> %x1, <8 x half> %x2, i8 -1, i32 4)
  ret <8 x half> %res
}

define <8 x half> @test_int_x86_avx512fp16_mask_cvt_sd2sh_round_z(<8 x half> %x0, <2 x double> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvt_sd2sh_round_z:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvtsd2sh %xmm1, %xmm0, %xmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtsd2sh.round(<8 x half> %x0, <2 x double> %x1, <8 x half> zeroinitializer, i8 %x2, i32 4)
  ret <8 x half> %res
}

declare <4 x float> @llvm.x86.avx512fp16.mask.vcvtsh2ss.round(<4 x float>, <8 x half>, <4 x float>, i8, i32)

define <4 x float> @test_int_x86_avx512fp16_mask_cvt_sh2ss_round(<4 x float> %x0, <8 x half> %x1, <4 x float> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvt_sh2ss_round:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvtsh2ss %xmm1, %xmm0, %xmm2 {%k1}
; CHECK-NEXT:    vmovaps %xmm2, %xmm0
; CHECK-NEXT:    retq
  %res = call <4 x float> @llvm.x86.avx512fp16.mask.vcvtsh2ss.round(<4 x float> %x0, <8 x half> %x1, <4 x float> %x2, i8 %x3, i32 4)
  ret <4 x float> %res
}

define <4 x float> @test_int_x86_avx512fp16_mask_cvt_sh2ss_round_r(<4 x float> %x0, <8 x half> %x1, <4 x float> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvt_sh2ss_round_r:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvtsh2ss {sae}, %xmm1, %xmm0, %xmm2 {%k1}
; CHECK-NEXT:    vmovaps %xmm2, %xmm0
; CHECK-NEXT:    retq
  %res = call <4 x float> @llvm.x86.avx512fp16.mask.vcvtsh2ss.round(<4 x float> %x0, <8 x half> %x1, <4 x float> %x2, i8 %x3, i32 8)
  ret <4 x float> %res
}

define <4 x float> @test_int_x86_avx512fp16_mask_cvt_sh2ss_round_nomask(<4 x float> %x0, <8 x half> %x1, <4 x float> %x2) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvt_sh2ss_round_nomask:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvtsh2ss %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %res = call <4 x float> @llvm.x86.avx512fp16.mask.vcvtsh2ss.round(<4 x float> %x0, <8 x half> %x1, <4 x float> %x2, i8 -1, i32 4)
  ret <4 x float> %res
}

define <4 x float> @test_int_x86_avx512fp16_mask_cvt_sh2ss_round_z(<4 x float> %x0, <8 x half> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvt_sh2ss_round_z:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvtsh2ss %xmm1, %xmm0, %xmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %res = call <4 x float> @llvm.x86.avx512fp16.mask.vcvtsh2ss.round(<4 x float> %x0, <8 x half> %x1, <4 x float> zeroinitializer, i8 %x2, i32 4)
  ret <4 x float> %res
}

declare <2 x double> @llvm.x86.avx512fp16.mask.vcvtsh2sd.round(<2 x double>, <8 x half>, <2 x double>, i8, i32)

define <2 x double> @test_int_x86_avx512fp16_mask_cvt_sh2sd_round(<2 x double> %x0, <8 x half> %x1, <2 x double> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvt_sh2sd_round:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvtsh2sd %xmm1, %xmm0, %xmm2 {%k1}
; CHECK-NEXT:    vmovaps %xmm2, %xmm0
; CHECK-NEXT:    retq
  %res = call <2 x double> @llvm.x86.avx512fp16.mask.vcvtsh2sd.round(<2 x double> %x0, <8 x half> %x1, <2 x double> %x2, i8 %x3, i32 4)
  ret <2 x double> %res
}

define <2 x double> @test_int_x86_avx512fp16_mask_cvt_sh2sd_round_r(<2 x double> %x0, <8 x half> %x1, <2 x double> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvt_sh2sd_round_r:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvtsh2sd {sae}, %xmm1, %xmm0, %xmm2 {%k1}
; CHECK-NEXT:    vmovaps %xmm2, %xmm0
; CHECK-NEXT:    retq
  %res = call <2 x double> @llvm.x86.avx512fp16.mask.vcvtsh2sd.round(<2 x double> %x0, <8 x half> %x1, <2 x double> %x2, i8 %x3, i32 8)
  ret <2 x double> %res
}

define <2 x double> @test_int_x86_avx512fp16_mask_cvt_sh2sd_round_nomask(<2 x double> %x0, <8 x half> %x1, <2 x double> %x2) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvt_sh2sd_round_nomask:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvtsh2sd %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %res = call <2 x double> @llvm.x86.avx512fp16.mask.vcvtsh2sd.round(<2 x double> %x0, <8 x half> %x1, <2 x double> %x2, i8 -1, i32 4)
  ret <2 x double> %res
}

define <2 x double> @test_int_x86_avx512fp16_mask_cvt_sh2sd_round_z(<2 x double> %x0, <8 x half> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_cvt_sh2sd_round_z:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvtsh2sd %xmm1, %xmm0, %xmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %res = call <2 x double> @llvm.x86.avx512fp16.mask.vcvtsh2sd.round(<2 x double> %x0, <8 x half> %x1, <2 x double> zeroinitializer, i8 %x2, i32 4)
  ret <2 x double> %res
}

declare <16 x float> @llvm.x86.avx512fp16.mask.vcvtph2psx.512(<16 x half>, <16 x float>, i16, i32)

define <16 x float> @test_int_x86_avx512_cvt_ph2psx_512(<16 x half> %x0) {
; CHECK-LABEL: test_int_x86_avx512_cvt_ph2psx_512:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvtph2psx %ymm0, %zmm0
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512fp16.mask.vcvtph2psx.512(<16 x half> %x0, <16 x float> undef, i16 -1, i32 4)
  ret <16 x float> %res
}

define <16 x float> @test_int_x86_avx512_mask_cvt_ph2psx_512(<16 x half> %x0, <16 x float> %x1, i16 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ph2psx_512:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvtph2psx %ymm0, %zmm1 {%k1}
; CHECK-NEXT:    vmovaps %zmm1, %zmm0
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512fp16.mask.vcvtph2psx.512(<16 x half> %x0, <16 x float> %x1, i16 %x2, i32 4)
  ret <16 x float> %res
}

define <16 x float> @test_int_x86_avx512_maskz_cvt_ph2psx_512(<16 x half> %x0, i16 %x2) {
; CHECK-LABEL: test_int_x86_avx512_maskz_cvt_ph2psx_512:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvtph2psx %ymm0, %zmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512fp16.mask.vcvtph2psx.512(<16 x half> %x0, <16 x float> zeroinitializer, i16 %x2, i32 4)
  ret <16 x float> %res
}

define <16 x float> @test_int_x86_avx512_cvt_ph2psx_512r(<16 x half> %x0) {
; CHECK-LABEL: test_int_x86_avx512_cvt_ph2psx_512r:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvtph2psx {sae}, %ymm0, %zmm0
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512fp16.mask.vcvtph2psx.512(<16 x half> %x0, <16 x float> undef, i16 -1, i32 8)
  ret <16 x float> %res
}

define <16 x float> @test_int_x86_avx512_mask_cvt_ph2psx_512r(<16 x half> %x0, <16 x float> %x1, i16 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ph2psx_512r:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvtph2psx {sae}, %ymm0, %zmm1 {%k1}
; CHECK-NEXT:    vmovaps %zmm1, %zmm0
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512fp16.mask.vcvtph2psx.512(<16 x half> %x0, <16 x float> %x1, i16 %x2, i32 8)
  ret <16 x float> %res
}

define <16 x float> @test_int_x86_avx512_maskz_cvt_ph2psx_512r(<16 x half> %x0, i16 %x2) {
; CHECK-LABEL: test_int_x86_avx512_maskz_cvt_ph2psx_512r:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvtph2psx {sae}, %ymm0, %zmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.x86.avx512fp16.mask.vcvtph2psx.512(<16 x half> %x0, <16 x float> zeroinitializer, i16 %x2, i32 8)
  ret <16 x float> %res
}

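; vcvtps2phx: packed float-to-half conversions from a 512-bit source to a
; 256-bit result. The final test mixes a masked {rd-sae} conversion with an
; unmasked {ru-sae} one and adds the results, so both embedded-rounding forms
; are expected in the generated code.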
declare <16 x half> @llvm.x86.avx512fp16.mask.vcvtps2phx.512(<16 x float>, <16 x half>, i16, i32)

define <16 x half> @test_int_x86_avx512_cvt_ps2phx_512(<16 x float> %x0) {
; CHECK-LABEL: test_int_x86_avx512_cvt_ps2phx_512:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvtps2phx %zmm0, %ymm0
; CHECK-NEXT:    retq
  %res = call <16 x half> @llvm.x86.avx512fp16.mask.vcvtps2phx.512(<16 x float> %x0, <16 x half> undef, i16 -1, i32 4)
  ret <16 x half> %res
}

define <16 x half> @test_int_x86_avx512_mask_cvt_ps2phx_512(<16 x float> %x0, <16 x half> %x1, i16 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ps2phx_512:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvtps2phx %zmm0, %ymm1 {%k1}
; CHECK-NEXT:    vmovaps %ymm1, %ymm0
; CHECK-NEXT:    retq
  %res = call <16 x half> @llvm.x86.avx512fp16.mask.vcvtps2phx.512(<16 x float> %x0, <16 x half> %x1, i16 %x2, i32 4)
  ret <16 x half> %res
}

define <16 x half> @test_int_x86_avx512_maskz_cvt_ps2phx_512(<16 x float> %x0, i16 %x2) {
; CHECK-LABEL: test_int_x86_avx512_maskz_cvt_ps2phx_512:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvtps2phx %zmm0, %ymm0 {%k1} {z}
; CHECK-NEXT:    retq
  %res = call <16 x half> @llvm.x86.avx512fp16.mask.vcvtps2phx.512(<16 x float> %x0, <16 x half> zeroinitializer, i16 %x2, i32 4)
  ret <16 x half> %res
}

define <16 x half> @test_int_x86_avx512_mask_cvt_ps2phx_512r(<16 x float> %x0, <16 x half> %x1, i16 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ps2phx_512r:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvtps2phx {rd-sae}, %zmm0, %ymm1 {%k1}
; CHECK-NEXT:    vcvtps2phx {ru-sae}, %zmm0, %ymm0
; CHECK-NEXT:    vaddph %ymm0, %ymm1, %ymm0
; CHECK-NEXT:    retq
  %res = call <16 x half> @llvm.x86.avx512fp16.mask.vcvtps2phx.512(<16 x float> %x0, <16 x half> %x1, i16 %x2, i32 9)
  %res1 = call <16 x half> @llvm.x86.avx512fp16.mask.vcvtps2phx.512(<16 x float> %x0, <16 x half> %x1, i16 -1, i32 10)
  %res2 = fadd <16 x half> %res, %res1
  ret <16 x half> %res2
}