; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw -mattr=+avx512vl -mattr=+avx512fp16 | FileCheck %s

define <16 x half> @test_int_x86_avx512fp16_add_ph_256(<16 x half> %x1, <16 x half> %x2) {
; CHECK-LABEL: test_int_x86_avx512fp16_add_ph_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vaddph %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    retq
  %res = fadd <16 x half> %x1, %x2
  ret <16 x half> %res
}

define <16 x half> @test_int_x86_avx512fp16_mask_add_ph_256(<16 x half> %x1, <16 x half> %x2, <16 x half> %src, i16 %mask, ptr %ptr) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_add_ph_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vmovaps %ymm2, %ymm3
; CHECK-NEXT:    vaddph %ymm1, %ymm0, %ymm3 {%k1}
; CHECK-NEXT:    vaddph (%rsi), %ymm0, %ymm2 {%k1}
; CHECK-NEXT:    vaddph %ymm2, %ymm3, %ymm0
; CHECK-NEXT:    retq
  %msk = bitcast i16 %mask to <16 x i1>
  %val = load <16 x half>, ptr %ptr
  %res0 = fadd <16 x half> %x1, %x2
  %res1 = select <16 x i1> %msk, <16 x half> %res0, <16 x half> %src
  %t3 = fadd <16 x half> %x1, %val
  %res2 = select <16 x i1> %msk, <16 x half> %t3, <16 x half> %src
  %res = fadd <16 x half> %res1 , %res2
  ret <16 x half> %res
}

define <16 x half> @test_int_x86_avx512fp16_maskz_add_ph_256(<16 x half> %x1, <16 x half> %x2, i16 %mask, ptr %ptr) {
; CHECK-LABEL: test_int_x86_avx512fp16_maskz_add_ph_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vaddph %ymm1, %ymm0, %ymm0 {%k1} {z}
; CHECK-NEXT:    retq
  %msk = bitcast i16 %mask to <16 x i1>
  %res0 = fadd <16 x half> %x1, %x2
  %res1 = select <16 x i1> %msk, <16 x half> %res0, <16 x half> zeroinitializer
  ret <16 x half> %res1
}

define <8 x half> @test_int_x86_avx512fp16_add_ph_128(<8 x half> %x1, <8 x half> %x2) {
; CHECK-LABEL: test_int_x86_avx512fp16_add_ph_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vaddph %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %res = fadd <8 x half> %x1, %x2
  ret <8 x half> %res
}

define <8 x half> @test_int_x86_avx512fp16_mask_add_ph_128(<8 x half> %x1, <8 x half> %x2, <8 x half> %src, i8 %mask, ptr %ptr) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_add_ph_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vmovaps %xmm2, %xmm3
; CHECK-NEXT:    vaddph %xmm1, %xmm0, %xmm3 {%k1}
; CHECK-NEXT:    vaddph (%rsi), %xmm0, %xmm2 {%k1}
; CHECK-NEXT:    vaddph %xmm2, %xmm3, %xmm0
; CHECK-NEXT:    retq
  %msk = bitcast i8 %mask to <8 x i1>
  %val = load <8 x half>, ptr %ptr
  %res0 = fadd <8 x half> %x1, %x2
  %res1 = select <8 x i1> %msk, <8 x half> %res0, <8 x half> %src
  %t3 = fadd <8 x half> %x1, %val
  %res2 = select <8 x i1> %msk, <8 x half> %t3, <8 x half> %src
  %res = fadd <8 x half> %res1 , %res2
  ret <8 x half> %res
}

define <8 x half> @test_int_x86_avx512fp16_maskz_add_ph_128(<8 x half> %x1, <8 x half> %x2, i8 %mask, ptr %ptr) {
; CHECK-LABEL: test_int_x86_avx512fp16_maskz_add_ph_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vaddph %xmm1, %xmm0, %xmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %msk = bitcast i8 %mask to <8 x i1>
  %res0 = fadd <8 x half> %x1, %x2
  %res1 = select <8 x i1> %msk, <8 x half> %res0, <8 x half> zeroinitializer
  ret <8 x half> %res1
}

define <16 x half> @test_int_x86_avx512fp16_sub_ph_256(<16 x half> %x1, <16 x half> %x2) {
; CHECK-LABEL: test_int_x86_avx512fp16_sub_ph_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsubph %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    retq
  %res = fsub <16 x half> %x1, %x2
  ret <16 x half> %res
}

define <16 x half> @test_int_x86_avx512fp16_mask_sub_ph_256(<16 x half> %x1, <16 x half> %x2, <16 x half> %src, i16 %mask, ptr %ptr) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_sub_ph_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vmovaps %ymm2, %ymm3
; CHECK-NEXT:    vsubph %ymm1, %ymm0, %ymm3 {%k1}
; CHECK-NEXT:    vsubph (%rsi), %ymm0, %ymm2 {%k1}
; CHECK-NEXT:    vsubph %ymm2, %ymm3, %ymm0
; CHECK-NEXT:    retq
  %msk = bitcast i16 %mask to <16 x i1>
  %val = load <16 x half>, ptr %ptr
  %res0 = fsub <16 x half> %x1, %x2
  %res1 = select <16 x i1> %msk, <16 x half> %res0, <16 x half> %src
  %t3 = fsub <16 x half> %x1, %val
  %res2 = select <16 x i1> %msk, <16 x half> %t3, <16 x half> %src
  %res = fsub <16 x half> %res1 , %res2
  ret <16 x half> %res
}

define <16 x half> @test_int_x86_avx512fp16_maskz_sub_ph_256(<16 x half> %x1, <16 x half> %x2, i16 %mask, ptr %ptr) {
; CHECK-LABEL: test_int_x86_avx512fp16_maskz_sub_ph_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vsubph %ymm1, %ymm0, %ymm0 {%k1} {z}
; CHECK-NEXT:    retq
  %msk = bitcast i16 %mask to <16 x i1>
  %res0 = fsub <16 x half> %x1, %x2
  %res1 = select <16 x i1> %msk, <16 x half> %res0, <16 x half> zeroinitializer
  ret <16 x half> %res1
}

define <8 x half> @test_int_x86_avx512fp16_sub_ph_128(<8 x half> %x1, <8 x half> %x2) {
; CHECK-LABEL: test_int_x86_avx512fp16_sub_ph_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsubph %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %res = fsub <8 x half> %x1, %x2
  ret <8 x half> %res
}

define <8 x half> @test_int_x86_avx512fp16_mask_sub_ph_128(<8 x half> %x1, <8 x half> %x2, <8 x half> %src, i8 %mask, ptr %ptr) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_sub_ph_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vmovaps %xmm2, %xmm3
; CHECK-NEXT:    vsubph %xmm1, %xmm0, %xmm3 {%k1}
; CHECK-NEXT:    vsubph (%rsi), %xmm0, %xmm2 {%k1}
; CHECK-NEXT:    vsubph %xmm2, %xmm3, %xmm0
; CHECK-NEXT:    retq
  %msk = bitcast i8 %mask to <8 x i1>
  %val = load <8 x half>, ptr %ptr
  %res0 = fsub <8 x half> %x1, %x2
  %res1 = select <8 x i1> %msk, <8 x half> %res0, <8 x half> %src
  %t3 = fsub <8 x half> %x1, %val
  %res2 = select <8 x i1> %msk, <8 x half> %t3, <8 x half> %src
  %res = fsub <8 x half> %res1 , %res2
  ret <8 x half> %res
}

define <8 x half> @test_int_x86_avx512fp16_maskz_sub_ph_128(<8 x half> %x1, <8 x half> %x2, i8 %mask, ptr %ptr) {
; CHECK-LABEL: test_int_x86_avx512fp16_maskz_sub_ph_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vsubph %xmm1, %xmm0, %xmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %msk = bitcast i8 %mask to <8 x i1>
  %res0 = fsub <8 x half> %x1, %x2
  %res1 = select <8 x i1> %msk, <8 x half> %res0, <8 x half> zeroinitializer
  ret <8 x half> %res1
}

define <16 x half> @test_int_x86_avx512fp16_mul_ph_256(<16 x half> %x1, <16 x half> %x2) {
; CHECK-LABEL: test_int_x86_avx512fp16_mul_ph_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vmulph %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    retq
  %res = fmul <16 x half> %x1, %x2
  ret <16 x half> %res
}

define <16 x half> @test_int_x86_avx512fp16_mask_mul_ph_256(<16 x half> %x1, <16 x half> %x2, <16 x half> %src, i16 %mask, ptr %ptr) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_mul_ph_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vmovaps %ymm2, %ymm3
; CHECK-NEXT:    vmulph %ymm1, %ymm0, %ymm3 {%k1}
; CHECK-NEXT:    vmulph (%rsi), %ymm0, %ymm2 {%k1}
; CHECK-NEXT:    vmulph %ymm2, %ymm3, %ymm0
; CHECK-NEXT:    retq
  %msk = bitcast i16 %mask to <16 x i1>
  %val = load <16 x half>, ptr %ptr
  %res0 = fmul <16 x half> %x1, %x2
  %res1 = select <16 x i1> %msk, <16 x half> %res0, <16 x half> %src
  %t3 = fmul <16 x half> %x1, %val
  %res2 = select <16 x i1> %msk, <16 x half> %t3, <16 x half> %src
  %res = fmul <16 x half> %res1 , %res2
  ret <16 x half> %res
}

define <16 x half> @test_int_x86_avx512fp16_maskz_mul_ph_256(<16 x half> %x1, <16 x half> %x2, i16 %mask, ptr %ptr) {
; CHECK-LABEL: test_int_x86_avx512fp16_maskz_mul_ph_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vmulph %ymm1, %ymm0, %ymm0 {%k1} {z}
; CHECK-NEXT:    retq
  %msk = bitcast i16 %mask to <16 x i1>
  %res0 = fmul <16 x half> %x1, %x2
  %res1 = select <16 x i1> %msk, <16 x half> %res0, <16 x half> zeroinitializer
  ret <16 x half> %res1
}

define <8 x half> @test_int_x86_avx512fp16_mul_ph_128(<8 x half> %x1, <8 x half> %x2) {
; CHECK-LABEL: test_int_x86_avx512fp16_mul_ph_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vmulph %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %res = fmul <8 x half> %x1, %x2
  ret <8 x half> %res
}

define <8 x half> @test_int_x86_avx512fp16_mask_mul_ph_128(<8 x half> %x1, <8 x half> %x2, <8 x half> %src, i8 %mask, ptr %ptr) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_mul_ph_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vmovaps %xmm2, %xmm3
; CHECK-NEXT:    vmulph %xmm1, %xmm0, %xmm3 {%k1}
; CHECK-NEXT:    vmulph (%rsi), %xmm0, %xmm2 {%k1}
; CHECK-NEXT:    vmulph %xmm2, %xmm3, %xmm0
; CHECK-NEXT:    retq
  %msk = bitcast i8 %mask to <8 x i1>
  %val = load <8 x half>, ptr %ptr
  %res0 = fmul <8 x half> %x1, %x2
  %res1 = select <8 x i1> %msk, <8 x half> %res0, <8 x half> %src
  %t3 = fmul <8 x half> %x1, %val
  %res2 = select <8 x i1> %msk, <8 x half> %t3, <8 x half> %src
  %res = fmul <8 x half> %res1 , %res2
  ret <8 x half> %res
}

define <8 x half> @test_int_x86_avx512fp16_maskz_mul_ph_128(<8 x half> %x1, <8 x half> %x2, i8 %mask, ptr %ptr) {
; CHECK-LABEL: test_int_x86_avx512fp16_maskz_mul_ph_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vmulph %xmm1, %xmm0, %xmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %msk = bitcast i8 %mask to <8 x i1>
  %res0 = fmul <8 x half> %x1, %x2
  %res1 = select <8 x i1> %msk, <8 x half> %res0, <8 x half> zeroinitializer
  ret <8 x half> %res1
}

define <16 x half> @test_int_x86_avx512fp16_div_ph_256(<16 x half> %x1, <16 x half> %x2) {
; CHECK-LABEL: test_int_x86_avx512fp16_div_ph_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vdivph %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    retq
  %res = fdiv <16 x half> %x1, %x2
  ret <16 x half> %res
}

define <16 x half> @test_int_x86_avx512fp16_div_ph_256_fast(<16 x half> %x1, <16 x half> %x2) {
; CHECK-LABEL: test_int_x86_avx512fp16_div_ph_256_fast:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vrcpph %ymm1, %ymm1
; CHECK-NEXT:    vmulph %ymm0, %ymm1, %ymm0
; CHECK-NEXT:    retq
  %res = fdiv fast <16 x half> %x1, %x2
  ret <16 x half> %res
}

define <16 x half> @test_int_x86_avx512fp16_mask_div_ph_256(<16 x half> %x1, <16 x half> %x2, <16 x half> %src, i16 %mask, ptr %ptr) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_div_ph_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vmovaps %ymm2, %ymm3
; CHECK-NEXT:    vdivph %ymm1, %ymm0, %ymm3 {%k1}
; CHECK-NEXT:    vdivph (%rsi), %ymm0, %ymm2 {%k1}
; CHECK-NEXT:    vdivph %ymm2, %ymm3, %ymm0
; CHECK-NEXT:    retq
  %msk = bitcast i16 %mask to <16 x i1>
  %val = load <16 x half>, ptr %ptr
  %res0 = fdiv <16 x half> %x1, %x2
  %res1 = select <16 x i1> %msk, <16 x half> %res0, <16 x half> %src
  %t3 = fdiv <16 x half> %x1, %val
  %res2 = select <16 x i1> %msk, <16 x half> %t3, <16 x half> %src
  %res = fdiv <16 x half> %res1 , %res2
  ret <16 x half> %res
}

define <16 x half> @test_int_x86_avx512fp16_maskz_div_ph_256(<16 x half> %x1, <16 x half> %x2, i16 %mask, ptr %ptr) {
; CHECK-LABEL: test_int_x86_avx512fp16_maskz_div_ph_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vdivph %ymm1, %ymm0, %ymm0 {%k1} {z}
; CHECK-NEXT:    retq
  %msk = bitcast i16 %mask to <16 x i1>
  %res0 = fdiv <16 x half> %x1, %x2
  %res1 = select <16 x i1> %msk, <16 x half> %res0, <16 x half> zeroinitializer
  ret <16 x half> %res1
}

define <8 x half> @test_int_x86_avx512fp16_div_ph_128(<8 x half> %x1, <8 x half> %x2) {
; CHECK-LABEL: test_int_x86_avx512fp16_div_ph_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vdivph %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %res = fdiv <8 x half> %x1, %x2
  ret <8 x half> %res
}

define <8 x half> @test_int_x86_avx512fp16_div_ph_128_fast(<8 x half> %x1, <8 x half> %x2) {
; CHECK-LABEL: test_int_x86_avx512fp16_div_ph_128_fast:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vrcpph %xmm1, %xmm1
; CHECK-NEXT:    vmulph %xmm0, %xmm1, %xmm0
; CHECK-NEXT:    retq
  %res = fdiv fast <8 x half> %x1, %x2
  ret <8 x half> %res
}

define <8 x half> @test_int_x86_avx512fp16_mask_div_ph_128(<8 x half> %x1, <8 x half> %x2, <8 x half> %src, i8 %mask, ptr %ptr) {
; CHECK-LABEL: test_int_x86_avx512fp16_mask_div_ph_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vmovaps %xmm2, %xmm3
; CHECK-NEXT:    vdivph %xmm1, %xmm0, %xmm3 {%k1}
; CHECK-NEXT:    vdivph (%rsi), %xmm0, %xmm2 {%k1}
; CHECK-NEXT:    vdivph %xmm2, %xmm3, %xmm0
; CHECK-NEXT:    retq
  %msk = bitcast i8 %mask to <8 x i1>
  %val = load <8 x half>, ptr %ptr
  %res0 = fdiv <8 x half> %x1, %x2
  %res1 = select <8 x i1> %msk, <8 x half> %res0, <8 x half> %src
  %t3 = fdiv <8 x half> %x1, %val
  %res2 = select <8 x i1> %msk, <8 x half> %t3, <8 x half> %src
  %res = fdiv <8 x half> %res1 , %res2
  ret <8 x half> %res
}

define <8 x half> @test_int_x86_avx512fp16_maskz_div_ph_128(<8 x half> %x1, <8 x half> %x2, i8 %mask, ptr %ptr) {
; CHECK-LABEL: test_int_x86_avx512fp16_maskz_div_ph_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vdivph %xmm1, %xmm0, %xmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %msk = bitcast i8 %mask to <8 x i1>
  %res0 = fdiv <8 x half> %x1, %x2
  %res1 = select <8 x i1> %msk, <8 x half> %res0, <8 x half> zeroinitializer
  ret <8 x half> %res1
}

define <16 x half> @test_min_ph_256(<16 x half> %x1, <16 x half> %x2) {
; CHECK-LABEL: test_min_ph_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vminph %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    retq
  %res0 = fcmp olt <16 x half> %x1, %x2
  %res1 = select <16 x i1> %res0, <16 x half> %x1, <16 x half> %x2
  ret <16 x half> %res1
}

define <16 x half> @test_max_ph_256(<16 x half> %x1, <16 x half> %x2) {
; CHECK-LABEL: test_max_ph_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vmaxph %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    retq
  %res0 = fcmp ogt <16 x half> %x1, %x2
  %res1 = select <16 x i1> %res0, <16 x half> %x1, <16 x half> %x2
  ret <16 x half> %res1
}

define <8 x half> @test_min_ph_128(<8 x half> %x1, <8 x half> %x2) {
; CHECK-LABEL: test_min_ph_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vminph %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %res0 = fcmp olt <8 x half> %x1, %x2
  %res1 = select <8 x i1> %res0, <8 x half> %x1, <8 x half> %x2
  ret <8 x half> %res1
}

define <8 x half> @test_max_ph_128(<8 x half> %x1, <8 x half> %x2) {
; CHECK-LABEL: test_max_ph_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vmaxph %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %res0 = fcmp ogt <8 x half> %x1, %x2
  %res1 = select <8 x i1> %res0, <8 x half> %x1, <8 x half> %x2
  ret <8 x half> %res1
}

declare <8 x half> @llvm.x86.avx512fp16.max.ph.128(<8 x half>, <8 x half>)
declare <16 x half> @llvm.x86.avx512fp16.max.ph.256(<16 x half>, <16 x half>)

define <8 x half> @test_max_ph_128_2(<8 x half> %x1, <8 x half> %x2) {
; CHECK-LABEL: test_max_ph_128_2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vmaxph %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %res0 = call <8 x half> @llvm.x86.avx512fp16.max.ph.128(<8 x half> %x1, <8 x half> %x2)
  ret <8 x half> %res0
}

define <16 x half> @test_max_ph_256_2(<16 x half> %x1, <16 x half> %x2) {
; CHECK-LABEL: test_max_ph_256_2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vmaxph %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    retq
  %res0 = call <16 x half> @llvm.x86.avx512fp16.max.ph.256(<16 x half> %x1, <16 x half> %x2)
  ret <16 x half> %res0
}
declare <8 x half> @llvm.x86.avx512fp16.min.ph.128(<8 x half>, <8 x half>)
declare <16 x half> @llvm.x86.avx512fp16.min.ph.256(<16 x half>, <16 x half>)

define <8 x half> @test_min_ph_128_2(<8 x half> %x1, <8 x half> %x2) {
; CHECK-LABEL: test_min_ph_128_2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vminph %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %res0 = call <8 x half> @llvm.x86.avx512fp16.min.ph.128(<8 x half> %x1, <8 x half> %x2)
  ret <8 x half> %res0
}

define <16 x half> @test_min_ph_256_2(<16 x half> %x1, <16 x half> %x2) {
; CHECK-LABEL: test_min_ph_256_2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vminph %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    retq
  %res0 = call <16 x half> @llvm.x86.avx512fp16.min.ph.256(<16 x half> %x1, <16 x half> %x2)
  ret <16 x half> %res0
}

declare <4 x double> @llvm.x86.avx512fp16.mask.vcvtph2pd.256(<8 x half>, <4 x double>, i8)

define <4 x double> @test_int_x86_avx512_mask_vcvt_ph2pd_256(<8 x half> %x0, <4 x double> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_vcvt_ph2pd_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvtph2pd %xmm0, %ymm1 {%k1}
; CHECK-NEXT:    vmovaps %ymm1, %ymm0
; CHECK-NEXT:    retq
  %res = call <4 x double> @llvm.x86.avx512fp16.mask.vcvtph2pd.256(<8 x half> %x0, <4 x double> %x1, i8 %x2)
  ret <4 x double> %res
}

define <4 x double> @test_int_x86_avx512_mask_vcvt_ph2pd_256_nomask(<8 x half> %x0, <4 x double> %x1) {
; CHECK-LABEL: test_int_x86_avx512_mask_vcvt_ph2pd_256_nomask:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvtph2pd %xmm0, %ymm0
; CHECK-NEXT:    retq
  %res = call <4 x double> @llvm.x86.avx512fp16.mask.vcvtph2pd.256(<8 x half> %x0, <4 x double> %x1, i8 -1)
  ret <4 x double> %res
}

declare <2 x double> @llvm.x86.avx512fp16.mask.vcvtph2pd.128(<8 x half>, <2 x double>, i8)

define <2 x double> @test_int_x86_avx512_mask_vcvt_ph2pd_128(<8 x half> %x0, <2 x double> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_vcvt_ph2pd_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvtph2pd %xmm0, %xmm1 {%k1}
; CHECK-NEXT:    vmovaps %xmm1, %xmm0
; CHECK-NEXT:    retq
  %res = call <2 x double> @llvm.x86.avx512fp16.mask.vcvtph2pd.128(<8 x half> %x0, <2 x double> %x1, i8 %x2)
  ret <2 x double> %res
}

define <2 x double> @test_int_x86_avx512_mask_vcvt_ph2pd_128_nomask(<8 x half> %x0, <2 x double> %x1) {
; CHECK-LABEL: test_int_x86_avx512_mask_vcvt_ph2pd_128_nomask:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvtph2pd %xmm0, %xmm0
; CHECK-NEXT:    retq
  %res = call <2 x double> @llvm.x86.avx512fp16.mask.vcvtph2pd.128(<8 x half> %x0, <2 x double> %x1, i8 -1)
  ret <2 x double> %res
}

declare <8 x half> @llvm.x86.avx512fp16.mask.vcvtpd2ph.256(<4 x double>, <8 x half>, i8)

define <8 x half> @test_int_x86_avx512_mask_vcvt_pd2ph_256(<4 x double> %x0, <8 x half> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_vcvt_pd2ph_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvtpd2ph %ymm0, %xmm1 {%k1}
; CHECK-NEXT:    vmovaps %xmm1, %xmm0
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
  %res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtpd2ph.256(<4 x double> %x0, <8 x half> %x1, i8 %x2)
  ret <8 x half> %res
}

define <8 x half> @test_int_x86_avx512_mask_vcvt_pd2ph_256_load(ptr %px0, <8 x half> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_vcvt_pd2ph_256_load:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %esi, %k1
; CHECK-NEXT:    vcvtpd2phy (%rdi), %xmm0 {%k1}
; CHECK-NEXT:    retq
  %x0 = load <4 x double>, ptr %px0, align 32
  %res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtpd2ph.256(<4 x double> %x0, <8 x half> %x1, i8 %x2)
  ret <8 x half> %res
}

declare <8 x half> @llvm.x86.avx512fp16.mask.vcvtpd2ph.128(<2 x double>, <8 x half>, i8)

define <8 x half> @test_int_x86_avx512_mask_vcvt_pd2ph_128(<2 x double> %x0, <8 x half> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_vcvt_pd2ph_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvtpd2ph %xmm0, %xmm1 {%k1}
; CHECK-NEXT:    vmovaps %xmm1, %xmm0
; CHECK-NEXT:    retq
  %res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtpd2ph.128(<2 x double> %x0, <8 x half> %x1, i8 %x2)
  ret <8 x half> %res
}

define <8 x half> @test_int_x86_avx512_mask_vcvt_pd2ph_128_load(ptr %px0, <8 x half> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_vcvt_pd2ph_128_load:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %esi, %k1
; CHECK-NEXT:    vcvtpd2phx (%rdi), %xmm0 {%k1}
; CHECK-NEXT:    retq
  %x0 = load <2 x double>, ptr %px0, align 16
  %res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtpd2ph.128(<2 x double> %x0, <8 x half> %x1, i8 %x2)
  ret <8 x half> %res
}

declare <4 x i32> @llvm.x86.avx512fp16.mask.vcvtph2udq.128(<8 x half>, <4 x i32>, i8)

define <4 x i32> @test_int_x86_avx512_cvt_ph2udq_128(<8 x half> %x0) {
; CHECK-LABEL: test_int_x86_avx512_cvt_ph2udq_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvtph2udq %xmm0, %xmm0
; CHECK-NEXT:    retq
  %res = call <4 x i32> @llvm.x86.avx512fp16.mask.vcvtph2udq.128(<8 x half> %x0, <4 x i32> undef, i8 -1)
  ret <4 x i32> %res
}

define <4 x i32> @test_int_x86_avx512_mask_cvt_ph2udq_128(<8 x half> %x0, <4 x i32> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ph2udq_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvtph2udq %xmm0, %xmm1 {%k1}
; CHECK-NEXT:    vmovaps %xmm1, %xmm0
; CHECK-NEXT:    retq
  %res = call <4 x i32> @llvm.x86.avx512fp16.mask.vcvtph2udq.128(<8 x half> %x0, <4 x i32> %x1, i8 %x2)
  ret <4 x i32> %res
}

define <4 x i32> @test_int_x86_avx512_maskz_cvt_ph2udq_128(<8 x half> %x0, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_maskz_cvt_ph2udq_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvtph2udq %xmm0, %xmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %res = call <4 x i32> @llvm.x86.avx512fp16.mask.vcvtph2udq.128(<8 x half> %x0, <4 x i32> zeroinitializer, i8 %x2)
  ret <4 x i32> %res
}

declare <8 x i32> @llvm.x86.avx512fp16.mask.vcvtph2udq.256(<8 x half>, <8 x i32>, i8)

define <8 x i32> @test_int_x86_avx512_cvt_ph2udq_256(<8 x half> %x0) {
; CHECK-LABEL: test_int_x86_avx512_cvt_ph2udq_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvtph2udq %xmm0, %ymm0
; CHECK-NEXT:    retq
  %res = call <8 x i32> @llvm.x86.avx512fp16.mask.vcvtph2udq.256(<8 x half> %x0, <8 x i32> undef, i8 -1)
  ret <8 x i32> %res
}

define <8 x i32> @test_int_x86_avx512_mask_cvt_ph2udq_256(<8 x half> %x0, <8 x i32> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ph2udq_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvtph2udq %xmm0, %ymm1 {%k1}
; CHECK-NEXT:    vmovaps %ymm1, %ymm0
; CHECK-NEXT:    retq
  %res = call <8 x i32> @llvm.x86.avx512fp16.mask.vcvtph2udq.256(<8 x half> %x0, <8 x i32> %x1, i8 %x2)
  ret <8 x i32> %res
}

define <8 x i32> @test_int_x86_avx512_maskz_cvt_ph2udq_256(<8 x half> %x0, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_maskz_cvt_ph2udq_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvtph2udq %xmm0, %ymm0 {%k1} {z}
; CHECK-NEXT:    retq
  %res = call <8 x i32> @llvm.x86.avx512fp16.mask.vcvtph2udq.256(<8 x half> %x0, <8 x i32> zeroinitializer, i8 %x2)
  ret <8 x i32> %res
}

declare <4 x i32> @llvm.x86.avx512fp16.mask.vcvttph2dq.128(<8 x half>, <4 x i32>, i8)

define <4 x i32> @test_int_x86_avx512_cvtt_ph2dq_128(<8 x half> %x0) {
; CHECK-LABEL: test_int_x86_avx512_cvtt_ph2dq_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvttph2dq %xmm0, %xmm0
; CHECK-NEXT:    retq
  %res = call <4 x i32> @llvm.x86.avx512fp16.mask.vcvttph2dq.128(<8 x half> %x0, <4 x i32> undef, i8 -1)
  ret <4 x i32> %res
}
define <4 x i32> @test_int_x86_avx512_mask_cvtt_ph2dq_128(<8 x half> %x0, <4 x i32> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_ph2dq_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvttph2dq %xmm0, %xmm1 {%k1}
; CHECK-NEXT:    vmovaps %xmm1, %xmm0
; CHECK-NEXT:    retq
  %res = call <4 x i32> @llvm.x86.avx512fp16.mask.vcvttph2dq.128(<8 x half> %x0, <4 x i32> %x1, i8 %x2)
  ret <4 x i32> %res
}

define <4 x i32> @test_int_x86_avx512_maskz_cvtt_ph2dq_128(<8 x half> %x0, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_maskz_cvtt_ph2dq_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvttph2dq %xmm0, %xmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %res = call <4 x i32> @llvm.x86.avx512fp16.mask.vcvttph2dq.128(<8 x half> %x0, <4 x i32> zeroinitializer, i8 %x2)
  ret <4 x i32> %res
}

declare <8 x i32> @llvm.x86.avx512fp16.mask.vcvttph2dq.256(<8 x half>, <8 x i32>, i8)

define <8 x i32> @test_int_x86_avx512_cvtt_ph2dq_256(<8 x half> %x0) {
; CHECK-LABEL: test_int_x86_avx512_cvtt_ph2dq_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvttph2dq %xmm0, %ymm0
; CHECK-NEXT:    retq
  %res = call <8 x i32> @llvm.x86.avx512fp16.mask.vcvttph2dq.256(<8 x half> %x0, <8 x i32> undef, i8 -1)
  ret <8 x i32> %res
}

define <8 x i32> @test_int_x86_avx512_mask_cvtt_ph2dq_256(<8 x half> %x0, <8 x i32> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_ph2dq_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvttph2dq %xmm0, %ymm1 {%k1}
; CHECK-NEXT:    vmovaps %ymm1, %ymm0
; CHECK-NEXT:    retq
  %res = call <8 x i32> @llvm.x86.avx512fp16.mask.vcvttph2dq.256(<8 x half> %x0, <8 x i32> %x1, i8 %x2)
  ret <8 x i32> %res
}

define <8 x i32> @test_int_x86_avx512_maskz_cvtt_ph2dq_256(<8 x half> %x0, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_maskz_cvtt_ph2dq_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvttph2dq %xmm0, %ymm0 {%k1} {z}
; CHECK-NEXT:    retq
  %res = call <8 x i32> @llvm.x86.avx512fp16.mask.vcvttph2dq.256(<8 x half> %x0, <8 x i32> zeroinitializer, i8 %x2)
  ret <8 x i32> %res
}

declare <4 x i32> @llvm.x86.avx512fp16.mask.vcvttph2udq.128(<8 x half>, <4 x i32>, i8)

define <4 x i32> @test_int_x86_avx512_cvtt_ph2udq_128(<8 x half> %x0) {
; CHECK-LABEL: test_int_x86_avx512_cvtt_ph2udq_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvttph2udq %xmm0, %xmm0
; CHECK-NEXT:    retq
  %res = call <4 x i32> @llvm.x86.avx512fp16.mask.vcvttph2udq.128(<8 x half> %x0, <4 x i32> undef, i8 -1)
  ret <4 x i32> %res
}

define <4 x i32> @test_int_x86_avx512_mask_cvtt_ph2udq_128(<8 x half> %x0, <4 x i32> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_ph2udq_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvttph2udq %xmm0, %xmm1 {%k1}
; CHECK-NEXT:    vmovaps %xmm1, %xmm0
; CHECK-NEXT:    retq
  %res = call <4 x i32> @llvm.x86.avx512fp16.mask.vcvttph2udq.128(<8 x half> %x0, <4 x i32> %x1, i8 %x2)
  ret <4 x i32> %res
}

define <4 x i32> @test_int_x86_avx512_maskz_cvtt_ph2udq_128(<8 x half> %x0, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_maskz_cvtt_ph2udq_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvttph2udq %xmm0, %xmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %res = call <4 x i32> @llvm.x86.avx512fp16.mask.vcvttph2udq.128(<8 x half> %x0, <4 x i32> zeroinitializer, i8 %x2)
  ret <4 x i32> %res
}

declare <8 x i32> @llvm.x86.avx512fp16.mask.vcvttph2udq.256(<8 x half>, <8 x i32>, i8)

define <8 x i32> @test_int_x86_avx512_cvtt_ph2udq_256(<8 x half> %x0) {
; CHECK-LABEL: test_int_x86_avx512_cvtt_ph2udq_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvttph2udq %xmm0, %ymm0
; CHECK-NEXT:    retq
  %res = call <8 x i32> @llvm.x86.avx512fp16.mask.vcvttph2udq.256(<8 x half> %x0, <8 x i32> undef, i8 -1)
  ret <8 x i32> %res
}

define <8 x i32> @test_int_x86_avx512_mask_cvtt_ph2udq_256(<8 x half> %x0, <8 x i32> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvtt_ph2udq_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvttph2udq %xmm0, %ymm1 {%k1}
; CHECK-NEXT:    vmovaps %ymm1, %ymm0
; CHECK-NEXT:    retq
  %res = call <8 x i32> @llvm.x86.avx512fp16.mask.vcvttph2udq.256(<8 x half> %x0, <8 x i32> %x1, i8 %x2)
  ret <8 x i32> %res
}

define <8 x i32> @test_int_x86_avx512_maskz_cvtt_ph2udq_256(<8 x half> %x0, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_maskz_cvtt_ph2udq_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvttph2udq %xmm0, %ymm0 {%k1} {z}
; CHECK-NEXT:    retq
  %res = call <8 x i32> @llvm.x86.avx512fp16.mask.vcvttph2udq.256(<8 x half> %x0, <8 x i32> zeroinitializer, i8 %x2)
  ret <8 x i32> %res
}

declare <4 x float> @llvm.x86.avx512fp16.mask.vcvtph2psx.128(<8 x half>, <4 x float>, i8)

define <4 x float> @test_int_x86_avx512_cvt_ph2psx_128(<8 x half> %x0) {
; CHECK-LABEL: test_int_x86_avx512_cvt_ph2psx_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvtph2psx %xmm0, %xmm0
; CHECK-NEXT:    retq
  %res = call <4 x float> @llvm.x86.avx512fp16.mask.vcvtph2psx.128(<8 x half> %x0, <4 x float> undef, i8 -1)
  ret <4 x float> %res
}

define <4 x float> @test_int_x86_avx512_mask_cvt_ph2psx_128(<8 x half> %x0, <4 x float> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ph2psx_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvtph2psx %xmm0, %xmm1 {%k1}
; CHECK-NEXT:    vmovaps %xmm1, %xmm0
; CHECK-NEXT:    retq
  %res = call <4 x float> @llvm.x86.avx512fp16.mask.vcvtph2psx.128(<8 x half> %x0, <4 x float> %x1, i8 %x2)
  ret <4 x float> %res
}

define <4 x float> @test_int_x86_avx512_maskz_cvt_ph2psx_128(<8 x half> %x0, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_maskz_cvt_ph2psx_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvtph2psx %xmm0, %xmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %res = call <4 x float> @llvm.x86.avx512fp16.mask.vcvtph2psx.128(<8 x half> %x0, <4 x float> zeroinitializer, i8 %x2)
  ret <4 x float> %res
}

declare <8 x float> @llvm.x86.avx512fp16.mask.vcvtph2psx.256(<8 x half>, <8 x float>, i8)

define <8 x float> @test_int_x86_avx512_cvt_ph2psx_256(<8 x half> %x0) {
; CHECK-LABEL: test_int_x86_avx512_cvt_ph2psx_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvtph2psx %xmm0, %ymm0
; CHECK-NEXT:    retq
  %res = call <8 x float> @llvm.x86.avx512fp16.mask.vcvtph2psx.256(<8 x half> %x0, <8 x float> undef, i8 -1)
  ret <8 x float> %res
}

define <8 x float> @test_int_x86_avx512_mask_cvt_ph2psx_256(<8 x half> %x0, <8 x float> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ph2psx_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvtph2psx %xmm0, %ymm1 {%k1}
; CHECK-NEXT:    vmovaps %ymm1, %ymm0
; CHECK-NEXT:    retq
  %res = call <8 x float> @llvm.x86.avx512fp16.mask.vcvtph2psx.256(<8 x half> %x0, <8 x float> %x1, i8 %x2)
  ret <8 x float> %res
}

define <8 x float> @test_int_x86_avx512_maskz_cvt_ph2psx_256(<8 x half> %x0, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_maskz_cvt_ph2psx_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvtph2psx %xmm0, %ymm0 {%k1} {z}
; CHECK-NEXT:    retq
  %res = call <8 x float> @llvm.x86.avx512fp16.mask.vcvtph2psx.256(<8 x half> %x0, <8 x float> zeroinitializer, i8 %x2)
  ret <8 x float> %res
}

declare <8 x half> @llvm.x86.avx512fp16.mask.vcvtps2phx.128(<4 x float>, <8 x half>, i8)

define <8 x half> @test_int_x86_avx512_mask_cvt_ps2phx_128(<4 x float> %x0, <8 x half> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ps2phx_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvtps2phx %xmm0, %xmm1 {%k1}
; CHECK-NEXT:    vcvtps2phx %xmm0, %xmm0
; CHECK-NEXT:    vaddph %xmm0, %xmm1, %xmm0
; CHECK-NEXT:    retq
  %res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtps2phx.128(<4 x float> %x0, <8 x half> %x1, i8 %x2)
  %res1 = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtps2phx.128(<4 x float> %x0, <8 x half> %x1, i8 -1)
  %res2 = fadd <8 x half> %res, %res1
  ret <8 x half> %res2
}

declare <8 x half> @llvm.x86.avx512fp16.mask.vcvtps2phx.256(<8 x float>, <8 x half>, i8)

define <8 x half> @test_int_x86_avx512_cvt_ps2phx_256(<8 x float> %x0) {
; CHECK-LABEL: test_int_x86_avx512_cvt_ps2phx_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvtps2phx %ymm0, %xmm0
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
  %res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtps2phx.256(<8 x float> %x0, <8 x half> undef, i8 -1)
  ret <8 x half> %res
}

define <8 x half> @test_int_x86_avx512_mask_cvt_ps2phx_256(<8 x float> %x0, <8 x half> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_cvt_ps2phx_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvtps2phx %ymm0, %xmm1 {%k1}
; CHECK-NEXT:    vmovaps %xmm1, %xmm0
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
  %res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtps2phx.256(<8 x float> %x0, <8 x half> %x1, i8 %x2)
  ret <8 x half> %res
}

define <8 x half> @test_int_x86_avx512_maskz_cvt_ps2phx_256(<8 x float> %x0, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_maskz_cvt_ps2phx_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    kmovd %edi, %k1
; CHECK-NEXT:    vcvtps2phx %ymm0, %xmm0 {%k1} {z}
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
  %res = call <8 x half> @llvm.x86.avx512fp16.mask.vcvtps2phx.256(<8 x float> %x0, <8 x half> zeroinitializer, i8 %x2)
  ret <8 x half> %res
}