; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s

; The llc invocation above passes an explicit Darwin triple, so the expected
; assembly uses Darwin-style `##` comments even though the module below
; declares a Linux triple.
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"

; f_fu(ret, aa, b):
;   ret - destination of one <16 x float> store (align 4)
;   aa  - source of one <16 x float> load (align 4)
;   b   - scalar float, truncated to i32 and splatted across 16 lanes
;
; Per lane i the stored value is
;   sitofp( (int)b / 2 + (int)b + (i odd ? i + 2 : (int)aa[i]) )
define void @f_fu(ptr %ret, ptr %aa, float %b) {
; CHECK-LABEL: f_fu:
; CHECK:       ## %bb.0: ## %allocas
; CHECK-NEXT:    vcvttss2si %xmm0, %eax
; CHECK-NEXT:    vpbroadcastd %eax, %zmm0
; CHECK-NEXT:    vcvttps2dq (%rsi), %zmm1
; CHECK-NEXT:    vpsrld $31, %zmm0, %zmm2
; CHECK-NEXT:    vpaddd %zmm2, %zmm0, %zmm2
; CHECK-NEXT:    vpsrad $1, %zmm2, %zmm2
; CHECK-NEXT:    movw $-21846, %ax ## imm = 0xAAAA
; CHECK-NEXT:    kmovw %eax, %k1
; CHECK-NEXT:    vmovdqa32 {{.*#+}} zmm1 {%k1} = [u,3,u,5,u,7,u,9,u,11,u,13,u,15,u,17]
; CHECK-NEXT:    vpaddd %zmm0, %zmm2, %zmm0
; CHECK-NEXT:    vpaddd %zmm1, %zmm0, %zmm0
; CHECK-NEXT:    vcvtdq2ps %zmm0, %zmm0
; CHECK-NEXT:    vmovups %zmm0, (%rdi)
; CHECK-NEXT:    retq
allocas:
  %vec.f32 = load <16 x float>, ptr %aa, align 4

  ; %b is converted and splatted twice with identical instruction sequences;
  ; the expected assembly contains a single vcvttss2si + vpbroadcastd pair.
  %b.i32.a = fptosi float %b to i32
  %b.ins.a = insertelement <16 x i32> undef, i32 %b.i32.a, i32 0
  %b.splat.a = shufflevector <16 x i32> %b.ins.a, <16 x i32> undef, <16 x i32> zeroinitializer
  %b.i32.b = fptosi float %b to i32
  %b.ins.b = insertelement <16 x i32> undef, i32 %b.i32.b, i32 0
  %b.splat.b = shufflevector <16 x i32> %b.ins.b, <16 x i32> undef, <16 x i32> zeroinitializer

  %vec.i32 = fptosi <16 x float> %vec.f32 to <16 x i32>

  ; Signed divide by a splat of 2; expected to lower to the
  ; vpsrld $31 / vpaddd / vpsrad $1 sequence rather than a real division.
  %half = sdiv <16 x i32> %b.splat.b, <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>

  ; Alternating mask: odd lanes take the constant, even lanes take the
  ; converted load. Expected to become a masked move under k1 = 0xAAAA.
  %blend = select <16 x i1> <i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true>,
                  <16 x i32> <i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17>,
                  <16 x i32> %vec.i32

  %sum = add <16 x i32> %half, %b.splat.a

  %total = add <16 x i32> %sum, %blend

  %res.f32 = sitofp <16 x i32> %total to <16 x float>
  store <16 x float> %res.f32, ptr %ret, align 4
  ret void
}