; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -O3 -S | FileCheck %s
; RUN: opt < %s -passes="default<O3>" -S | FileCheck %s

; Both opt invocations above feed FileCheck with the same (default) prefix,
; so each must produce the output asserted below.
; Every function here is written as per-lane scalar code: extract a lane from
; each operand, apply a scalar FP op, and write the result back. The
; assertions expect whole-vector FP ops in its place.

target triple = "x86_64--"
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"

; Ideally, this should reach the backend with 1 fsub, 1 fadd, and 1 shuffle.
; That may require some coordination between VectorCombine, SLP, and other passes.
; The end goal is to get a single "vaddsubps" instruction for x86 with AVX.
;
; The assertions for this function currently show exactly that form:
; one vector fsub, one vector fadd, and one shufflevector whose mask
; <0, 5, 2, 7> takes lanes 0 and 2 from the fsub result and lanes 1 and 3
; (mask indices 5 and 7) from the fadd result.

define <4 x float> @PR45015(<4 x float> %arg, <4 x float> %arg1) {
; CHECK-LABEL: @PR45015(
; CHECK-NEXT:    [[TMP1:%.*]] = fsub <4 x float> [[ARG:%.*]], [[ARG1:%.*]]
; CHECK-NEXT:    [[TMP2:%.*]] = fadd <4 x float> [[ARG]], [[ARG1]]
; CHECK-NEXT:    [[T16:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> [[TMP2]], <4 x i32> <i32 0, i32 5, i32 2, i32 7>
; CHECK-NEXT:    ret <4 x float> [[T16]]
;
  ; Lane 0: arg[0] - arg1[0]
  %t = extractelement <4 x float> %arg, i32 0
  %t2 = extractelement <4 x float> %arg1, i32 0
  %t3 = fsub float %t, %t2
  %t4 = insertelement <4 x float> undef, float %t3, i32 0
  ; Lane 1: arg[1] + arg1[1]
  %t5 = extractelement <4 x float> %arg, i32 1
  %t6 = extractelement <4 x float> %arg1, i32 1
  %t7 = fadd float %t5, %t6
  %t8 = insertelement <4 x float> %t4, float %t7, i32 1
  ; Lane 2: arg[2] - arg1[2]
  %t9 = extractelement <4 x float> %arg, i32 2
  %t10 = extractelement <4 x float> %arg1, i32 2
  %t11 = fsub float %t9, %t10
  %t12 = insertelement <4 x float> %t8, float %t11, i32 2
  ; Lane 3: arg[3] + arg1[3]
  %t13 = extractelement <4 x float> %arg, i32 3
  %t14 = extractelement <4 x float> %arg1, i32 3
  %t15 = fadd float %t13, %t14
  %t16 = insertelement <4 x float> %t12, float %t15, i32 3
  ret <4 x float> %t16
}

; PR42022 - https://bugs.llvm.org/show_bug.cgi?id=42022

; Four consecutive floats. Only @add_aggregate_store uses this type, as the
; source element type of the GEPs that address fields 1..3 of %r.
%struct.Vector4 = type { float, float, float, float }

; Returns a pair of <2 x float> values: element 0 is built lane by lane from
; a0 + b0 and element 1 from a1 + b1. The assertions expect each pair of scalar
; fadds to become one <2 x float> fadd feeding the insertvalue directly.
define { <2 x float>, <2 x float> } @add_aggregate(<2 x float> %a0, <2 x float> %a1, <2 x float> %b0, <2 x float> %b1) {
; CHECK-LABEL: @add_aggregate(
; CHECK-NEXT:    [[TMP1:%.*]] = fadd <2 x float> [[A0:%.*]], [[B0:%.*]]
; CHECK-NEXT:    [[TMP2:%.*]] = fadd <2 x float> [[A1:%.*]], [[B1:%.*]]
; CHECK-NEXT:    [[FCA_0_INSERT:%.*]] = insertvalue { <2 x float>, <2 x float> } undef, <2 x float> [[TMP1]], 0
; CHECK-NEXT:    [[FCA_1_INSERT:%.*]] = insertvalue { <2 x float>, <2 x float> } [[FCA_0_INSERT]], <2 x float> [[TMP2]], 1
; CHECK-NEXT:    ret { <2 x float>, <2 x float> } [[FCA_1_INSERT]]
;
  ; First result vector, lane 0: a0[0] + b0[0]
  %a00 = extractelement <2 x float> %a0, i32 0
  %b00 = extractelement <2 x float> %b0, i32 0
  %add = fadd float %a00, %b00
  %retval.0.0.insert = insertelement <2 x float> undef, float %add, i32 0
  ; First result vector, lane 1: a0[1] + b0[1]
  %a01 = extractelement <2 x float> %a0, i32 1
  %b01 = extractelement <2 x float> %b0, i32 1
  %add4 = fadd float %a01, %b01
  %retval.0.1.insert = insertelement <2 x float> %retval.0.0.insert, float %add4, i32 1
  ; Second result vector, lane 0: a1[0] + b1[0]
  %a10 = extractelement <2 x float> %a1, i32 0
  %b10 = extractelement <2 x float> %b1, i32 0
  %add7 = fadd float %a10, %b10
  %retval.1.0.insert = insertelement <2 x float> undef, float %add7, i32 0
  ; Second result vector, lane 1: a1[1] + b1[1]
  %a11 = extractelement <2 x float> %a1, i32 1
  %b11 = extractelement <2 x float> %b1, i32 1
  %add10 = fadd float %a11, %b11
  %retval.1.1.insert = insertelement <2 x float> %retval.1.0.insert, float %add10, i32 1
  ; Pack both <2 x float> vectors into the returned aggregate.
  %fca.0.insert = insertvalue { <2 x float>, <2 x float> } undef, <2 x float> %retval.0.1.insert, 0
  %fca.1.insert = insertvalue { <2 x float>, <2 x float> } %fca.0.insert, <2 x float> %retval.1.1.insert, 1
  ret { <2 x float>, <2 x float> } %fca.1.insert
}

; Same four scalar additions as @add_aggregate, but each sum is stored to one
; float field of the %struct.Vector4 addressed through %r (declared
; dereferenceable for 16 bytes). The assertions expect two <2 x float> fadds,
; concatenated by a shufflevector with mask <0, 1, 2, 3>, and a single
; <4 x float> store that keeps the scalar stores' 4-byte alignment.
define void @add_aggregate_store(<2 x float> %a0, <2 x float> %a1, <2 x float> %b0, <2 x float> %b1, ptr nocapture dereferenceable(16) %r) {
; CHECK-LABEL: @add_aggregate_store(
; CHECK-NEXT:    [[TMP1:%.*]] = fadd <2 x float> [[A0:%.*]], [[B0:%.*]]
; CHECK-NEXT:    [[TMP2:%.*]] = fadd <2 x float> [[A1:%.*]], [[B1:%.*]]
; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <2 x float> [[TMP1]], <2 x float> [[TMP2]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; CHECK-NEXT:    store <4 x float> [[TMP3]], ptr [[R:%.*]], align 4
; CHECK-NEXT:    ret void
;
  ; Field 0 (stored through %r itself): a0[0] + b0[0]
  %a00 = extractelement <2 x float> %a0, i32 0
  %b00 = extractelement <2 x float> %b0, i32 0
  %add = fadd float %a00, %b00
  store float %add, ptr %r, align 4
  ; Field 1: a0[1] + b0[1]
  %a01 = extractelement <2 x float> %a0, i32 1
  %b01 = extractelement <2 x float> %b0, i32 1
  %add4 = fadd float %a01, %b01
  %r1 = getelementptr inbounds %struct.Vector4, ptr %r, i64 0, i32 1
  store float %add4, ptr %r1, align 4
  ; Field 2: a1[0] + b1[0]
  %a10 = extractelement <2 x float> %a1, i32 0
  %b10 = extractelement <2 x float> %b1, i32 0
  %add7 = fadd float %a10, %b10
  %r2 = getelementptr inbounds %struct.Vector4, ptr %r, i64 0, i32 2
  store float %add7, ptr %r2, align 4
  ; Field 3: a1[1] + b1[1]
  %a11 = extractelement <2 x float> %a1, i32 1
  %b11 = extractelement <2 x float> %b1, i32 1
  %add10 = fadd float %a11, %b11
  %r3 = getelementptr inbounds %struct.Vector4, ptr %r, i64 0, i32 3
  store float %add10, ptr %r3, align 4
  ret void
}