xref: /llvm-project/llvm/test/Transforms/LoopVectorize/AArch64/widen-call-with-intrinsic-or-libfunc.ll (revision 7f3428d3ed71d87a2088b77b6cab9f3d86544234)
1; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2; REQUIRES: asserts
3
4; RUN: opt -passes=loop-vectorize -force-vector-interleave=1 -vectorizer-maximize-bandwidth -mtriple=arm64-apple-ios -debug -S %s 2>&1 | FileCheck %s
5
; Module target; the same triple is passed via -mtriple on the opt command line above.
6target triple = "arm64-apple-ios"
7
8; CHECK-LABEL: LV: Checking a loop in 'test'
9; CHECK:      VPlan 'Initial VPlan for VF={2},UF>=1' {
10; CHECK-NEXT: Live-in vp<[[VFxUF:%.+]]> = VF * UF
11; CHECK-NEXT: Live-in vp<[[VTC:%.+]]> = vector-trip-count
12
13; CHECK-NEXT: Live-in ir<1024> = original trip-count
14; CHECK-EMPTY:
15; CHECK-NEXT: ir-bb<entry>:
16; CHECK-NEXT: Successor(s): vector.ph
17; CHECK-EMPTY:
18; CHECK-NEXT: vector.ph:
19; CHECK-NEXT: Successor(s): vector loop
20; CHECK-EMPTY:
21; CHECK-NEXT: <x1> vector loop: {
22; CHECK-NEXT:   vector.body:
23; CHECK-NEXT:     EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION
24; CHECK-NEXT:     vp<[[STEPS:%.+]]>    = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1>
25; CHECK-NEXT:     CLONE ir<%gep.src> = getelementptr inbounds ir<%src>, vp<[[STEPS]]>
26; CHECK-NEXT:     vp<[[VEC_PTR:%.+]]> = vector-pointer ir<%gep.src>
27; CHECK-NEXT:     WIDEN ir<%l> = load vp<[[VEC_PTR]]>
28; CHECK-NEXT:     WIDEN-CAST ir<%conv> = fpext ir<%l> to double
29; CHECK-NEXT:     WIDEN-CALL ir<%s> = call reassoc nnan ninf nsz arcp contract afn @llvm.sin.f64(ir<%conv>) (using library function: __simd_sin_v2f64)
30; CHECK-NEXT:     REPLICATE ir<%gep.dst> = getelementptr inbounds ir<%dst>, vp<[[STEPS]]>
31; CHECK-NEXT:     REPLICATE store ir<%s>, ir<%gep.dst>
32; CHECK-NEXT:     EMIT vp<[[CAN_IV_NEXT:%.+]]> = add nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]>
33; CHECK-NEXT:     EMIT branch-on-count vp<[[CAN_IV_NEXT]]>, vp<[[VTC]]>
34; CHECK-NEXT:   No successors
35; CHECK-NEXT: }
36; CHECK-NEXT: Successor(s): middle.block
37; CHECK-EMPTY:
38; CHECK-NEXT: middle.block:
39; CHECK-NEXT:   EMIT vp<[[CMP:%.+]]> = icmp eq ir<1024>, vp<[[VTC]]>
40; CHECK-NEXT:   EMIT branch-on-cond vp<[[CMP]]>
41; CHECK-NEXT: Successor(s): ir-bb<exit>, scalar.ph
42; CHECK-EMPTY:
43; CHECK-NEXT: scalar.ph:
44; CHECK-NEXT:   EMIT vp<[[RESUME:%.+]]> = resume-phi vp<[[VTC]]>, ir<0>
45; CHECK-NEXT: Successor(s): ir-bb<loop>
46; CHECK-EMPTY:
47; CHECK-NEXT: ir-bb<loop>:
48; CHECK-NEXT:   IR   %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] (extra operand: vp<[[RESUME]]> from scalar.ph)
49; CHECK:        IR   %cmp = icmp ne i64 %iv.next, 1024
50; CHECK-NEXT: No successors
51; CHECK-EMPTY:
52; CHECK-NEXT: ir-bb<exit>:
53; CHECK-NEXT: No successors
54; CHECK-NEXT: }
55
56; CHECK:      VPlan 'Initial VPlan for VF={4},UF>=1' {
57; CHECK-NEXT: Live-in vp<[[VFxUF:%.+]]> = VF * UF
58; CHECK-NEXT: Live-in vp<[[VTC:%.+]]> = vector-trip-count
59; CHECK-NEXT: Live-in ir<1024> = original trip-count
60; CHECK-EMPTY:
61; CHECK-NEXT: ir-bb<entry>:
62; CHECK-NEXT: Successor(s): vector.ph
63; CHECK-EMPTY:
64; CHECK-NEXT: vector.ph:
65; CHECK-NEXT: Successor(s): vector loop
66; CHECK-EMPTY:
67; CHECK-NEXT: <x1> vector loop: {
68; CHECK-NEXT:   vector.body:
69; CHECK-NEXT:     EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION
70; CHECK-NEXT:     vp<[[STEPS:%.+]]>    = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1>
71; CHECK-NEXT:     CLONE ir<%gep.src> = getelementptr inbounds ir<%src>, vp<[[STEPS]]>
72; CHECK-NEXT:     vp<[[VEC_PTR:%.+]]> = vector-pointer ir<%gep.src>
73; CHECK-NEXT:     WIDEN ir<%l> = load vp<[[VEC_PTR]]>
74; CHECK-NEXT:     WIDEN-CAST ir<%conv> = fpext ir<%l> to double
75; CHECK-NEXT:     WIDEN-INTRINSIC ir<%s> = call reassoc nnan ninf nsz arcp contract afn llvm.sin(ir<%conv>)
76; CHECK-NEXT:     REPLICATE ir<%gep.dst> = getelementptr inbounds ir<%dst>, vp<[[STEPS]]>
77; CHECK-NEXT:     REPLICATE store ir<%s>, ir<%gep.dst>
78; CHECK-NEXT:     EMIT vp<[[CAN_IV_NEXT:%.+]]> = add nuw vp<[[CAN_IV]]>, vp<[[VFxUF]]>
79; CHECK-NEXT:     EMIT branch-on-count vp<[[CAN_IV_NEXT]]>, vp<[[VTC]]>
80; CHECK-NEXT:   No successors
81; CHECK-NEXT: }
82; CHECK-NEXT: Successor(s): middle.block
83; CHECK-EMPTY:
84; CHECK-NEXT: middle.block:
85; CHECK-NEXT:   EMIT vp<[[CMP:%.+]]> = icmp eq ir<1024>, vp<[[VTC]]>
86; CHECK-NEXT:   EMIT branch-on-cond vp<[[CMP]]>
87; CHECK-NEXT: Successor(s): ir-bb<exit>, scalar.ph
88; CHECK-EMPTY:
89; CHECK-NEXT: scalar.ph:
90; CHECK-NEXT:   EMIT vp<[[RESUME:%.+]]> = resume-phi vp<[[VTC]]>, ir<0>
91; CHECK-NEXT: Successor(s): ir-bb<loop>
92; CHECK-EMPTY:
93; CHECK-NEXT: ir-bb<loop>:
94; CHECK-NEXT:   IR   %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] (extra operand: vp<[[RESUME]]> from scalar.ph)
95; CHECK:        IR   %cmp = icmp ne i64 %iv.next, 1024
96; CHECK-NEXT: No successors
97; CHECK-EMPTY:
98; CHECK-NEXT: ir-bb<exit>:
99; CHECK-NEXT: No successors
100; CHECK-NEXT: }
101;
102;
; @test: a single-block loop that runs until %iv.next equals 1024. Each
; iteration loads a float from %src indexed by %iv, extends it to double,
; calls the llvm.sin.f64 intrinsic with fast-math flags, and stores the double
; result through %gep.dst. Both pointer arguments are marked noalias. The call
; site carries attribute group #0.
;
; The assertions placed inside the function body match the vector.body block
; of the transformed IR, which works on two-lane vectors, calls
; @__simd_sin_v2f64, and stores each result lane separately.
103define void @test(ptr noalias %src, ptr noalias %dst) {
104; CHECK-LABEL: @test(
105; CHECK:       vector.body:
106; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %vector.ph ], [ [[INDEX_NEXT:%.*]], %vector.body ]
107; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
108; CHECK-NEXT:    [[TMP1:%.*]] = add i64 [[INDEX]], 1
109; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds float, ptr [[SRC:%.*]], i64 [[TMP0]]
110; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i32 0
111; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <2 x float>, ptr [[TMP3]], align 4
112; CHECK-NEXT:    [[TMP4:%.*]] = fpext <2 x float> [[WIDE_LOAD]] to <2 x double>
113; CHECK-NEXT:    [[TMP5:%.*]] = call fast <2 x double> @__simd_sin_v2f64(<2 x double> [[TMP4]])
114; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds float, ptr [[DST:%.*]], i64 [[TMP0]]
115; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr inbounds float, ptr [[DST]], i64 [[TMP1]]
116; CHECK-NEXT:    [[TMP8:%.*]] = extractelement <2 x double> [[TMP5]], i32 0
117; CHECK-NEXT:    store double [[TMP8]], ptr [[TMP6]], align 8
118; CHECK-NEXT:    [[TMP9:%.*]] = extractelement <2 x double> [[TMP5]], i32 1
119; CHECK-NEXT:    store double [[TMP9]], ptr [[TMP7]], align 8
120; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
121; CHECK-NEXT:    [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
122; CHECK-NEXT:    br i1 [[TMP10]], label %middle.block, label %vector.body
123;
124entry:
125  br label %loop
126
127loop:
  ; Induction variable: 0 on entry from %entry, %iv.next on the back edge.
128  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
129  %gep.src = getelementptr inbounds float, ptr %src, i64 %iv
130  %l = load float, ptr %gep.src, align 4
131  %conv = fpext float %l to double
  ; Call under test: attribute group #0 names a vector variant for this call.
132  %s = call fast double @llvm.sin.f64(double %conv) #0
  ; %gep.dst advances in float-sized steps while the store below writes a
  ; double. NOTE(review): presumably deliberate, since the assertions expect a
  ; replicated (per-lane) store rather than a widened one; confirm.
133  %gep.dst = getelementptr inbounds float, ptr %dst, i64 %iv
134  store double %s, ptr %gep.dst
135  %iv.next = add nsw i64 %iv, 1
  ; Stay in the loop while %iv.next differs from 1024.
136  %cmp = icmp ne i64 %iv.next, 1024
137  br i1 %cmp, label %loop, label %exit
138
139exit:
140  ret void
141}
142
; Scalar sine intrinsic called from the loop body of @test.
143declare double @llvm.sin.f64(double)
144
; Two-lane vector routine that attribute group #0 below names as the vector
; variant of llvm.sin.f64.
145declare <2 x double> @__simd_sin_v2f64(<2 x double>)
146
; Attached to the llvm.sin.f64 call in @test. The string maps the scalar
; llvm.sin.f64 to @__simd_sin_v2f64; in the mangled prefix "N2v", "N" marks an
; unmasked variant, "2" is the vector length, and "v" is one vector parameter.
; Only this two-lane mapping is supplied, which is why the assertions above
; expect a library call in the VF=2 plan and a widened intrinsic in the VF=4
; plan.
147attributes #0 = { "vector-function-abi-variant"="_ZGV_LLVM_N2v_llvm.sin.f64(__simd_sin_v2f64)" }
148