; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2
; RUN: opt < %s -passes=loop-vectorize,instcombine -force-vector-width=2 -force-vector-interleave=1 -S | FileCheck %s

; A call whose second argument can remain a scalar for a vectorized function
; variant with a uniform argument, because that argument is loop invariant.
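; The mapping attribute at the end of this file marks the second parameter of
; @foo_uniform as uniform ('u'), so the vectorizer can pass %uniform straight
; through as an i64 scalar instead of broadcasting it into a vector.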
define void @test_uniform(ptr noalias %dst, ptr readonly %src, i64 %uniform, i64 %n) {
; CHECK-LABEL: define void @test_uniform
; CHECK-SAME: (ptr noalias [[DST:%.*]], ptr readonly [[SRC:%.*]], i64 [[UNIFORM:%.*]], i64 [[N:%.*]]) {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 2
; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK:       vector.ph:
; CHECK-NEXT:    [[N_VEC:%.*]] = and i64 [[N]], -2
; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
; CHECK:       vector.body:
; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr double, ptr [[SRC]], i64 [[INDEX]]
; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <2 x double>, ptr [[TMP0]], align 8
; CHECK-NEXT:    [[TMP1:%.*]] = call <2 x double> @foo_uniform(<2 x double> [[WIDE_LOAD]], i64 [[UNIFORM]])
; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds double, ptr [[DST]], i64 [[INDEX]]
; CHECK-NEXT:    store <2 x double> [[TMP1]], ptr [[TMP2]], align 8
; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
; CHECK-NEXT:    [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT:    br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK:       middle.block:
; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
; CHECK-NEXT:    br i1 [[CMP_N]], label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]]
; CHECK:       scalar.ph:
; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
; CHECK:       for.body:
; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT:    [[GEPSRC:%.*]] = getelementptr double, ptr [[SRC]], i64 [[INDVARS_IV]]
; CHECK-NEXT:    [[DATA:%.*]] = load double, ptr [[GEPSRC]], align 8
; CHECK-NEXT:    [[CALL:%.*]] = call double @foo(double [[DATA]], i64 [[UNIFORM]]) #[[ATTR0:[0-9]+]]
; CHECK-NEXT:    [[GEPDST:%.*]] = getelementptr inbounds double, ptr [[DST]], i64 [[INDVARS_IV]]
; CHECK-NEXT:    store double [[CALL]], ptr [[GEPDST]], align 8
; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]]
; CHECK-NEXT:    br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
; CHECK:       for.cond.cleanup:
; CHECK-NEXT:    ret void
;
entry:
  br label %for.body

for.body:
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
  %gepsrc = getelementptr double, ptr %src, i64 %indvars.iv
  %data = load double, ptr %gepsrc, align 8
  %call = call double @foo(double %data, i64 %uniform) #0
  %gepdst = getelementptr inbounds double, ptr %dst, i64 %indvars.iv
  store double %call, ptr %gepdst
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %exitcond = icmp eq i64 %indvars.iv.next, %n
  br i1 %exitcond, label %for.cond.cleanup, label %for.body

for.cond.cleanup:
  ret void
}

; If the parameter is not uniform, then we can't use the vector variant and
; must fall back to scalarization.
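; Here the second argument is the induction variable, so each lane sees a
; different value. That does not match the uniform ('u') parameter of the
; mapping, so the vectorizer scalarizes the call: it extracts each lane and
; calls the scalar @foo per element, as the CHECK lines below show.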
define void @test_uniform_not_invariant(ptr noalias %dst, ptr readonly %src, i64 %n) {
; CHECK-LABEL: define void @test_uniform_not_invariant
; CHECK-SAME: (ptr noalias [[DST:%.*]], ptr readonly [[SRC:%.*]], i64 [[N:%.*]]) {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 2
; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK:       vector.ph:
; CHECK-NEXT:    [[N_VEC:%.*]] = and i64 [[N]], -2
; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
; CHECK:       vector.body:
; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[TMP0:%.*]] = or disjoint i64 [[INDEX]], 1
; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr double, ptr [[SRC]], i64 [[INDEX]]
; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <2 x double>, ptr [[TMP1]], align 8
; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <2 x double> [[WIDE_LOAD]], i64 0
; CHECK-NEXT:    [[TMP3:%.*]] = call double @foo(double [[TMP2]], i64 [[INDEX]]) #[[ATTR0]]
; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <2 x double> [[WIDE_LOAD]], i64 1
; CHECK-NEXT:    [[TMP5:%.*]] = call double @foo(double [[TMP4]], i64 [[TMP0]]) #[[ATTR0]]
; CHECK-NEXT:    [[TMP6:%.*]] = insertelement <2 x double> poison, double [[TMP3]], i64 0
; CHECK-NEXT:    [[TMP7:%.*]] = insertelement <2 x double> [[TMP6]], double [[TMP5]], i64 1
; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr inbounds double, ptr [[DST]], i64 [[INDEX]]
; CHECK-NEXT:    store <2 x double> [[TMP7]], ptr [[TMP8]], align 8
; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
; CHECK-NEXT:    [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT:    br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
; CHECK:       middle.block:
; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
; CHECK-NEXT:    br i1 [[CMP_N]], label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]]
; CHECK:       scalar.ph:
; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
; CHECK:       for.body:
; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT:    [[GEPSRC:%.*]] = getelementptr double, ptr [[SRC]], i64 [[INDVARS_IV]]
; CHECK-NEXT:    [[DATA:%.*]] = load double, ptr [[GEPSRC]], align 8
; CHECK-NEXT:    [[CALL:%.*]] = call double @foo(double [[DATA]], i64 [[INDVARS_IV]]) #[[ATTR0]]
; CHECK-NEXT:    [[GEPDST:%.*]] = getelementptr inbounds double, ptr [[DST]], i64 [[INDVARS_IV]]
; CHECK-NEXT:    store double [[CALL]], ptr [[GEPDST]], align 8
; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]]
; CHECK-NEXT:    br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
; CHECK:       for.cond.cleanup:
; CHECK-NEXT:    ret void
;
entry:
  br label %for.body

for.body:
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
  %gepsrc = getelementptr double, ptr %src, i64 %indvars.iv
  %data = load double, ptr %gepsrc, align 8
  %call = call double @foo(double %data, i64 %indvars.iv) #0
  %gepdst = getelementptr inbounds double, ptr %dst, i64 %indvars.iv
  store double %call, ptr %gepdst
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %exitcond = icmp eq i64 %indvars.iv.next, %n
  br i1 %exitcond, label %for.cond.cleanup, label %for.body

for.cond.cleanup:
  ret void
}

; Scalar functions
declare double @foo(double, i64)

; Vector variants
declare <2 x double> @foo_uniform(<2 x double>, i64)

; Mappings
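; The variant string below roughly decodes as: "_ZGV" prefix, "_LLVM_" ISA,
; 'N' for an unmasked variant, '2' for a vectorization factor of 2, 'v' for a
; per-lane vector parameter, 'u' for a uniform (scalar) parameter, then the
; scalar function name and the vector variant name in parentheses.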
attributes #0 = { nounwind "vector-function-abi-variant"="_ZGV_LLVM_N2vu_foo(foo_uniform)" }