xref: /llvm-project/llvm/test/Transforms/SLPVectorizer/resized-alt-shuffle-after-minbw.ll (revision 07d284d4ebffd58d4b2934769b4e11fedd0b106e)
1; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
2; RUN: opt -S --passes=slp-vectorizer -slp-vectorize-hor=false < %s | FileCheck %s
3
; Scalar input for the slp-vectorizer invocation in the RUN line (horizontal
; reduction vectorization is switched off there). @func takes one i32 and
; returns void; nothing it computes is stored or returned.
; The autogenerated assertions right after the signature pin the expected
; output: a <4 x i32> shl/or pair blended by a shufflevector, widened to
; <32 x i32>, OR'ed, sign-extended to <32 x i64> and compared against zero,
; with individual i1 lanes extracted for the scalar 'and' instructions.
; NOTE(review): judging by the file name, this presumably guards the resizing
; of an alternate-opcode shuffle after minimum-bitwidth analysis - confirm
; against the originating commit.
4define void @func(i32 %0) {
5; CHECK-LABEL: define void @func(
6; CHECK-SAME: i32 [[TMP0:%.*]]) {
7; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x i32> <i32 0, i32 poison, i32 0, i32 0>, i32 [[TMP0]], i32 1
8; CHECK-NEXT:    [[TMP3:%.*]] = shl <4 x i32> [[TMP2]], zeroinitializer
9; CHECK-NEXT:    [[TMP4:%.*]] = or <4 x i32> [[TMP2]], zeroinitializer
10; CHECK-NEXT:    [[TMP5:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> [[TMP4]], <4 x i32> <i32 0, i32 1, i32 6, i32 7>
11; CHECK-NEXT:    [[TMP6:%.*]] = shl i32 [[TMP0]], 0
12; CHECK-NEXT:    [[TMP7:%.*]] = icmp eq i32 [[TMP6]], 0
13; CHECK-NEXT:    [[TMP8:%.*]] = shufflevector <4 x i32> [[TMP5]], <4 x i32> poison, <32 x i32> <i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 3>
14; CHECK-NEXT:    [[TMP9:%.*]] = sext i32 [[TMP6]] to i64
15; CHECK-NEXT:    [[TMP10:%.*]] = or i64 [[TMP9]], 0
16; CHECK-NEXT:    [[TMP11:%.*]] = trunc i64 [[TMP9]] to i32
17; CHECK-NEXT:    [[TMP12:%.*]] = insertelement <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>, i32 [[TMP11]], i32 30
18; CHECK-NEXT:    [[TMP13:%.*]] = shufflevector <32 x i32> [[TMP12]], <32 x i32> poison, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 30, i32 30>
19; CHECK-NEXT:    [[TMP14:%.*]] = call <32 x i32> @llvm.vector.insert.v32i32.v8i32(<32 x i32> [[TMP13]], <8 x i32> zeroinitializer, i64 16)
20; CHECK-NEXT:    [[TMP15:%.*]] = call <32 x i32> @llvm.vector.insert.v32i32.v4i32(<32 x i32> [[TMP14]], <4 x i32> zeroinitializer, i64 24)
21; CHECK-NEXT:    [[TMP16:%.*]] = call <32 x i32> @llvm.vector.insert.v32i32.v2i32(<32 x i32> [[TMP15]], <2 x i32> zeroinitializer, i64 14)
22; CHECK-NEXT:    [[TMP17:%.*]] = call <32 x i32> @llvm.vector.insert.v32i32.v2i32(<32 x i32> [[TMP16]], <2 x i32> zeroinitializer, i64 28)
23; CHECK-NEXT:    [[TMP18:%.*]] = or <32 x i32> [[TMP8]], [[TMP17]]
24; CHECK-NEXT:    [[TMP19:%.*]] = sext <32 x i32> [[TMP18]] to <32 x i64>
25; CHECK-NEXT:    [[TMP20:%.*]] = icmp slt <32 x i64> [[TMP19]], zeroinitializer
26; CHECK-NEXT:    [[TMP21:%.*]] = extractelement <32 x i1> [[TMP20]], i32 31
27; CHECK-NEXT:    [[TMP22:%.*]] = and i1 false, [[TMP21]]
28; CHECK-NEXT:    [[TMP23:%.*]] = extractelement <32 x i1> [[TMP20]], i32 30
29; CHECK-NEXT:    [[TMP24:%.*]] = and i1 false, [[TMP23]]
30; CHECK-NEXT:    [[TMP25:%.*]] = extractelement <32 x i1> [[TMP20]], i32 29
31; CHECK-NEXT:    [[TMP26:%.*]] = and i1 false, [[TMP25]]
32; CHECK-NEXT:    [[TMP27:%.*]] = extractelement <32 x i1> [[TMP20]], i32 28
33; CHECK-NEXT:    [[TMP28:%.*]] = and i1 false, [[TMP27]]
34; CHECK-NEXT:    [[TMP29:%.*]] = extractelement <32 x i1> [[TMP20]], i32 27
35; CHECK-NEXT:    [[TMP30:%.*]] = and i1 false, [[TMP29]]
36; CHECK-NEXT:    [[TMP31:%.*]] = extractelement <32 x i1> [[TMP20]], i32 26
37; CHECK-NEXT:    [[TMP32:%.*]] = and i1 false, [[TMP31]]
38; CHECK-NEXT:    [[TMP33:%.*]] = extractelement <32 x i1> [[TMP20]], i32 25
39; CHECK-NEXT:    [[TMP34:%.*]] = and i1 false, [[TMP33]]
40; CHECK-NEXT:    [[TMP35:%.*]] = extractelement <32 x i1> [[TMP20]], i32 24
41; CHECK-NEXT:    [[TMP36:%.*]] = and i1 false, [[TMP35]]
42; CHECK-NEXT:    [[TMP37:%.*]] = extractelement <32 x i1> [[TMP20]], i32 23
43; CHECK-NEXT:    [[TMP38:%.*]] = and i1 false, [[TMP37]]
44; CHECK-NEXT:    [[TMP39:%.*]] = extractelement <32 x i1> [[TMP20]], i32 22
45; CHECK-NEXT:    [[TMP40:%.*]] = and i1 false, [[TMP39]]
46; CHECK-NEXT:    [[TMP41:%.*]] = extractelement <32 x i1> [[TMP20]], i32 21
47; CHECK-NEXT:    [[TMP42:%.*]] = and i1 false, [[TMP41]]
48; CHECK-NEXT:    [[TMP43:%.*]] = extractelement <32 x i1> [[TMP20]], i32 20
49; CHECK-NEXT:    [[TMP44:%.*]] = and i1 false, [[TMP43]]
50; CHECK-NEXT:    [[TMP45:%.*]] = extractelement <32 x i1> [[TMP20]], i32 19
51; CHECK-NEXT:    [[TMP46:%.*]] = and i1 false, [[TMP45]]
52; CHECK-NEXT:    [[TMP47:%.*]] = extractelement <32 x i1> [[TMP20]], i32 18
53; CHECK-NEXT:    [[TMP48:%.*]] = and i1 false, [[TMP47]]
54; CHECK-NEXT:    [[TMP49:%.*]] = extractelement <32 x i1> [[TMP20]], i32 17
55; CHECK-NEXT:    [[TMP50:%.*]] = and i1 false, [[TMP49]]
56; CHECK-NEXT:    [[TMP51:%.*]] = extractelement <32 x i1> [[TMP20]], i32 16
57; CHECK-NEXT:    [[TMP52:%.*]] = and i1 false, [[TMP51]]
58; CHECK-NEXT:    [[TMP53:%.*]] = extractelement <32 x i1> [[TMP20]], i32 15
59; CHECK-NEXT:    [[TMP54:%.*]] = and i1 false, [[TMP53]]
60; CHECK-NEXT:    [[TMP55:%.*]] = extractelement <32 x i1> [[TMP20]], i32 14
61; CHECK-NEXT:    [[TMP56:%.*]] = and i1 false, [[TMP55]]
62; CHECK-NEXT:    [[TMP57:%.*]] = extractelement <32 x i1> [[TMP20]], i32 13
63; CHECK-NEXT:    [[TMP58:%.*]] = and i1 false, [[TMP57]]
64; CHECK-NEXT:    [[TMP59:%.*]] = extractelement <32 x i1> [[TMP20]], i32 12
65; CHECK-NEXT:    [[TMP60:%.*]] = and i1 false, [[TMP59]]
66; CHECK-NEXT:    [[TMP61:%.*]] = extractelement <32 x i1> [[TMP20]], i32 11
67; CHECK-NEXT:    [[TMP62:%.*]] = and i1 false, [[TMP61]]
68; CHECK-NEXT:    [[TMP63:%.*]] = extractelement <32 x i1> [[TMP20]], i32 10
69; CHECK-NEXT:    [[TMP64:%.*]] = and i1 false, [[TMP63]]
70; CHECK-NEXT:    [[TMP65:%.*]] = extractelement <32 x i1> [[TMP20]], i32 9
71; CHECK-NEXT:    [[TMP66:%.*]] = and i1 false, [[TMP65]]
72; CHECK-NEXT:    [[TMP67:%.*]] = extractelement <32 x i1> [[TMP20]], i32 8
73; CHECK-NEXT:    [[TMP68:%.*]] = and i1 false, [[TMP67]]
74; CHECK-NEXT:    [[TMP69:%.*]] = extractelement <32 x i1> [[TMP20]], i32 7
75; CHECK-NEXT:    [[TMP70:%.*]] = and i1 false, [[TMP69]]
76; CHECK-NEXT:    [[TMP71:%.*]] = extractelement <32 x i1> [[TMP20]], i32 6
77; CHECK-NEXT:    [[TMP72:%.*]] = and i1 false, [[TMP71]]
78; CHECK-NEXT:    [[TMP73:%.*]] = extractelement <32 x i1> [[TMP20]], i32 5
79; CHECK-NEXT:    [[TMP74:%.*]] = and i1 false, [[TMP73]]
80; CHECK-NEXT:    [[TMP75:%.*]] = extractelement <32 x i1> [[TMP20]], i32 4
81; CHECK-NEXT:    [[TMP76:%.*]] = and i1 false, [[TMP75]]
82; CHECK-NEXT:    [[TMP77:%.*]] = extractelement <32 x i32> [[TMP18]], i32 0
83; CHECK-NEXT:    [[TMP78:%.*]] = zext i32 [[TMP77]] to i64
84; CHECK-NEXT:    [[TMP79:%.*]] = getelementptr float, ptr addrspace(1) null, i64 [[TMP78]]
85; CHECK-NEXT:    ret void
86;
  ; %2 is the only value derived from the argument (shl by 0); %3 is its
  ; sign-extension to i64. %4/%5 repeat the same shl/sext pair on constant 0.
87  %2 = shl i32 %0, 0
88  %3 = sext i32 %2 to i64
89  %4 = shl i32 0, 0
90  %5 = sext i32 %4 to i64
  ; Constant-only operands: %8 and %9 zero-extend the two 'or i32 0, 0'
  ; results, while %10..%26 zero-extend the literal 0 directly.
91  %6 = or i32 0, 0
92  %7 = or i32 0, 0
93  %8 = zext i32 %6 to i64
94  %9 = zext i32 %7 to i64
95  %10 = zext i32 0 to i64
96  %11 = zext i32 0 to i64
97  %12 = zext i32 0 to i64
98  %13 = zext i32 0 to i64
99  %14 = zext i32 0 to i64
100  %15 = zext i32 0 to i64
101  %16 = zext i32 0 to i64
102  %17 = zext i32 0 to i64
103  %18 = zext i32 0 to i64
104  %19 = zext i32 0 to i64
105  %20 = zext i32 0 to i64
106  %21 = zext i32 0 to i64
107  %22 = zext i32 0 to i64
108  %23 = zext i32 0 to i64
109  %24 = zext i32 0 to i64
110  %25 = zext i32 0 to i64
111  %26 = zext i32 0 to i64
  ; %27..%58 each OR the sign-extended argument %3 with either one of the
  ; zext values above (%8..%25) or the literal 0.
112  %27 = or i64 %3, 0
113  %28 = or i64 %3, %8
114  %29 = or i64 %3, %9
115  %30 = or i64 %3, %10
116  %31 = or i64 %3, %11
117  %32 = or i64 %3, %12
118  %33 = or i64 %3, %13
119  %34 = or i64 %3, %14
120  %35 = or i64 %3, %15
121  %36 = or i64 %3, %16
122  %37 = or i64 %3, %17
123  %38 = or i64 %3, %18
124  %39 = or i64 %3, %19
125  %40 = or i64 %3, %20
126  %41 = or i64 %3, %21
127  %42 = or i64 %3, %22
128  %43 = or i64 %3, %23
129  %44 = or i64 %3, %24
130  %45 = or i64 %3, %25
131  %46 = or i64 %3, 0
132  %47 = or i64 %3, 0
133  %48 = or i64 %3, 0
134  %49 = or i64 %3, 0
135  %50 = or i64 %3, 0
136  %51 = or i64 %3, 0
137  %52 = or i64 %3, 0
138  %53 = or i64 %3, 0
139  %54 = or i64 %3, 0
140  %55 = or i64 %3, 0
141  %56 = or i64 %3, 0
142  %57 = or i64 %3, 0
143  %58 = or i64 %3, 0
  ; Signed less-than-zero compare on %28..%58; %27 has no compare.
144  %59 = icmp slt i64 %28, 0
145  %60 = icmp slt i64 %29, 0
146  %61 = icmp slt i64 %30, 0
147  %62 = icmp slt i64 %31, 0
148  %63 = icmp slt i64 %32, 0
149  %64 = icmp slt i64 %33, 0
150  %65 = icmp slt i64 %34, 0
151  %66 = icmp slt i64 %35, 0
152  %67 = icmp slt i64 %36, 0
153  %68 = icmp slt i64 %37, 0
154  %69 = icmp slt i64 %38, 0
155  %70 = icmp slt i64 %39, 0
156  %71 = icmp slt i64 %40, 0
157  %72 = icmp slt i64 %41, 0
158  %73 = icmp slt i64 %42, 0
159  %74 = icmp slt i64 %43, 0
160  %75 = icmp slt i64 %44, 0
161  %76 = icmp slt i64 %45, 0
162  %77 = icmp slt i64 %46, 0
163  %78 = icmp slt i64 %47, 0
164  %79 = icmp slt i64 %48, 0
165  %80 = icmp slt i64 %49, 0
166  %81 = icmp slt i64 %50, 0
167  %82 = icmp slt i64 %51, 0
168  %83 = icmp slt i64 %52, 0
169  %84 = icmp slt i64 %53, 0
170  %85 = icmp slt i64 %54, 0
171  %86 = icmp slt i64 %55, 0
172  %87 = icmp slt i64 %56, 0
173  %88 = icmp slt i64 %57, 0
174  %89 = icmp slt i64 %58, 0
  ; Compare results %59..%86 are each ANDed with constant false; %87..%89
  ; have no users in this function.
175  %90 = and i1 false, %59
176  %91 = and i1 false, %60
177  %92 = and i1 false, %61
178  %93 = and i1 false, %62
179  %94 = and i1 false, %63
180  %95 = and i1 false, %64
181  %96 = and i1 false, %65
182  %97 = and i1 false, %66
183  %98 = and i1 false, %67
184  %99 = and i1 false, %68
185  %100 = and i1 false, %69
186  %101 = and i1 false, %70
187  %102 = and i1 false, %71
188  %103 = and i1 false, %72
189  %104 = and i1 false, %73
190  %105 = and i1 false, %74
191  %106 = and i1 false, %75
192  %107 = and i1 false, %76
  ; Scalar equality compare on %2 sitting in the middle of the 'and' run; its
  ; result is not used by any later instruction.
193  %108 = icmp eq i32 %2, 0
194  %109 = and i1 false, %77
195  %110 = and i1 false, %78
196  %111 = and i1 false, %79
197  %112 = and i1 false, %80
198  %113 = and i1 false, %81
199  %114 = and i1 false, %82
200  %115 = and i1 false, %83
201  %116 = and i1 false, %84
202  %117 = and i1 false, %85
203  %118 = and i1 false, %86
  ; %119 ORs the two constant-derived values %5 and %26, then indexes a float
  ; GEP off a null addrspace(1) pointer and feeds one more signed compare.
204  %119 = or i64 %5, %26
205  %120 = getelementptr float, ptr addrspace(1) null, i64 %119
206  %121 = icmp slt i64 %119, 0
207  ret void
208}
209