xref: /llvm-project/llvm/test/Transforms/SLPVectorizer/AArch64/gather-reduce.ll (revision f7685af4a5bd188e6d548967d818d8569f10a70d)
1; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2; RUN: opt -S -passes=slp-vectorizer,dce,instcombine < %s | FileCheck %s --check-prefix=GENERIC
3; RUN: opt -S -mcpu=kryo -passes=slp-vectorizer,dce,instcombine < %s | FileCheck %s --check-prefix=KRYO
4
5target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
6target triple = "aarch64--linux-gnu"
7
8; These tests check that we vectorize the index calculations in the
9; gather-reduce pattern shown below. We check cases having i32 and i64
10; subtraction.
11;
12; int gather_reduce_8x16(short *a, short *b, short *g, int n) {
13;   int sum = 0;
14;   for (int i = 0; i < n ; ++i) {
15;     sum += g[*a++ - b[0]]; sum += g[*a++ - b[1]];
16;     sum += g[*a++ - b[2]]; sum += g[*a++ - b[3]];
17;     sum += g[*a++ - b[4]]; sum += g[*a++ - b[5]];
18;     sum += g[*a++ - b[6]]; sum += g[*a++ - b[7]];
19;   }
20;   return sum;
21; }
22
; Gather-reduce kernel whose index difference is computed in i32.
; Per loop iteration the IR below loads eight consecutive i16 values through
; %a.addr.0101 and eight i16 values from %b (offsets 0..7; %b itself is never
; advanced), zero-extends each pair to i32, subtracts them, uses the
; difference as an i16 element index into %g, and adds the zero-extended i16
; loaded from %g to the running sum. %a.addr.0101 advances by eight i16
; elements per iteration, and the loop body is only entered when %n > 0.
; Both check prefixes expect the same shape: the loads from %a and %b become
; <8 x i16> vector loads feeding one <8 x i32> sub, and every lane is then
; extracted, sign-extended to i64, and used for a scalar load from %g.
23define i32 @gather_reduce_8x16_i32(ptr nocapture readonly %a, ptr nocapture readonly %b, ptr nocapture readonly %g, i32 %n) {
24; GENERIC-LABEL: @gather_reduce_8x16_i32(
25; GENERIC-NEXT:  entry:
26; GENERIC-NEXT:    [[CMP_99:%.*]] = icmp sgt i32 [[N:%.*]], 0
27; GENERIC-NEXT:    br i1 [[CMP_99]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_COND_CLEANUP:%.*]]
28; GENERIC:       for.body.preheader:
29; GENERIC-NEXT:    br label [[FOR_BODY:%.*]]
30; GENERIC:       for.cond.cleanup.loopexit:
31; GENERIC-NEXT:    br label [[FOR_COND_CLEANUP]]
32; GENERIC:       for.cond.cleanup:
33; GENERIC-NEXT:    [[SUM_0_LCSSA:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[ADD66:%.*]], [[FOR_COND_CLEANUP_LOOPEXIT:%.*]] ]
34; GENERIC-NEXT:    ret i32 [[SUM_0_LCSSA]]
35; GENERIC:       for.body:
36; GENERIC-NEXT:    [[I_0103:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ 0, [[FOR_BODY_PREHEADER]] ]
37; GENERIC-NEXT:    [[SUM_0102:%.*]] = phi i32 [ [[ADD66]], [[FOR_BODY]] ], [ 0, [[FOR_BODY_PREHEADER]] ]
38; GENERIC-NEXT:    [[A_ADDR_0101:%.*]] = phi ptr [ [[INCDEC_PTR58:%.*]], [[FOR_BODY]] ], [ [[A:%.*]], [[FOR_BODY_PREHEADER]] ]
39; GENERIC-NEXT:    [[INCDEC_PTR58]] = getelementptr inbounds nuw i8, ptr [[A_ADDR_0101]], i64 16
40; GENERIC-NEXT:    [[TMP0:%.*]] = load <8 x i16>, ptr [[A_ADDR_0101]], align 2
41; GENERIC-NEXT:    [[TMP1:%.*]] = zext <8 x i16> [[TMP0]] to <8 x i32>
42; GENERIC-NEXT:    [[TMP2:%.*]] = load <8 x i16>, ptr [[B:%.*]], align 2
43; GENERIC-NEXT:    [[TMP3:%.*]] = zext <8 x i16> [[TMP2]] to <8 x i32>
44; GENERIC-NEXT:    [[TMP4:%.*]] = sub nsw <8 x i32> [[TMP1]], [[TMP3]]
45; GENERIC-NEXT:    [[TMP5:%.*]] = extractelement <8 x i32> [[TMP4]], i64 0
46; GENERIC-NEXT:    [[TMP6:%.*]] = sext i32 [[TMP5]] to i64
47; GENERIC-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[G:%.*]], i64 [[TMP6]]
48; GENERIC-NEXT:    [[TMP7:%.*]] = load i16, ptr [[ARRAYIDX]], align 2
49; GENERIC-NEXT:    [[CONV3:%.*]] = zext i16 [[TMP7]] to i32
50; GENERIC-NEXT:    [[ADD:%.*]] = add nsw i32 [[SUM_0102]], [[CONV3]]
51; GENERIC-NEXT:    [[TMP8:%.*]] = extractelement <8 x i32> [[TMP4]], i64 1
52; GENERIC-NEXT:    [[TMP9:%.*]] = sext i32 [[TMP8]] to i64
53; GENERIC-NEXT:    [[ARRAYIDX10:%.*]] = getelementptr inbounds i16, ptr [[G]], i64 [[TMP9]]
54; GENERIC-NEXT:    [[TMP10:%.*]] = load i16, ptr [[ARRAYIDX10]], align 2
55; GENERIC-NEXT:    [[CONV11:%.*]] = zext i16 [[TMP10]] to i32
56; GENERIC-NEXT:    [[ADD12:%.*]] = add nsw i32 [[ADD]], [[CONV11]]
57; GENERIC-NEXT:    [[TMP11:%.*]] = extractelement <8 x i32> [[TMP4]], i64 2
58; GENERIC-NEXT:    [[TMP12:%.*]] = sext i32 [[TMP11]] to i64
59; GENERIC-NEXT:    [[ARRAYIDX19:%.*]] = getelementptr inbounds i16, ptr [[G]], i64 [[TMP12]]
60; GENERIC-NEXT:    [[TMP13:%.*]] = load i16, ptr [[ARRAYIDX19]], align 2
61; GENERIC-NEXT:    [[CONV20:%.*]] = zext i16 [[TMP13]] to i32
62; GENERIC-NEXT:    [[ADD21:%.*]] = add nsw i32 [[ADD12]], [[CONV20]]
63; GENERIC-NEXT:    [[TMP14:%.*]] = extractelement <8 x i32> [[TMP4]], i64 3
64; GENERIC-NEXT:    [[TMP15:%.*]] = sext i32 [[TMP14]] to i64
65; GENERIC-NEXT:    [[ARRAYIDX28:%.*]] = getelementptr inbounds i16, ptr [[G]], i64 [[TMP15]]
66; GENERIC-NEXT:    [[TMP16:%.*]] = load i16, ptr [[ARRAYIDX28]], align 2
67; GENERIC-NEXT:    [[CONV29:%.*]] = zext i16 [[TMP16]] to i32
68; GENERIC-NEXT:    [[ADD30:%.*]] = add nsw i32 [[ADD21]], [[CONV29]]
69; GENERIC-NEXT:    [[TMP17:%.*]] = extractelement <8 x i32> [[TMP4]], i64 4
70; GENERIC-NEXT:    [[TMP18:%.*]] = sext i32 [[TMP17]] to i64
71; GENERIC-NEXT:    [[ARRAYIDX37:%.*]] = getelementptr inbounds i16, ptr [[G]], i64 [[TMP18]]
72; GENERIC-NEXT:    [[TMP19:%.*]] = load i16, ptr [[ARRAYIDX37]], align 2
73; GENERIC-NEXT:    [[CONV38:%.*]] = zext i16 [[TMP19]] to i32
74; GENERIC-NEXT:    [[ADD39:%.*]] = add nsw i32 [[ADD30]], [[CONV38]]
75; GENERIC-NEXT:    [[TMP20:%.*]] = extractelement <8 x i32> [[TMP4]], i64 5
76; GENERIC-NEXT:    [[TMP21:%.*]] = sext i32 [[TMP20]] to i64
77; GENERIC-NEXT:    [[ARRAYIDX46:%.*]] = getelementptr inbounds i16, ptr [[G]], i64 [[TMP21]]
78; GENERIC-NEXT:    [[TMP22:%.*]] = load i16, ptr [[ARRAYIDX46]], align 2
79; GENERIC-NEXT:    [[CONV47:%.*]] = zext i16 [[TMP22]] to i32
80; GENERIC-NEXT:    [[ADD48:%.*]] = add nsw i32 [[ADD39]], [[CONV47]]
81; GENERIC-NEXT:    [[TMP23:%.*]] = extractelement <8 x i32> [[TMP4]], i64 6
82; GENERIC-NEXT:    [[TMP24:%.*]] = sext i32 [[TMP23]] to i64
83; GENERIC-NEXT:    [[ARRAYIDX55:%.*]] = getelementptr inbounds i16, ptr [[G]], i64 [[TMP24]]
84; GENERIC-NEXT:    [[TMP25:%.*]] = load i16, ptr [[ARRAYIDX55]], align 2
85; GENERIC-NEXT:    [[CONV56:%.*]] = zext i16 [[TMP25]] to i32
86; GENERIC-NEXT:    [[ADD57:%.*]] = add nsw i32 [[ADD48]], [[CONV56]]
87; GENERIC-NEXT:    [[TMP26:%.*]] = extractelement <8 x i32> [[TMP4]], i64 7
88; GENERIC-NEXT:    [[TMP27:%.*]] = sext i32 [[TMP26]] to i64
89; GENERIC-NEXT:    [[ARRAYIDX64:%.*]] = getelementptr inbounds i16, ptr [[G]], i64 [[TMP27]]
90; GENERIC-NEXT:    [[TMP28:%.*]] = load i16, ptr [[ARRAYIDX64]], align 2
91; GENERIC-NEXT:    [[CONV65:%.*]] = zext i16 [[TMP28]] to i32
92; GENERIC-NEXT:    [[ADD66]] = add nsw i32 [[ADD57]], [[CONV65]]
93; GENERIC-NEXT:    [[INC]] = add nuw nsw i32 [[I_0103]], 1
94; GENERIC-NEXT:    [[EXITCOND:%.*]] = icmp eq i32 [[INC]], [[N]]
95; GENERIC-NEXT:    br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[FOR_BODY]]
96;
97; KRYO-LABEL: @gather_reduce_8x16_i32(
98; KRYO-NEXT:  entry:
99; KRYO-NEXT:    [[CMP_99:%.*]] = icmp sgt i32 [[N:%.*]], 0
100; KRYO-NEXT:    br i1 [[CMP_99]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_COND_CLEANUP:%.*]]
101; KRYO:       for.body.preheader:
102; KRYO-NEXT:    br label [[FOR_BODY:%.*]]
103; KRYO:       for.cond.cleanup.loopexit:
104; KRYO-NEXT:    br label [[FOR_COND_CLEANUP]]
105; KRYO:       for.cond.cleanup:
106; KRYO-NEXT:    [[SUM_0_LCSSA:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[ADD66:%.*]], [[FOR_COND_CLEANUP_LOOPEXIT:%.*]] ]
107; KRYO-NEXT:    ret i32 [[SUM_0_LCSSA]]
108; KRYO:       for.body:
109; KRYO-NEXT:    [[I_0103:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ 0, [[FOR_BODY_PREHEADER]] ]
110; KRYO-NEXT:    [[SUM_0102:%.*]] = phi i32 [ [[ADD66]], [[FOR_BODY]] ], [ 0, [[FOR_BODY_PREHEADER]] ]
111; KRYO-NEXT:    [[A_ADDR_0101:%.*]] = phi ptr [ [[INCDEC_PTR58:%.*]], [[FOR_BODY]] ], [ [[A:%.*]], [[FOR_BODY_PREHEADER]] ]
112; KRYO-NEXT:    [[INCDEC_PTR58]] = getelementptr inbounds nuw i8, ptr [[A_ADDR_0101]], i64 16
113; KRYO-NEXT:    [[TMP0:%.*]] = load <8 x i16>, ptr [[A_ADDR_0101]], align 2
114; KRYO-NEXT:    [[TMP1:%.*]] = zext <8 x i16> [[TMP0]] to <8 x i32>
115; KRYO-NEXT:    [[TMP2:%.*]] = load <8 x i16>, ptr [[B:%.*]], align 2
116; KRYO-NEXT:    [[TMP3:%.*]] = zext <8 x i16> [[TMP2]] to <8 x i32>
117; KRYO-NEXT:    [[TMP4:%.*]] = sub nsw <8 x i32> [[TMP1]], [[TMP3]]
118; KRYO-NEXT:    [[TMP5:%.*]] = extractelement <8 x i32> [[TMP4]], i64 0
119; KRYO-NEXT:    [[TMP6:%.*]] = sext i32 [[TMP5]] to i64
120; KRYO-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[G:%.*]], i64 [[TMP6]]
121; KRYO-NEXT:    [[TMP7:%.*]] = load i16, ptr [[ARRAYIDX]], align 2
122; KRYO-NEXT:    [[CONV3:%.*]] = zext i16 [[TMP7]] to i32
123; KRYO-NEXT:    [[ADD:%.*]] = add nsw i32 [[SUM_0102]], [[CONV3]]
124; KRYO-NEXT:    [[TMP8:%.*]] = extractelement <8 x i32> [[TMP4]], i64 1
125; KRYO-NEXT:    [[TMP9:%.*]] = sext i32 [[TMP8]] to i64
126; KRYO-NEXT:    [[ARRAYIDX10:%.*]] = getelementptr inbounds i16, ptr [[G]], i64 [[TMP9]]
127; KRYO-NEXT:    [[TMP10:%.*]] = load i16, ptr [[ARRAYIDX10]], align 2
128; KRYO-NEXT:    [[CONV11:%.*]] = zext i16 [[TMP10]] to i32
129; KRYO-NEXT:    [[ADD12:%.*]] = add nsw i32 [[ADD]], [[CONV11]]
130; KRYO-NEXT:    [[TMP11:%.*]] = extractelement <8 x i32> [[TMP4]], i64 2
131; KRYO-NEXT:    [[TMP12:%.*]] = sext i32 [[TMP11]] to i64
132; KRYO-NEXT:    [[ARRAYIDX19:%.*]] = getelementptr inbounds i16, ptr [[G]], i64 [[TMP12]]
133; KRYO-NEXT:    [[TMP13:%.*]] = load i16, ptr [[ARRAYIDX19]], align 2
134; KRYO-NEXT:    [[CONV20:%.*]] = zext i16 [[TMP13]] to i32
135; KRYO-NEXT:    [[ADD21:%.*]] = add nsw i32 [[ADD12]], [[CONV20]]
136; KRYO-NEXT:    [[TMP14:%.*]] = extractelement <8 x i32> [[TMP4]], i64 3
137; KRYO-NEXT:    [[TMP15:%.*]] = sext i32 [[TMP14]] to i64
138; KRYO-NEXT:    [[ARRAYIDX28:%.*]] = getelementptr inbounds i16, ptr [[G]], i64 [[TMP15]]
139; KRYO-NEXT:    [[TMP16:%.*]] = load i16, ptr [[ARRAYIDX28]], align 2
140; KRYO-NEXT:    [[CONV29:%.*]] = zext i16 [[TMP16]] to i32
141; KRYO-NEXT:    [[ADD30:%.*]] = add nsw i32 [[ADD21]], [[CONV29]]
142; KRYO-NEXT:    [[TMP17:%.*]] = extractelement <8 x i32> [[TMP4]], i64 4
143; KRYO-NEXT:    [[TMP18:%.*]] = sext i32 [[TMP17]] to i64
144; KRYO-NEXT:    [[ARRAYIDX37:%.*]] = getelementptr inbounds i16, ptr [[G]], i64 [[TMP18]]
145; KRYO-NEXT:    [[TMP19:%.*]] = load i16, ptr [[ARRAYIDX37]], align 2
146; KRYO-NEXT:    [[CONV38:%.*]] = zext i16 [[TMP19]] to i32
147; KRYO-NEXT:    [[ADD39:%.*]] = add nsw i32 [[ADD30]], [[CONV38]]
148; KRYO-NEXT:    [[TMP20:%.*]] = extractelement <8 x i32> [[TMP4]], i64 5
149; KRYO-NEXT:    [[TMP21:%.*]] = sext i32 [[TMP20]] to i64
150; KRYO-NEXT:    [[ARRAYIDX46:%.*]] = getelementptr inbounds i16, ptr [[G]], i64 [[TMP21]]
151; KRYO-NEXT:    [[TMP22:%.*]] = load i16, ptr [[ARRAYIDX46]], align 2
152; KRYO-NEXT:    [[CONV47:%.*]] = zext i16 [[TMP22]] to i32
153; KRYO-NEXT:    [[ADD48:%.*]] = add nsw i32 [[ADD39]], [[CONV47]]
154; KRYO-NEXT:    [[TMP23:%.*]] = extractelement <8 x i32> [[TMP4]], i64 6
155; KRYO-NEXT:    [[TMP24:%.*]] = sext i32 [[TMP23]] to i64
156; KRYO-NEXT:    [[ARRAYIDX55:%.*]] = getelementptr inbounds i16, ptr [[G]], i64 [[TMP24]]
157; KRYO-NEXT:    [[TMP25:%.*]] = load i16, ptr [[ARRAYIDX55]], align 2
158; KRYO-NEXT:    [[CONV56:%.*]] = zext i16 [[TMP25]] to i32
159; KRYO-NEXT:    [[ADD57:%.*]] = add nsw i32 [[ADD48]], [[CONV56]]
160; KRYO-NEXT:    [[TMP26:%.*]] = extractelement <8 x i32> [[TMP4]], i64 7
161; KRYO-NEXT:    [[TMP27:%.*]] = sext i32 [[TMP26]] to i64
162; KRYO-NEXT:    [[ARRAYIDX64:%.*]] = getelementptr inbounds i16, ptr [[G]], i64 [[TMP27]]
163; KRYO-NEXT:    [[TMP28:%.*]] = load i16, ptr [[ARRAYIDX64]], align 2
164; KRYO-NEXT:    [[CONV65:%.*]] = zext i16 [[TMP28]] to i32
165; KRYO-NEXT:    [[ADD66]] = add nsw i32 [[ADD57]], [[CONV65]]
166; KRYO-NEXT:    [[INC]] = add nuw nsw i32 [[I_0103]], 1
167; KRYO-NEXT:    [[EXITCOND:%.*]] = icmp eq i32 [[INC]], [[N]]
168; KRYO-NEXT:    br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[FOR_BODY]]
169;
170entry:
  ; Guard: the loop is skipped (result 0) unless %n is strictly positive.
171  %cmp.99 = icmp sgt i32 %n, 0
172  br i1 %cmp.99, label %for.body.preheader, label %for.cond.cleanup
173
174for.body.preheader:
175  br label %for.body
176
177for.cond.cleanup.loopexit:
178  br label %for.cond.cleanup
179
180for.cond.cleanup:
  ; 0 when the loop never ran, otherwise the final accumulated sum.
181  %sum.0.lcssa = phi i32 [ 0, %entry ], [ %add66, %for.cond.cleanup.loopexit ]
182  ret i32 %sum.0.lcssa
183
184for.body:
  ; Loop-carried state: iteration counter, running sum, and the cursor into %a.
  ; In the lane notes below, a[k] is the k-th i16 at %a.addr.0101 and b[k] the
  ; k-th i16 at %b. Each lane's group also forms the element pointers that the
  ; following lane loads from.
185  %i.0103 = phi i32 [ %inc, %for.body ], [ 0, %for.body.preheader ]
186  %sum.0102 = phi i32 [ %add66, %for.body ], [ 0, %for.body.preheader ]
187  %a.addr.0101 = phi ptr [ %incdec.ptr58, %for.body ], [ %a, %for.body.preheader ]
  ; Lane 0: sum += zext(g[zext(a[0]) - zext(b[0])])
188  %incdec.ptr = getelementptr inbounds i16, ptr %a.addr.0101, i64 1
189  %0 = load i16, ptr %a.addr.0101, align 2
190  %conv = zext i16 %0 to i32
191  %incdec.ptr1 = getelementptr inbounds i16, ptr %b, i64 1
192  %1 = load i16, ptr %b, align 2
193  %conv2 = zext i16 %1 to i32
194  %sub = sub nsw i32 %conv, %conv2
195  %arrayidx = getelementptr inbounds i16, ptr %g, i32 %sub
196  %2 = load i16, ptr %arrayidx, align 2
197  %conv3 = zext i16 %2 to i32
198  %add = add nsw i32 %conv3, %sum.0102
  ; Lane 1: sum += zext(g[zext(a[1]) - zext(b[1])])
199  %incdec.ptr4 = getelementptr inbounds i16, ptr %a.addr.0101, i64 2
200  %3 = load i16, ptr %incdec.ptr, align 2
201  %conv5 = zext i16 %3 to i32
202  %incdec.ptr6 = getelementptr inbounds i16, ptr %b, i64 2
203  %4 = load i16, ptr %incdec.ptr1, align 2
204  %conv7 = zext i16 %4 to i32
205  %sub8 = sub nsw i32 %conv5, %conv7
206  %arrayidx10 = getelementptr inbounds i16, ptr %g, i32 %sub8
207  %5 = load i16, ptr %arrayidx10, align 2
208  %conv11 = zext i16 %5 to i32
209  %add12 = add nsw i32 %add, %conv11
  ; Lane 2: sum += zext(g[zext(a[2]) - zext(b[2])])
210  %incdec.ptr13 = getelementptr inbounds i16, ptr %a.addr.0101, i64 3
211  %6 = load i16, ptr %incdec.ptr4, align 2
212  %conv14 = zext i16 %6 to i32
213  %incdec.ptr15 = getelementptr inbounds i16, ptr %b, i64 3
214  %7 = load i16, ptr %incdec.ptr6, align 2
215  %conv16 = zext i16 %7 to i32
216  %sub17 = sub nsw i32 %conv14, %conv16
217  %arrayidx19 = getelementptr inbounds i16, ptr %g, i32 %sub17
218  %8 = load i16, ptr %arrayidx19, align 2
219  %conv20 = zext i16 %8 to i32
220  %add21 = add nsw i32 %add12, %conv20
  ; Lane 3: sum += zext(g[zext(a[3]) - zext(b[3])])
221  %incdec.ptr22 = getelementptr inbounds i16, ptr %a.addr.0101, i64 4
222  %9 = load i16, ptr %incdec.ptr13, align 2
223  %conv23 = zext i16 %9 to i32
224  %incdec.ptr24 = getelementptr inbounds i16, ptr %b, i64 4
225  %10 = load i16, ptr %incdec.ptr15, align 2
226  %conv25 = zext i16 %10 to i32
227  %sub26 = sub nsw i32 %conv23, %conv25
228  %arrayidx28 = getelementptr inbounds i16, ptr %g, i32 %sub26
229  %11 = load i16, ptr %arrayidx28, align 2
230  %conv29 = zext i16 %11 to i32
231  %add30 = add nsw i32 %add21, %conv29
  ; Lane 4: sum += zext(g[zext(a[4]) - zext(b[4])])
232  %incdec.ptr31 = getelementptr inbounds i16, ptr %a.addr.0101, i64 5
233  %12 = load i16, ptr %incdec.ptr22, align 2
234  %conv32 = zext i16 %12 to i32
235  %incdec.ptr33 = getelementptr inbounds i16, ptr %b, i64 5
236  %13 = load i16, ptr %incdec.ptr24, align 2
237  %conv34 = zext i16 %13 to i32
238  %sub35 = sub nsw i32 %conv32, %conv34
239  %arrayidx37 = getelementptr inbounds i16, ptr %g, i32 %sub35
240  %14 = load i16, ptr %arrayidx37, align 2
241  %conv38 = zext i16 %14 to i32
242  %add39 = add nsw i32 %add30, %conv38
  ; Lane 5: sum += zext(g[zext(a[5]) - zext(b[5])])
243  %incdec.ptr40 = getelementptr inbounds i16, ptr %a.addr.0101, i64 6
244  %15 = load i16, ptr %incdec.ptr31, align 2
245  %conv41 = zext i16 %15 to i32
246  %incdec.ptr42 = getelementptr inbounds i16, ptr %b, i64 6
247  %16 = load i16, ptr %incdec.ptr33, align 2
248  %conv43 = zext i16 %16 to i32
249  %sub44 = sub nsw i32 %conv41, %conv43
250  %arrayidx46 = getelementptr inbounds i16, ptr %g, i32 %sub44
251  %17 = load i16, ptr %arrayidx46, align 2
252  %conv47 = zext i16 %17 to i32
253  %add48 = add nsw i32 %add39, %conv47
  ; Lane 6: sum += zext(g[zext(a[6]) - zext(b[6])])
254  %incdec.ptr49 = getelementptr inbounds i16, ptr %a.addr.0101, i64 7
255  %18 = load i16, ptr %incdec.ptr40, align 2
256  %conv50 = zext i16 %18 to i32
257  %incdec.ptr51 = getelementptr inbounds i16, ptr %b, i64 7
258  %19 = load i16, ptr %incdec.ptr42, align 2
259  %conv52 = zext i16 %19 to i32
260  %sub53 = sub nsw i32 %conv50, %conv52
261  %arrayidx55 = getelementptr inbounds i16, ptr %g, i32 %sub53
262  %20 = load i16, ptr %arrayidx55, align 2
263  %conv56 = zext i16 %20 to i32
264  %add57 = add nsw i32 %add48, %conv56
  ; Lane 7: sum += zext(g[zext(a[7]) - zext(b[7])]).
  ; %incdec.ptr58 (a + 8 elements) is the %a cursor for the next iteration.
265  %incdec.ptr58 = getelementptr inbounds i16, ptr %a.addr.0101, i64 8
266  %21 = load i16, ptr %incdec.ptr49, align 2
267  %conv59 = zext i16 %21 to i32
268  %22 = load i16, ptr %incdec.ptr51, align 2
269  %conv61 = zext i16 %22 to i32
270  %sub62 = sub nsw i32 %conv59, %conv61
271  %arrayidx64 = getelementptr inbounds i16, ptr %g, i32 %sub62
272  %23 = load i16, ptr %arrayidx64, align 2
273  %conv65 = zext i16 %23 to i32
274  %add66 = add nsw i32 %add57, %conv65
  ; Loop latch: exit once the counter reaches %n.
275  %inc = add nuw nsw i32 %i.0103, 1
276  %exitcond = icmp eq i32 %inc, %n
277  br i1 %exitcond, label %for.cond.cleanup.loopexit, label %for.body
278}
279
; Gather-reduce kernel whose index difference is computed in i64.
; The input IR has the same structure as the i32 variant of this pattern: per
; iteration it loads eight consecutive i16 values through %a.addr.0101 and
; eight i16 values from %b (offsets 0..7; %b is never advanced), but here each
; pair is zero-extended to i64 before the nsw subtraction that produces the
; i16 element index into %g. The i16 loaded from %g is zero-extended to i32
; and added to the running i32 sum.
; Both check prefixes expect the subtraction to be carried out as a single
; <8 x i32> sub on zero-extended <8 x i16> vector loads, with each extracted
; lane sign-extended to i64 before the scalar load from %g.
280define i32 @gather_reduce_8x16_i64(ptr nocapture readonly %a, ptr nocapture readonly %b, ptr nocapture readonly %g, i32 %n) {
281; GENERIC-LABEL: @gather_reduce_8x16_i64(
282; GENERIC-NEXT:  entry:
283; GENERIC-NEXT:    [[CMP_99:%.*]] = icmp sgt i32 [[N:%.*]], 0
284; GENERIC-NEXT:    br i1 [[CMP_99]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_COND_CLEANUP:%.*]]
285; GENERIC:       for.body.preheader:
286; GENERIC-NEXT:    br label [[FOR_BODY:%.*]]
287; GENERIC:       for.cond.cleanup.loopexit:
288; GENERIC-NEXT:    br label [[FOR_COND_CLEANUP]]
289; GENERIC:       for.cond.cleanup:
290; GENERIC-NEXT:    [[SUM_0_LCSSA:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[ADD66:%.*]], [[FOR_COND_CLEANUP_LOOPEXIT:%.*]] ]
291; GENERIC-NEXT:    ret i32 [[SUM_0_LCSSA]]
292; GENERIC:       for.body:
293; GENERIC-NEXT:    [[I_0103:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ 0, [[FOR_BODY_PREHEADER]] ]
294; GENERIC-NEXT:    [[SUM_0102:%.*]] = phi i32 [ [[ADD66]], [[FOR_BODY]] ], [ 0, [[FOR_BODY_PREHEADER]] ]
295; GENERIC-NEXT:    [[A_ADDR_0101:%.*]] = phi ptr [ [[INCDEC_PTR58:%.*]], [[FOR_BODY]] ], [ [[A:%.*]], [[FOR_BODY_PREHEADER]] ]
296; GENERIC-NEXT:    [[INCDEC_PTR58]] = getelementptr inbounds nuw i8, ptr [[A_ADDR_0101]], i64 16
297; GENERIC-NEXT:    [[TMP0:%.*]] = load <8 x i16>, ptr [[A_ADDR_0101]], align 2
298; GENERIC-NEXT:    [[TMP1:%.*]] = zext <8 x i16> [[TMP0]] to <8 x i32>
299; GENERIC-NEXT:    [[TMP2:%.*]] = load <8 x i16>, ptr [[B:%.*]], align 2
300; GENERIC-NEXT:    [[TMP3:%.*]] = zext <8 x i16> [[TMP2]] to <8 x i32>
301; GENERIC-NEXT:    [[TMP4:%.*]] = sub nsw <8 x i32> [[TMP1]], [[TMP3]]
302; GENERIC-NEXT:    [[TMP5:%.*]] = extractelement <8 x i32> [[TMP4]], i64 0
303; GENERIC-NEXT:    [[TMP6:%.*]] = sext i32 [[TMP5]] to i64
304; GENERIC-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[G:%.*]], i64 [[TMP6]]
305; GENERIC-NEXT:    [[TMP7:%.*]] = load i16, ptr [[ARRAYIDX]], align 2
306; GENERIC-NEXT:    [[CONV3:%.*]] = zext i16 [[TMP7]] to i32
307; GENERIC-NEXT:    [[ADD:%.*]] = add nsw i32 [[SUM_0102]], [[CONV3]]
308; GENERIC-NEXT:    [[TMP8:%.*]] = extractelement <8 x i32> [[TMP4]], i64 1
309; GENERIC-NEXT:    [[TMP9:%.*]] = sext i32 [[TMP8]] to i64
310; GENERIC-NEXT:    [[ARRAYIDX10:%.*]] = getelementptr inbounds i16, ptr [[G]], i64 [[TMP9]]
311; GENERIC-NEXT:    [[TMP10:%.*]] = load i16, ptr [[ARRAYIDX10]], align 2
312; GENERIC-NEXT:    [[CONV11:%.*]] = zext i16 [[TMP10]] to i32
313; GENERIC-NEXT:    [[ADD12:%.*]] = add nsw i32 [[ADD]], [[CONV11]]
314; GENERIC-NEXT:    [[TMP11:%.*]] = extractelement <8 x i32> [[TMP4]], i64 2
315; GENERIC-NEXT:    [[TMP12:%.*]] = sext i32 [[TMP11]] to i64
316; GENERIC-NEXT:    [[ARRAYIDX19:%.*]] = getelementptr inbounds i16, ptr [[G]], i64 [[TMP12]]
317; GENERIC-NEXT:    [[TMP13:%.*]] = load i16, ptr [[ARRAYIDX19]], align 2
318; GENERIC-NEXT:    [[CONV20:%.*]] = zext i16 [[TMP13]] to i32
319; GENERIC-NEXT:    [[ADD21:%.*]] = add nsw i32 [[ADD12]], [[CONV20]]
320; GENERIC-NEXT:    [[TMP14:%.*]] = extractelement <8 x i32> [[TMP4]], i64 3
321; GENERIC-NEXT:    [[TMP15:%.*]] = sext i32 [[TMP14]] to i64
322; GENERIC-NEXT:    [[ARRAYIDX28:%.*]] = getelementptr inbounds i16, ptr [[G]], i64 [[TMP15]]
323; GENERIC-NEXT:    [[TMP16:%.*]] = load i16, ptr [[ARRAYIDX28]], align 2
324; GENERIC-NEXT:    [[CONV29:%.*]] = zext i16 [[TMP16]] to i32
325; GENERIC-NEXT:    [[ADD30:%.*]] = add nsw i32 [[ADD21]], [[CONV29]]
326; GENERIC-NEXT:    [[TMP17:%.*]] = extractelement <8 x i32> [[TMP4]], i64 4
327; GENERIC-NEXT:    [[TMP18:%.*]] = sext i32 [[TMP17]] to i64
328; GENERIC-NEXT:    [[ARRAYIDX37:%.*]] = getelementptr inbounds i16, ptr [[G]], i64 [[TMP18]]
329; GENERIC-NEXT:    [[TMP19:%.*]] = load i16, ptr [[ARRAYIDX37]], align 2
330; GENERIC-NEXT:    [[CONV38:%.*]] = zext i16 [[TMP19]] to i32
331; GENERIC-NEXT:    [[ADD39:%.*]] = add nsw i32 [[ADD30]], [[CONV38]]
332; GENERIC-NEXT:    [[TMP20:%.*]] = extractelement <8 x i32> [[TMP4]], i64 5
333; GENERIC-NEXT:    [[TMP21:%.*]] = sext i32 [[TMP20]] to i64
334; GENERIC-NEXT:    [[ARRAYIDX46:%.*]] = getelementptr inbounds i16, ptr [[G]], i64 [[TMP21]]
335; GENERIC-NEXT:    [[TMP22:%.*]] = load i16, ptr [[ARRAYIDX46]], align 2
336; GENERIC-NEXT:    [[CONV47:%.*]] = zext i16 [[TMP22]] to i32
337; GENERIC-NEXT:    [[ADD48:%.*]] = add nsw i32 [[ADD39]], [[CONV47]]
338; GENERIC-NEXT:    [[TMP23:%.*]] = extractelement <8 x i32> [[TMP4]], i64 6
339; GENERIC-NEXT:    [[TMP24:%.*]] = sext i32 [[TMP23]] to i64
340; GENERIC-NEXT:    [[ARRAYIDX55:%.*]] = getelementptr inbounds i16, ptr [[G]], i64 [[TMP24]]
341; GENERIC-NEXT:    [[TMP25:%.*]] = load i16, ptr [[ARRAYIDX55]], align 2
342; GENERIC-NEXT:    [[CONV56:%.*]] = zext i16 [[TMP25]] to i32
343; GENERIC-NEXT:    [[ADD57:%.*]] = add nsw i32 [[ADD48]], [[CONV56]]
344; GENERIC-NEXT:    [[TMP26:%.*]] = extractelement <8 x i32> [[TMP4]], i64 7
345; GENERIC-NEXT:    [[TMP27:%.*]] = sext i32 [[TMP26]] to i64
346; GENERIC-NEXT:    [[ARRAYIDX64:%.*]] = getelementptr inbounds i16, ptr [[G]], i64 [[TMP27]]
347; GENERIC-NEXT:    [[TMP28:%.*]] = load i16, ptr [[ARRAYIDX64]], align 2
348; GENERIC-NEXT:    [[CONV65:%.*]] = zext i16 [[TMP28]] to i32
349; GENERIC-NEXT:    [[ADD66]] = add nsw i32 [[ADD57]], [[CONV65]]
350; GENERIC-NEXT:    [[INC]] = add nuw nsw i32 [[I_0103]], 1
351; GENERIC-NEXT:    [[EXITCOND:%.*]] = icmp eq i32 [[INC]], [[N]]
352; GENERIC-NEXT:    br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[FOR_BODY]]
353;
354; KRYO-LABEL: @gather_reduce_8x16_i64(
355; KRYO-NEXT:  entry:
356; KRYO-NEXT:    [[CMP_99:%.*]] = icmp sgt i32 [[N:%.*]], 0
357; KRYO-NEXT:    br i1 [[CMP_99]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_COND_CLEANUP:%.*]]
358; KRYO:       for.body.preheader:
359; KRYO-NEXT:    br label [[FOR_BODY:%.*]]
360; KRYO:       for.cond.cleanup.loopexit:
361; KRYO-NEXT:    br label [[FOR_COND_CLEANUP]]
362; KRYO:       for.cond.cleanup:
363; KRYO-NEXT:    [[SUM_0_LCSSA:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[ADD66:%.*]], [[FOR_COND_CLEANUP_LOOPEXIT:%.*]] ]
364; KRYO-NEXT:    ret i32 [[SUM_0_LCSSA]]
365; KRYO:       for.body:
366; KRYO-NEXT:    [[I_0103:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ 0, [[FOR_BODY_PREHEADER]] ]
367; KRYO-NEXT:    [[SUM_0102:%.*]] = phi i32 [ [[ADD66]], [[FOR_BODY]] ], [ 0, [[FOR_BODY_PREHEADER]] ]
368; KRYO-NEXT:    [[A_ADDR_0101:%.*]] = phi ptr [ [[INCDEC_PTR58:%.*]], [[FOR_BODY]] ], [ [[A:%.*]], [[FOR_BODY_PREHEADER]] ]
369; KRYO-NEXT:    [[INCDEC_PTR58]] = getelementptr inbounds nuw i8, ptr [[A_ADDR_0101]], i64 16
370; KRYO-NEXT:    [[TMP0:%.*]] = load <8 x i16>, ptr [[A_ADDR_0101]], align 2
371; KRYO-NEXT:    [[TMP1:%.*]] = zext <8 x i16> [[TMP0]] to <8 x i32>
372; KRYO-NEXT:    [[TMP2:%.*]] = load <8 x i16>, ptr [[B:%.*]], align 2
373; KRYO-NEXT:    [[TMP3:%.*]] = zext <8 x i16> [[TMP2]] to <8 x i32>
374; KRYO-NEXT:    [[TMP4:%.*]] = sub nsw <8 x i32> [[TMP1]], [[TMP3]]
375; KRYO-NEXT:    [[TMP5:%.*]] = extractelement <8 x i32> [[TMP4]], i64 0
376; KRYO-NEXT:    [[TMP6:%.*]] = sext i32 [[TMP5]] to i64
377; KRYO-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[G:%.*]], i64 [[TMP6]]
378; KRYO-NEXT:    [[TMP7:%.*]] = load i16, ptr [[ARRAYIDX]], align 2
379; KRYO-NEXT:    [[CONV3:%.*]] = zext i16 [[TMP7]] to i32
380; KRYO-NEXT:    [[ADD:%.*]] = add nsw i32 [[SUM_0102]], [[CONV3]]
381; KRYO-NEXT:    [[TMP8:%.*]] = extractelement <8 x i32> [[TMP4]], i64 1
382; KRYO-NEXT:    [[TMP9:%.*]] = sext i32 [[TMP8]] to i64
383; KRYO-NEXT:    [[ARRAYIDX10:%.*]] = getelementptr inbounds i16, ptr [[G]], i64 [[TMP9]]
384; KRYO-NEXT:    [[TMP10:%.*]] = load i16, ptr [[ARRAYIDX10]], align 2
385; KRYO-NEXT:    [[CONV11:%.*]] = zext i16 [[TMP10]] to i32
386; KRYO-NEXT:    [[ADD12:%.*]] = add nsw i32 [[ADD]], [[CONV11]]
387; KRYO-NEXT:    [[TMP11:%.*]] = extractelement <8 x i32> [[TMP4]], i64 2
388; KRYO-NEXT:    [[TMP12:%.*]] = sext i32 [[TMP11]] to i64
389; KRYO-NEXT:    [[ARRAYIDX19:%.*]] = getelementptr inbounds i16, ptr [[G]], i64 [[TMP12]]
390; KRYO-NEXT:    [[TMP13:%.*]] = load i16, ptr [[ARRAYIDX19]], align 2
391; KRYO-NEXT:    [[CONV20:%.*]] = zext i16 [[TMP13]] to i32
392; KRYO-NEXT:    [[ADD21:%.*]] = add nsw i32 [[ADD12]], [[CONV20]]
393; KRYO-NEXT:    [[TMP14:%.*]] = extractelement <8 x i32> [[TMP4]], i64 3
394; KRYO-NEXT:    [[TMP15:%.*]] = sext i32 [[TMP14]] to i64
395; KRYO-NEXT:    [[ARRAYIDX28:%.*]] = getelementptr inbounds i16, ptr [[G]], i64 [[TMP15]]
396; KRYO-NEXT:    [[TMP16:%.*]] = load i16, ptr [[ARRAYIDX28]], align 2
397; KRYO-NEXT:    [[CONV29:%.*]] = zext i16 [[TMP16]] to i32
398; KRYO-NEXT:    [[ADD30:%.*]] = add nsw i32 [[ADD21]], [[CONV29]]
399; KRYO-NEXT:    [[TMP17:%.*]] = extractelement <8 x i32> [[TMP4]], i64 4
400; KRYO-NEXT:    [[TMP18:%.*]] = sext i32 [[TMP17]] to i64
401; KRYO-NEXT:    [[ARRAYIDX37:%.*]] = getelementptr inbounds i16, ptr [[G]], i64 [[TMP18]]
402; KRYO-NEXT:    [[TMP19:%.*]] = load i16, ptr [[ARRAYIDX37]], align 2
403; KRYO-NEXT:    [[CONV38:%.*]] = zext i16 [[TMP19]] to i32
404; KRYO-NEXT:    [[ADD39:%.*]] = add nsw i32 [[ADD30]], [[CONV38]]
405; KRYO-NEXT:    [[TMP20:%.*]] = extractelement <8 x i32> [[TMP4]], i64 5
406; KRYO-NEXT:    [[TMP21:%.*]] = sext i32 [[TMP20]] to i64
407; KRYO-NEXT:    [[ARRAYIDX46:%.*]] = getelementptr inbounds i16, ptr [[G]], i64 [[TMP21]]
408; KRYO-NEXT:    [[TMP22:%.*]] = load i16, ptr [[ARRAYIDX46]], align 2
409; KRYO-NEXT:    [[CONV47:%.*]] = zext i16 [[TMP22]] to i32
410; KRYO-NEXT:    [[ADD48:%.*]] = add nsw i32 [[ADD39]], [[CONV47]]
411; KRYO-NEXT:    [[TMP23:%.*]] = extractelement <8 x i32> [[TMP4]], i64 6
412; KRYO-NEXT:    [[TMP24:%.*]] = sext i32 [[TMP23]] to i64
413; KRYO-NEXT:    [[ARRAYIDX55:%.*]] = getelementptr inbounds i16, ptr [[G]], i64 [[TMP24]]
414; KRYO-NEXT:    [[TMP25:%.*]] = load i16, ptr [[ARRAYIDX55]], align 2
415; KRYO-NEXT:    [[CONV56:%.*]] = zext i16 [[TMP25]] to i32
416; KRYO-NEXT:    [[ADD57:%.*]] = add nsw i32 [[ADD48]], [[CONV56]]
417; KRYO-NEXT:    [[TMP26:%.*]] = extractelement <8 x i32> [[TMP4]], i64 7
418; KRYO-NEXT:    [[TMP27:%.*]] = sext i32 [[TMP26]] to i64
419; KRYO-NEXT:    [[ARRAYIDX64:%.*]] = getelementptr inbounds i16, ptr [[G]], i64 [[TMP27]]
420; KRYO-NEXT:    [[TMP28:%.*]] = load i16, ptr [[ARRAYIDX64]], align 2
421; KRYO-NEXT:    [[CONV65:%.*]] = zext i16 [[TMP28]] to i32
422; KRYO-NEXT:    [[ADD66]] = add nsw i32 [[ADD57]], [[CONV65]]
423; KRYO-NEXT:    [[INC]] = add nuw nsw i32 [[I_0103]], 1
424; KRYO-NEXT:    [[EXITCOND:%.*]] = icmp eq i32 [[INC]], [[N]]
425; KRYO-NEXT:    br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[FOR_BODY]]
426;
427entry:
  ; Guard: the loop is skipped (result 0) unless %n is strictly positive.
428  %cmp.99 = icmp sgt i32 %n, 0
429  br i1 %cmp.99, label %for.body.preheader, label %for.cond.cleanup
430
431for.body.preheader:
432  br label %for.body
433
434for.cond.cleanup.loopexit:
435  br label %for.cond.cleanup
436
437for.cond.cleanup:
  ; 0 when the loop never ran, otherwise the final accumulated sum.
438  %sum.0.lcssa = phi i32 [ 0, %entry ], [ %add66, %for.cond.cleanup.loopexit ]
439  ret i32 %sum.0.lcssa
440
441for.body:
  ; Loop-carried state: iteration counter, running sum, and the cursor into %a.
  ; In the lane notes below, a[k] is the k-th i16 at %a.addr.0101 and b[k] the
  ; k-th i16 at %b; the zext and sub for the index are done in i64. Each lane's
  ; group also forms the element pointers that the following lane loads from.
442  %i.0103 = phi i32 [ %inc, %for.body ], [ 0, %for.body.preheader ]
443  %sum.0102 = phi i32 [ %add66, %for.body ], [ 0, %for.body.preheader ]
444  %a.addr.0101 = phi ptr [ %incdec.ptr58, %for.body ], [ %a, %for.body.preheader ]
  ; Lane 0: sum += zext(g[zext(a[0]) - zext(b[0])])
445  %incdec.ptr = getelementptr inbounds i16, ptr %a.addr.0101, i64 1
446  %0 = load i16, ptr %a.addr.0101, align 2
447  %conv = zext i16 %0 to i64
448  %incdec.ptr1 = getelementptr inbounds i16, ptr %b, i64 1
449  %1 = load i16, ptr %b, align 2
450  %conv2 = zext i16 %1 to i64
451  %sub = sub nsw i64 %conv, %conv2
452  %arrayidx = getelementptr inbounds i16, ptr %g, i64 %sub
453  %2 = load i16, ptr %arrayidx, align 2
454  %conv3 = zext i16 %2 to i32
455  %add = add nsw i32 %conv3, %sum.0102
  ; Lane 1: sum += zext(g[zext(a[1]) - zext(b[1])])
456  %incdec.ptr4 = getelementptr inbounds i16, ptr %a.addr.0101, i64 2
457  %3 = load i16, ptr %incdec.ptr, align 2
458  %conv5 = zext i16 %3 to i64
459  %incdec.ptr6 = getelementptr inbounds i16, ptr %b, i64 2
460  %4 = load i16, ptr %incdec.ptr1, align 2
461  %conv7 = zext i16 %4 to i64
462  %sub8 = sub nsw i64 %conv5, %conv7
463  %arrayidx10 = getelementptr inbounds i16, ptr %g, i64 %sub8
464  %5 = load i16, ptr %arrayidx10, align 2
465  %conv11 = zext i16 %5 to i32
466  %add12 = add nsw i32 %add, %conv11
  ; Lane 2: sum += zext(g[zext(a[2]) - zext(b[2])])
467  %incdec.ptr13 = getelementptr inbounds i16, ptr %a.addr.0101, i64 3
468  %6 = load i16, ptr %incdec.ptr4, align 2
469  %conv14 = zext i16 %6 to i64
470  %incdec.ptr15 = getelementptr inbounds i16, ptr %b, i64 3
471  %7 = load i16, ptr %incdec.ptr6, align 2
472  %conv16 = zext i16 %7 to i64
473  %sub17 = sub nsw i64 %conv14, %conv16
474  %arrayidx19 = getelementptr inbounds i16, ptr %g, i64 %sub17
475  %8 = load i16, ptr %arrayidx19, align 2
476  %conv20 = zext i16 %8 to i32
477  %add21 = add nsw i32 %add12, %conv20
  ; Lane 3: sum += zext(g[zext(a[3]) - zext(b[3])])
478  %incdec.ptr22 = getelementptr inbounds i16, ptr %a.addr.0101, i64 4
479  %9 = load i16, ptr %incdec.ptr13, align 2
480  %conv23 = zext i16 %9 to i64
481  %incdec.ptr24 = getelementptr inbounds i16, ptr %b, i64 4
482  %10 = load i16, ptr %incdec.ptr15, align 2
483  %conv25 = zext i16 %10 to i64
484  %sub26 = sub nsw i64 %conv23, %conv25
485  %arrayidx28 = getelementptr inbounds i16, ptr %g, i64 %sub26
486  %11 = load i16, ptr %arrayidx28, align 2
487  %conv29 = zext i16 %11 to i32
488  %add30 = add nsw i32 %add21, %conv29
  ; Lane 4: sum += zext(g[zext(a[4]) - zext(b[4])])
489  %incdec.ptr31 = getelementptr inbounds i16, ptr %a.addr.0101, i64 5
490  %12 = load i16, ptr %incdec.ptr22, align 2
491  %conv32 = zext i16 %12 to i64
492  %incdec.ptr33 = getelementptr inbounds i16, ptr %b, i64 5
493  %13 = load i16, ptr %incdec.ptr24, align 2
494  %conv34 = zext i16 %13 to i64
495  %sub35 = sub nsw i64 %conv32, %conv34
496  %arrayidx37 = getelementptr inbounds i16, ptr %g, i64 %sub35
497  %14 = load i16, ptr %arrayidx37, align 2
498  %conv38 = zext i16 %14 to i32
499  %add39 = add nsw i32 %add30, %conv38
  ; Lane 5: sum += zext(g[zext(a[5]) - zext(b[5])])
500  %incdec.ptr40 = getelementptr inbounds i16, ptr %a.addr.0101, i64 6
501  %15 = load i16, ptr %incdec.ptr31, align 2
502  %conv41 = zext i16 %15 to i64
503  %incdec.ptr42 = getelementptr inbounds i16, ptr %b, i64 6
504  %16 = load i16, ptr %incdec.ptr33, align 2
505  %conv43 = zext i16 %16 to i64
506  %sub44 = sub nsw i64 %conv41, %conv43
507  %arrayidx46 = getelementptr inbounds i16, ptr %g, i64 %sub44
508  %17 = load i16, ptr %arrayidx46, align 2
509  %conv47 = zext i16 %17 to i32
510  %add48 = add nsw i32 %add39, %conv47
  ; Lane 6: sum += zext(g[zext(a[6]) - zext(b[6])])
511  %incdec.ptr49 = getelementptr inbounds i16, ptr %a.addr.0101, i64 7
512  %18 = load i16, ptr %incdec.ptr40, align 2
513  %conv50 = zext i16 %18 to i64
514  %incdec.ptr51 = getelementptr inbounds i16, ptr %b, i64 7
515  %19 = load i16, ptr %incdec.ptr42, align 2
516  %conv52 = zext i16 %19 to i64
517  %sub53 = sub nsw i64 %conv50, %conv52
518  %arrayidx55 = getelementptr inbounds i16, ptr %g, i64 %sub53
519  %20 = load i16, ptr %arrayidx55, align 2
520  %conv56 = zext i16 %20 to i32
521  %add57 = add nsw i32 %add48, %conv56
  ; Lane 7: sum += zext(g[zext(a[7]) - zext(b[7])]).
  ; %incdec.ptr58 (a + 8 elements) is the %a cursor for the next iteration.
522  %incdec.ptr58 = getelementptr inbounds i16, ptr %a.addr.0101, i64 8
523  %21 = load i16, ptr %incdec.ptr49, align 2
524  %conv59 = zext i16 %21 to i64
525  %22 = load i16, ptr %incdec.ptr51, align 2
526  %conv61 = zext i16 %22 to i64
527  %sub62 = sub nsw i64 %conv59, %conv61
528  %arrayidx64 = getelementptr inbounds i16, ptr %g, i64 %sub62
529  %23 = load i16, ptr %arrayidx64, align 2
530  %conv65 = zext i16 %23 to i32
531  %add66 = add nsw i32 %add57, %conv65
  ; Loop latch: exit once the counter reaches %n.
532  %inc = add nuw nsw i32 %i.0103, 1
533  %exitcond = icmp eq i32 %inc, %n
534  br i1 %exitcond, label %for.cond.cleanup.loopexit, label %for.body
535}
536