xref: /llvm-project/llvm/test/CodeGen/Thumb/arm_q15_to_q31.ll (revision e0ed0333f0fed2e73f805afd58b61176a87aa3ad)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2
3; RUN: llc -mtriple=thumbv8m.base-arm-none-eabi < %s | FileCheck %s
4
; arm_q15_to_q31: reads i16 samples from %pSrc, shifts each one left by 16 and
; stores the result as an i32 to %pDst, for %blockSize samples in total.
;
; The IR is already in unrolled form:
;   * while.body.prol*  - prologue of up to three 4-sample groups,
;   * while.body        - main loop, four 4-sample groups (16 samples) per trip,
;   * while.body12*     - tail of up to three single samples.
;
; Within each 4-sample group the odd samples are widened with sext and the even
; samples with zext; the following shl by 16 discards the extended bits, and the
; autogenerated llc assertions below expect a plain ldrh + lsls #16 for all of them.
;
; NOTE(review): presumably derived from the CMSIS-DSP routine of the same name
; after inlining and runtime loop unrolling - confirm against the original commit.
5define void @arm_q15_to_q31(ptr nocapture noundef readonly %pSrc, ptr nocapture noundef writeonly %pDst, i32 noundef %blockSize) {
6; CHECK-LABEL: arm_q15_to_q31:
7; CHECK:       @ %bb.0: @ %entry
8; CHECK-NEXT:    .save {r4, r5, r6, r7, lr}
9; CHECK-NEXT:    push {r4, r5, r6, r7, lr}
10; CHECK-NEXT:    .pad #8
11; CHECK-NEXT:    sub sp, #8
12; CHECK-NEXT:    mov r7, r2
13; CHECK-NEXT:    lsrs r3, r2, #2
14; CHECK-NEXT:    beq .LBB0_6
15; CHECK-NEXT:  @ %bb.1: @ %while.body.preheader
16; CHECK-NEXT:    movs r5, #3
17; CHECK-NEXT:    ands r5, r3
18; CHECK-NEXT:    subs r2, r3, #1
19; CHECK-NEXT:    cbz r5, .LBB0_4
20; CHECK-NEXT:  @ %bb.2: @ %while.body.prol
21; CHECK-NEXT:    str r2, [sp] @ 4-byte Spill
22; CHECK-NEXT:    str r7, [sp, #4] @ 4-byte Spill
23; CHECK-NEXT:    ldrh r2, [r0]
24; CHECK-NEXT:    ldrh r7, [r0, #2]
25; CHECK-NEXT:    ldrh r4, [r0, #4]
26; CHECK-NEXT:    ldrh r6, [r0, #6]
27; CHECK-NEXT:    lsls r6, r6, #16
28; CHECK-NEXT:    lsls r4, r4, #16
29; CHECK-NEXT:    lsls r7, r7, #16
30; CHECK-NEXT:    lsls r2, r2, #16
31; CHECK-NEXT:    stm r1!, {r2, r7}
32; CHECK-NEXT:    str r4, [r1]
33; CHECK-NEXT:    str r6, [r1, #4]
34; CHECK-NEXT:    subs r1, #8
35; CHECK-NEXT:    cmp r5, #1
36; CHECK-NEXT:    bne .LBB0_11
37; CHECK-NEXT:  @ %bb.3:
38; CHECK-NEXT:    adds r1, #16
39; CHECK-NEXT:    adds r0, #8
40; CHECK-NEXT:    ldr r2, [sp] @ 4-byte Reload
41; CHECK-NEXT:    mov r3, r2
42; CHECK-NEXT:    ldr r7, [sp, #4] @ 4-byte Reload
43; CHECK-NEXT:  .LBB0_4: @ %while.body.prol.loopexit
44; CHECK-NEXT:    cmp r2, #3
45; CHECK-NEXT:    blo .LBB0_6
46; CHECK-NEXT:  .LBB0_5: @ %while.body
47; CHECK-NEXT:    @ =>This Inner Loop Header: Depth=1
48; CHECK-NEXT:    ldrh r2, [r0]
49; CHECK-NEXT:    ldrh r4, [r0, #2]
50; CHECK-NEXT:    ldrh r5, [r0, #4]
51; CHECK-NEXT:    ldrh r6, [r0, #6]
52; CHECK-NEXT:    lsls r6, r6, #16
53; CHECK-NEXT:    str r6, [r1, #12]
54; CHECK-NEXT:    lsls r5, r5, #16
55; CHECK-NEXT:    str r5, [r1, #8]
56; CHECK-NEXT:    lsls r4, r4, #16
57; CHECK-NEXT:    str r4, [r1, #4]
58; CHECK-NEXT:    lsls r2, r2, #16
59; CHECK-NEXT:    str r2, [r1]
60; CHECK-NEXT:    ldrh r2, [r0, #8]
61; CHECK-NEXT:    ldrh r4, [r0, #10]
62; CHECK-NEXT:    ldrh r5, [r0, #12]
63; CHECK-NEXT:    ldrh r6, [r0, #14]
64; CHECK-NEXT:    lsls r6, r6, #16
65; CHECK-NEXT:    str r6, [r1, #28]
66; CHECK-NEXT:    lsls r5, r5, #16
67; CHECK-NEXT:    str r5, [r1, #24]
68; CHECK-NEXT:    lsls r4, r4, #16
69; CHECK-NEXT:    str r4, [r1, #20]
70; CHECK-NEXT:    lsls r2, r2, #16
71; CHECK-NEXT:    str r2, [r1, #16]
72; CHECK-NEXT:    ldrh r2, [r0, #16]
73; CHECK-NEXT:    ldrh r4, [r0, #18]
74; CHECK-NEXT:    ldrh r5, [r0, #20]
75; CHECK-NEXT:    ldrh r6, [r0, #22]
76; CHECK-NEXT:    lsls r6, r6, #16
77; CHECK-NEXT:    str r6, [r1, #44]
78; CHECK-NEXT:    lsls r5, r5, #16
79; CHECK-NEXT:    str r5, [r1, #40]
80; CHECK-NEXT:    lsls r4, r4, #16
81; CHECK-NEXT:    str r4, [r1, #36]
82; CHECK-NEXT:    lsls r2, r2, #16
83; CHECK-NEXT:    str r2, [r1, #32]
84; CHECK-NEXT:    ldrh r2, [r0, #24]
85; CHECK-NEXT:    ldrh r4, [r0, #26]
86; CHECK-NEXT:    ldrh r5, [r0, #28]
87; CHECK-NEXT:    ldrh r6, [r0, #30]
88; CHECK-NEXT:    lsls r6, r6, #16
89; CHECK-NEXT:    str r6, [r1, #60]
90; CHECK-NEXT:    lsls r5, r5, #16
91; CHECK-NEXT:    str r5, [r1, #56]
92; CHECK-NEXT:    lsls r4, r4, #16
93; CHECK-NEXT:    str r4, [r1, #52]
94; CHECK-NEXT:    lsls r2, r2, #16
95; CHECK-NEXT:    str r2, [r1, #48]
96; CHECK-NEXT:    adds r1, #64
97; CHECK-NEXT:    adds r0, #32
98; CHECK-NEXT:    subs r3, r3, #4
99; CHECK-NEXT:    bne .LBB0_5
100; CHECK-NEXT:  .LBB0_6: @ %while.end
101; CHECK-NEXT:    movs r2, #3
102; CHECK-NEXT:    ands r7, r2
103; CHECK-NEXT:    beq .LBB0_10
104; CHECK-NEXT:  @ %bb.7: @ %while.body12
105; CHECK-NEXT:    ldrh r2, [r0]
106; CHECK-NEXT:    lsls r2, r2, #16
107; CHECK-NEXT:    str r2, [r1]
108; CHECK-NEXT:    cmp r7, #1
109; CHECK-NEXT:    beq .LBB0_10
110; CHECK-NEXT:  @ %bb.8: @ %while.body12.1
111; CHECK-NEXT:    ldrh r2, [r0, #2]
112; CHECK-NEXT:    lsls r2, r2, #16
113; CHECK-NEXT:    str r2, [r1, #4]
114; CHECK-NEXT:    cmp r7, #2
115; CHECK-NEXT:    beq .LBB0_10
116; CHECK-NEXT:  @ %bb.9: @ %while.body12.2
117; CHECK-NEXT:    ldrh r0, [r0, #4]
118; CHECK-NEXT:    lsls r0, r0, #16
119; CHECK-NEXT:    str r0, [r1, #8]
120; CHECK-NEXT:  .LBB0_10: @ %while.end17
121; CHECK-NEXT:    add sp, #8
122; CHECK-NEXT:    pop {r4, r5, r6, r7, pc}
123; CHECK-NEXT:  .LBB0_11: @ %while.body.prol.1
124; CHECK-NEXT:    ldrh r2, [r0, #8]
125; CHECK-NEXT:    ldrh r4, [r0, #10]
126; CHECK-NEXT:    ldrh r6, [r0, #12]
127; CHECK-NEXT:    ldrh r7, [r0, #14]
128; CHECK-NEXT:    lsls r7, r7, #16
129; CHECK-NEXT:    lsls r6, r6, #16
130; CHECK-NEXT:    lsls r4, r4, #16
131; CHECK-NEXT:    lsls r2, r2, #16
132; CHECK-NEXT:    str r2, [r1, #16]
133; CHECK-NEXT:    str r4, [r1, #20]
134; CHECK-NEXT:    str r6, [r1, #24]
135; CHECK-NEXT:    str r7, [r1, #28]
136; CHECK-NEXT:    cmp r5, #2
137; CHECK-NEXT:    bne .LBB0_13
138; CHECK-NEXT:  @ %bb.12:
139; CHECK-NEXT:    subs r3, r3, #2
140; CHECK-NEXT:    adds r1, #32
141; CHECK-NEXT:    adds r0, #16
142; CHECK-NEXT:    b .LBB0_14
143; CHECK-NEXT:  .LBB0_13: @ %while.body.prol.2
144; CHECK-NEXT:    ldrh r2, [r0, #16]
145; CHECK-NEXT:    ldrh r4, [r0, #18]
146; CHECK-NEXT:    ldrh r5, [r0, #20]
147; CHECK-NEXT:    ldrh r6, [r0, #22]
148; CHECK-NEXT:    lsls r6, r6, #16
149; CHECK-NEXT:    lsls r5, r5, #16
150; CHECK-NEXT:    lsls r4, r4, #16
151; CHECK-NEXT:    lsls r2, r2, #16
152; CHECK-NEXT:    mov r7, r1
153; CHECK-NEXT:    adds r7, #32
154; CHECK-NEXT:    stm r7!, {r2, r4, r5, r6}
155; CHECK-NEXT:    subs r3, r3, #3
156; CHECK-NEXT:    adds r1, #48
157; CHECK-NEXT:    adds r0, #24
158; CHECK-NEXT:  .LBB0_14: @ %while.body.prol.loopexit
159; CHECK-NEXT:    ldr r7, [sp, #4] @ 4-byte Reload
160; CHECK-NEXT:    ldr r2, [sp] @ 4-byte Reload
161; CHECK-NEXT:    cmp r2, #3
162; CHECK-NEXT:    bhs .LBB0_5
163; CHECK-NEXT:    b .LBB0_6
164entry:
  ; Fewer than four samples: no 4-sample group to process, go straight to the
  ; single-sample tail.
165  %cmp.not19 = icmp ult i32 %blockSize, 4
166  br i1 %cmp.not19, label %while.end, label %while.body.preheader
167
168while.body.preheader:                             ; preds = %entry
  ; %shr      = number of 4-sample groups (blockSize / 4).
  ; %0        = %shr - 1, used later to decide whether the main loop runs at all.
  ; %xtraiter = %shr mod 4 = number of groups the prologue has to handle.
169  %shr = lshr i32 %blockSize, 2
170  %0 = add nsw i32 %shr, -1
171  %xtraiter = and i32 %shr, 3
172  %lcmp.mod.not = icmp eq i32 %xtraiter, 0
173  br i1 %lcmp.mod.not, label %while.body.prol.loopexit, label %while.body.prol
174
175while.body.prol:                                  ; preds = %while.body.preheader
  ; Prologue group 0: converts source samples 0-3 into destination words 0-3.
176  %pIn.0.val.prol = load i16, ptr %pSrc, align 2
177  %1 = getelementptr i8, ptr %pSrc, i32 2
178  %pIn.0.val13.prol = load i16, ptr %1, align 2
179  %conv.i.prol = sext i16 %pIn.0.val13.prol to i32
180  %shl.i.prol = shl nsw i32 %conv.i.prol, 16
181  %conv22.i.prol = zext i16 %pIn.0.val.prol to i32
182  %add.ptr2.prol = getelementptr inbounds i16, ptr %pSrc, i32 4
183  %add.ptr3.prol = getelementptr inbounds i16, ptr %pSrc, i32 2
184  %add.ptr3.val.prol = load i16, ptr %add.ptr3.prol, align 2
185  %2 = getelementptr i16, ptr %pSrc, i32 3
186  %add.ptr3.val14.prol = load i16, ptr %2, align 2
187  %conv.i15.prol = sext i16 %add.ptr3.val14.prol to i32
188  %shl.i16.prol = shl nsw i32 %conv.i15.prol, 16
189  %conv22.i17.prol = zext i16 %add.ptr3.val.prol to i32
190  %shl.prol = shl nuw i32 %conv22.i.prol, 16
191  %shl5.prol = shl nuw i32 %conv22.i17.prol, 16
192  %incdec.ptr.prol = getelementptr inbounds i32, ptr %pDst, i32 1
193  store i32 %shl.prol, ptr %pDst, align 4
194  %incdec.ptr7.prol = getelementptr inbounds i32, ptr %pDst, i32 2
195  store i32 %shl.i.prol, ptr %incdec.ptr.prol, align 4
196  %incdec.ptr8.prol = getelementptr inbounds i32, ptr %pDst, i32 3
197  store i32 %shl5.prol, ptr %incdec.ptr7.prol, align 4
198  %incdec.ptr9.prol = getelementptr inbounds i32, ptr %pDst, i32 4
199  store i32 %shl.i16.prol, ptr %incdec.ptr8.prol, align 4
200  %dec.prol = add nsw i32 %shr, -1
201  %prol.iter.cmp.not = icmp eq i32 %xtraiter, 1
202  br i1 %prol.iter.cmp.not, label %while.body.prol.loopexit, label %while.body.prol.1
203
204while.body.prol.1:                                ; preds = %while.body.prol
  ; Prologue group 1: converts source samples 4-7 into destination words 4-7.
205  %pIn.0.val.prol.1 = load i16, ptr %add.ptr2.prol, align 2
206  %3 = getelementptr i16, ptr %pSrc, i32 5
207  %pIn.0.val13.prol.1 = load i16, ptr %3, align 2
208  %conv.i.prol.1 = sext i16 %pIn.0.val13.prol.1 to i32
209  %shl.i.prol.1 = shl nsw i32 %conv.i.prol.1, 16
210  %conv22.i.prol.1 = zext i16 %pIn.0.val.prol.1 to i32
211  %add.ptr2.prol.1 = getelementptr inbounds i16, ptr %pSrc, i32 8
212  %add.ptr3.prol.1 = getelementptr inbounds i16, ptr %pSrc, i32 6
213  %add.ptr3.val.prol.1 = load i16, ptr %add.ptr3.prol.1, align 2
214  %4 = getelementptr i16, ptr %pSrc, i32 7
215  %add.ptr3.val14.prol.1 = load i16, ptr %4, align 2
216  %conv.i15.prol.1 = sext i16 %add.ptr3.val14.prol.1 to i32
217  %shl.i16.prol.1 = shl nsw i32 %conv.i15.prol.1, 16
218  %conv22.i17.prol.1 = zext i16 %add.ptr3.val.prol.1 to i32
219  %shl.prol.1 = shl nuw i32 %conv22.i.prol.1, 16
220  %shl5.prol.1 = shl nuw i32 %conv22.i17.prol.1, 16
221  %incdec.ptr.prol.1 = getelementptr inbounds i32, ptr %pDst, i32 5
222  store i32 %shl.prol.1, ptr %incdec.ptr9.prol, align 4
223  %incdec.ptr7.prol.1 = getelementptr inbounds i32, ptr %pDst, i32 6
224  store i32 %shl.i.prol.1, ptr %incdec.ptr.prol.1, align 4
225  %incdec.ptr8.prol.1 = getelementptr inbounds i32, ptr %pDst, i32 7
226  store i32 %shl5.prol.1, ptr %incdec.ptr7.prol.1, align 4
227  %incdec.ptr9.prol.1 = getelementptr inbounds i32, ptr %pDst, i32 8
228  store i32 %shl.i16.prol.1, ptr %incdec.ptr8.prol.1, align 4
229  %dec.prol.1 = add nsw i32 %shr, -2
230  %prol.iter.cmp.1.not = icmp eq i32 %xtraiter, 2
231  br i1 %prol.iter.cmp.1.not, label %while.body.prol.loopexit, label %while.body.prol.2
232
233while.body.prol.2:                                ; preds = %while.body.prol.1
  ; Prologue group 2: converts source samples 8-11 into destination words 8-11.
  ; Reached only when %xtraiter is 3, so no further check is needed afterwards.
234  %pIn.0.val.prol.2 = load i16, ptr %add.ptr2.prol.1, align 2
235  %5 = getelementptr i16, ptr %pSrc, i32 9
236  %pIn.0.val13.prol.2 = load i16, ptr %5, align 2
237  %conv.i.prol.2 = sext i16 %pIn.0.val13.prol.2 to i32
238  %shl.i.prol.2 = shl nsw i32 %conv.i.prol.2, 16
239  %conv22.i.prol.2 = zext i16 %pIn.0.val.prol.2 to i32
240  %add.ptr2.prol.2 = getelementptr inbounds i16, ptr %pSrc, i32 12
241  %add.ptr3.prol.2 = getelementptr inbounds i16, ptr %pSrc, i32 10
242  %add.ptr3.val.prol.2 = load i16, ptr %add.ptr3.prol.2, align 2
243  %6 = getelementptr i16, ptr %pSrc, i32 11
244  %add.ptr3.val14.prol.2 = load i16, ptr %6, align 2
245  %conv.i15.prol.2 = sext i16 %add.ptr3.val14.prol.2 to i32
246  %shl.i16.prol.2 = shl nsw i32 %conv.i15.prol.2, 16
247  %conv22.i17.prol.2 = zext i16 %add.ptr3.val.prol.2 to i32
248  %shl.prol.2 = shl nuw i32 %conv22.i.prol.2, 16
249  %shl5.prol.2 = shl nuw i32 %conv22.i17.prol.2, 16
250  %incdec.ptr.prol.2 = getelementptr inbounds i32, ptr %pDst, i32 9
251  store i32 %shl.prol.2, ptr %incdec.ptr9.prol.1, align 4
252  %incdec.ptr7.prol.2 = getelementptr inbounds i32, ptr %pDst, i32 10
253  store i32 %shl.i.prol.2, ptr %incdec.ptr.prol.2, align 4
254  %incdec.ptr8.prol.2 = getelementptr inbounds i32, ptr %pDst, i32 11
255  store i32 %shl5.prol.2, ptr %incdec.ptr7.prol.2, align 4
256  %incdec.ptr9.prol.2 = getelementptr inbounds i32, ptr %pDst, i32 12
257  store i32 %shl.i16.prol.2, ptr %incdec.ptr8.prol.2, align 4
258  %dec.prol.2 = add nsw i32 %shr, -3
259  br label %while.body.prol.loopexit
260
261while.body.prol.loopexit:                         ; preds = %while.body.prol, %while.body.prol.1, %while.body.prol.2, %while.body.preheader
  ; Merge point after the prologue. The *.unr phis feed the main loop with the
  ; source/destination pointers and the remaining group count; the *.lcssa.unr
  ; phis feed while.end when the main loop is skipped.
  ; The undef inputs on the preheader edge are never observed: that edge is taken
  ; only when %shr mod 4 == 0 with %shr >= 1, hence %shr >= 4 and %0 >= 3, so the
  ; branch below goes to while.body rather than while.end.
262  %add.ptr2.lcssa.unr = phi ptr [ undef, %while.body.preheader ], [ %add.ptr2.prol, %while.body.prol ], [ %add.ptr2.prol.1, %while.body.prol.1 ], [ %add.ptr2.prol.2, %while.body.prol.2 ]
263  %incdec.ptr9.lcssa.unr = phi ptr [ undef, %while.body.preheader ], [ %incdec.ptr9.prol, %while.body.prol ], [ %incdec.ptr9.prol.1, %while.body.prol.1 ], [ %incdec.ptr9.prol.2, %while.body.prol.2 ]
264  %pDst.addr.022.unr = phi ptr [ %pDst, %while.body.preheader ], [ %incdec.ptr9.prol, %while.body.prol ], [ %incdec.ptr9.prol.1, %while.body.prol.1 ], [ %incdec.ptr9.prol.2, %while.body.prol.2 ]
265  %blkCnt.021.unr = phi i32 [ %shr, %while.body.preheader ], [ %dec.prol, %while.body.prol ], [ %dec.prol.1, %while.body.prol.1 ], [ %dec.prol.2, %while.body.prol.2 ]
266  %pIn.020.unr = phi ptr [ %pSrc, %while.body.preheader ], [ %add.ptr2.prol, %while.body.prol ], [ %add.ptr2.prol.1, %while.body.prol.1 ], [ %add.ptr2.prol.2, %while.body.prol.2 ]
  ; Fewer than four groups in total (%shr - 1 < 3): the prologue already handled
  ; all of them, so skip the main loop.
267  %7 = icmp ult i32 %0, 3
268  br i1 %7, label %while.end, label %while.body
269
270while.body:                                       ; preds = %while.body.prol.loopexit, %while.body
  ; Main loop: each trip converts 16 samples (four groups of four), advances the
  ; source by 16 i16 elements and the destination by 16 i32 elements, and
  ; decrements the group counter by 4 until it reaches zero.
271  %pDst.addr.022 = phi ptr [ %incdec.ptr9.3, %while.body ], [ %pDst.addr.022.unr, %while.body.prol.loopexit ]
272  %blkCnt.021 = phi i32 [ %dec.3, %while.body ], [ %blkCnt.021.unr, %while.body.prol.loopexit ]
273  %pIn.020 = phi ptr [ %add.ptr2.3, %while.body ], [ %pIn.020.unr, %while.body.prol.loopexit ]
  ; Group 0 of this trip: samples 0-3 relative to %pIn.020.
274  %pIn.0.val = load i16, ptr %pIn.020, align 2
275  %8 = getelementptr i8, ptr %pIn.020, i32 2
276  %pIn.0.val13 = load i16, ptr %8, align 2
277  %conv.i = sext i16 %pIn.0.val13 to i32
278  %shl.i = shl nsw i32 %conv.i, 16
279  %conv22.i = zext i16 %pIn.0.val to i32
280  %add.ptr2 = getelementptr inbounds i16, ptr %pIn.020, i32 4
281  %add.ptr3 = getelementptr inbounds i16, ptr %pIn.020, i32 2
282  %add.ptr3.val = load i16, ptr %add.ptr3, align 2
283  %9 = getelementptr i16, ptr %pIn.020, i32 3
284  %add.ptr3.val14 = load i16, ptr %9, align 2
285  %conv.i15 = sext i16 %add.ptr3.val14 to i32
286  %shl.i16 = shl nsw i32 %conv.i15, 16
287  %conv22.i17 = zext i16 %add.ptr3.val to i32
288  %shl = shl nuw i32 %conv22.i, 16
289  %shl5 = shl nuw i32 %conv22.i17, 16
290  %incdec.ptr = getelementptr inbounds i32, ptr %pDst.addr.022, i32 1
291  store i32 %shl, ptr %pDst.addr.022, align 4
292  %incdec.ptr7 = getelementptr inbounds i32, ptr %pDst.addr.022, i32 2
293  store i32 %shl.i, ptr %incdec.ptr, align 4
294  %incdec.ptr8 = getelementptr inbounds i32, ptr %pDst.addr.022, i32 3
295  store i32 %shl5, ptr %incdec.ptr7, align 4
296  %incdec.ptr9 = getelementptr inbounds i32, ptr %pDst.addr.022, i32 4
297  store i32 %shl.i16, ptr %incdec.ptr8, align 4
  ; Group 1 of this trip: samples 4-7.
298  %pIn.0.val.1 = load i16, ptr %add.ptr2, align 2
299  %10 = getelementptr i16, ptr %pIn.020, i32 5
300  %pIn.0.val13.1 = load i16, ptr %10, align 2
301  %conv.i.1 = sext i16 %pIn.0.val13.1 to i32
302  %shl.i.1 = shl nsw i32 %conv.i.1, 16
303  %conv22.i.1 = zext i16 %pIn.0.val.1 to i32
304  %add.ptr2.1 = getelementptr inbounds i16, ptr %pIn.020, i32 8
305  %add.ptr3.1 = getelementptr inbounds i16, ptr %pIn.020, i32 6
306  %add.ptr3.val.1 = load i16, ptr %add.ptr3.1, align 2
307  %11 = getelementptr i16, ptr %pIn.020, i32 7
308  %add.ptr3.val14.1 = load i16, ptr %11, align 2
309  %conv.i15.1 = sext i16 %add.ptr3.val14.1 to i32
310  %shl.i16.1 = shl nsw i32 %conv.i15.1, 16
311  %conv22.i17.1 = zext i16 %add.ptr3.val.1 to i32
312  %shl.1 = shl nuw i32 %conv22.i.1, 16
313  %shl5.1 = shl nuw i32 %conv22.i17.1, 16
314  %incdec.ptr.1 = getelementptr inbounds i32, ptr %pDst.addr.022, i32 5
315  store i32 %shl.1, ptr %incdec.ptr9, align 4
316  %incdec.ptr7.1 = getelementptr inbounds i32, ptr %pDst.addr.022, i32 6
317  store i32 %shl.i.1, ptr %incdec.ptr.1, align 4
318  %incdec.ptr8.1 = getelementptr inbounds i32, ptr %pDst.addr.022, i32 7
319  store i32 %shl5.1, ptr %incdec.ptr7.1, align 4
320  %incdec.ptr9.1 = getelementptr inbounds i32, ptr %pDst.addr.022, i32 8
321  store i32 %shl.i16.1, ptr %incdec.ptr8.1, align 4
  ; Group 2 of this trip: samples 8-11.
322  %pIn.0.val.2 = load i16, ptr %add.ptr2.1, align 2
323  %12 = getelementptr i16, ptr %pIn.020, i32 9
324  %pIn.0.val13.2 = load i16, ptr %12, align 2
325  %conv.i.2 = sext i16 %pIn.0.val13.2 to i32
326  %shl.i.2 = shl nsw i32 %conv.i.2, 16
327  %conv22.i.2 = zext i16 %pIn.0.val.2 to i32
328  %add.ptr2.2 = getelementptr inbounds i16, ptr %pIn.020, i32 12
329  %add.ptr3.2 = getelementptr inbounds i16, ptr %pIn.020, i32 10
330  %add.ptr3.val.2 = load i16, ptr %add.ptr3.2, align 2
331  %13 = getelementptr i16, ptr %pIn.020, i32 11
332  %add.ptr3.val14.2 = load i16, ptr %13, align 2
333  %conv.i15.2 = sext i16 %add.ptr3.val14.2 to i32
334  %shl.i16.2 = shl nsw i32 %conv.i15.2, 16
335  %conv22.i17.2 = zext i16 %add.ptr3.val.2 to i32
336  %shl.2 = shl nuw i32 %conv22.i.2, 16
337  %shl5.2 = shl nuw i32 %conv22.i17.2, 16
338  %incdec.ptr.2 = getelementptr inbounds i32, ptr %pDst.addr.022, i32 9
339  store i32 %shl.2, ptr %incdec.ptr9.1, align 4
340  %incdec.ptr7.2 = getelementptr inbounds i32, ptr %pDst.addr.022, i32 10
341  store i32 %shl.i.2, ptr %incdec.ptr.2, align 4
342  %incdec.ptr8.2 = getelementptr inbounds i32, ptr %pDst.addr.022, i32 11
343  store i32 %shl5.2, ptr %incdec.ptr7.2, align 4
344  %incdec.ptr9.2 = getelementptr inbounds i32, ptr %pDst.addr.022, i32 12
345  store i32 %shl.i16.2, ptr %incdec.ptr8.2, align 4
  ; Group 3 of this trip: samples 12-15.
346  %pIn.0.val.3 = load i16, ptr %add.ptr2.2, align 2
347  %14 = getelementptr i16, ptr %pIn.020, i32 13
348  %pIn.0.val13.3 = load i16, ptr %14, align 2
349  %conv.i.3 = sext i16 %pIn.0.val13.3 to i32
350  %shl.i.3 = shl nsw i32 %conv.i.3, 16
351  %conv22.i.3 = zext i16 %pIn.0.val.3 to i32
352  %add.ptr2.3 = getelementptr inbounds i16, ptr %pIn.020, i32 16
353  %add.ptr3.3 = getelementptr inbounds i16, ptr %pIn.020, i32 14
354  %add.ptr3.val.3 = load i16, ptr %add.ptr3.3, align 2
355  %15 = getelementptr i16, ptr %pIn.020, i32 15
356  %add.ptr3.val14.3 = load i16, ptr %15, align 2
357  %conv.i15.3 = sext i16 %add.ptr3.val14.3 to i32
358  %shl.i16.3 = shl nsw i32 %conv.i15.3, 16
359  %conv22.i17.3 = zext i16 %add.ptr3.val.3 to i32
360  %shl.3 = shl nuw i32 %conv22.i.3, 16
361  %shl5.3 = shl nuw i32 %conv22.i17.3, 16
362  %incdec.ptr.3 = getelementptr inbounds i32, ptr %pDst.addr.022, i32 13
363  store i32 %shl.3, ptr %incdec.ptr9.2, align 4
364  %incdec.ptr7.3 = getelementptr inbounds i32, ptr %pDst.addr.022, i32 14
365  store i32 %shl.i.3, ptr %incdec.ptr.3, align 4
366  %incdec.ptr8.3 = getelementptr inbounds i32, ptr %pDst.addr.022, i32 15
367  store i32 %shl5.3, ptr %incdec.ptr7.3, align 4
368  %incdec.ptr9.3 = getelementptr inbounds i32, ptr %pDst.addr.022, i32 16
369  store i32 %shl.i16.3, ptr %incdec.ptr8.3, align 4
370  %dec.3 = add nsw i32 %blkCnt.021, -4
371  %cmp.not.3 = icmp eq i32 %dec.3, 0
372  br i1 %cmp.not.3, label %while.end, label %while.body
373
374while.end:                                        ; preds = %while.body.prol.loopexit, %while.body, %entry
  ; All 4-sample groups are done. %rem = blockSize mod 4 is the number of
  ; leftover samples; they are converted one at a time by the blocks below.
375  %pIn.0.lcssa = phi ptr [ %pSrc, %entry ], [ %add.ptr2.lcssa.unr, %while.body.prol.loopexit ], [ %add.ptr2.3, %while.body ]
376  %pDst.addr.0.lcssa = phi ptr [ %pDst, %entry ], [ %incdec.ptr9.lcssa.unr, %while.body.prol.loopexit ], [ %incdec.ptr9.3, %while.body ]
377  %rem = and i32 %blockSize, 3
378  %cmp11.not24 = icmp eq i32 %rem, 0
379  br i1 %cmp11.not24, label %while.end17, label %while.body12
380
381while.body12:                                     ; preds = %while.end
  ; Tail sample 0.
382  %16 = load i16, ptr %pIn.0.lcssa, align 2
383  %conv = sext i16 %16 to i32
384  %shl14 = shl nsw i32 %conv, 16
385  store i32 %shl14, ptr %pDst.addr.0.lcssa, align 4
386  %cmp11.not = icmp eq i32 %rem, 1
387  br i1 %cmp11.not, label %while.end17, label %while.body12.1
388
389while.body12.1:                                   ; preds = %while.body12
  ; Tail sample 1.
390  %incdec.ptr15 = getelementptr inbounds i32, ptr %pDst.addr.0.lcssa, i32 1
391  %incdec.ptr13 = getelementptr inbounds i16, ptr %pIn.0.lcssa, i32 1
392  %17 = load i16, ptr %incdec.ptr13, align 2
393  %conv.1 = sext i16 %17 to i32
394  %shl14.1 = shl nsw i32 %conv.1, 16
395  store i32 %shl14.1, ptr %incdec.ptr15, align 4
396  %cmp11.not.1 = icmp eq i32 %rem, 2
397  br i1 %cmp11.not.1, label %while.end17, label %while.body12.2
398
399while.body12.2:                                   ; preds = %while.body12.1
  ; Tail sample 2; reached only when %rem is 3, so this is the last one.
400  %incdec.ptr15.1 = getelementptr inbounds i32, ptr %pDst.addr.0.lcssa, i32 2
401  %incdec.ptr13.1 = getelementptr inbounds i16, ptr %pIn.0.lcssa, i32 2
402  %18 = load i16, ptr %incdec.ptr13.1, align 2
403  %conv.2 = sext i16 %18 to i32
404  %shl14.2 = shl nsw i32 %conv.2, 16
405  store i32 %shl14.2, ptr %incdec.ptr15.1, align 4
406  br label %while.end17
407
408while.end17:                                      ; preds = %while.body12, %while.body12.1, %while.body12.2, %while.end
409  ret void
410}
411
412define void @arm_q15_to_q31_altorder(ptr nocapture noundef readonly %pSrc, ptr nocapture noundef writeonly %pDst, i32 noundef %blockSize) {
413; CHECK-LABEL: arm_q15_to_q31_altorder:
414; CHECK:       @ %bb.0: @ %entry
415; CHECK-NEXT:    .save {r4, r5, r6, r7, lr}
416; CHECK-NEXT:    push {r4, r5, r6, r7, lr}
417; CHECK-NEXT:    .pad #8
418; CHECK-NEXT:    sub sp, #8
419; CHECK-NEXT:    mov r7, r2
420; CHECK-NEXT:    lsrs r3, r2, #2
421; CHECK-NEXT:    beq .LBB1_6
422; CHECK-NEXT:  @ %bb.1: @ %while.body.preheader
423; CHECK-NEXT:    movs r5, #3
424; CHECK-NEXT:    ands r5, r3
425; CHECK-NEXT:    subs r2, r3, #1
426; CHECK-NEXT:    cbz r5, .LBB1_4
427; CHECK-NEXT:  @ %bb.2: @ %while.body.prol
428; CHECK-NEXT:    str r2, [sp] @ 4-byte Spill
429; CHECK-NEXT:    str r7, [sp, #4] @ 4-byte Spill
430; CHECK-NEXT:    ldrh r2, [r0]
431; CHECK-NEXT:    ldrh r7, [r0, #2]
432; CHECK-NEXT:    ldrh r4, [r0, #4]
433; CHECK-NEXT:    ldrh r6, [r0, #6]
434; CHECK-NEXT:    lsls r6, r6, #16
435; CHECK-NEXT:    lsls r4, r4, #16
436; CHECK-NEXT:    lsls r7, r7, #16
437; CHECK-NEXT:    lsls r2, r2, #16
438; CHECK-NEXT:    stm r1!, {r2, r7}
439; CHECK-NEXT:    str r4, [r1]
440; CHECK-NEXT:    str r6, [r1, #4]
441; CHECK-NEXT:    subs r1, #8
442; CHECK-NEXT:    cmp r5, #1
443; CHECK-NEXT:    bne .LBB1_11
444; CHECK-NEXT:  @ %bb.3:
445; CHECK-NEXT:    adds r1, #16
446; CHECK-NEXT:    adds r0, #8
447; CHECK-NEXT:    ldr r2, [sp] @ 4-byte Reload
448; CHECK-NEXT:    mov r3, r2
449; CHECK-NEXT:    ldr r7, [sp, #4] @ 4-byte Reload
450; CHECK-NEXT:  .LBB1_4: @ %while.body.prol.loopexit
451; CHECK-NEXT:    cmp r2, #3
452; CHECK-NEXT:    blo .LBB1_6
453; CHECK-NEXT:  .LBB1_5: @ %while.body
454; CHECK-NEXT:    @ =>This Inner Loop Header: Depth=1
455; CHECK-NEXT:    ldrh r2, [r0]
456; CHECK-NEXT:    ldrh r4, [r0, #2]
457; CHECK-NEXT:    ldrh r5, [r0, #4]
458; CHECK-NEXT:    ldrh r6, [r0, #6]
459; CHECK-NEXT:    lsls r6, r6, #16
460; CHECK-NEXT:    str r6, [r1, #12]
461; CHECK-NEXT:    lsls r5, r5, #16
462; CHECK-NEXT:    str r5, [r1, #8]
463; CHECK-NEXT:    lsls r4, r4, #16
464; CHECK-NEXT:    str r4, [r1, #4]
465; CHECK-NEXT:    lsls r2, r2, #16
466; CHECK-NEXT:    str r2, [r1]
467; CHECK-NEXT:    ldrh r2, [r0, #8]
468; CHECK-NEXT:    ldrh r4, [r0, #10]
469; CHECK-NEXT:    ldrh r5, [r0, #12]
470; CHECK-NEXT:    ldrh r6, [r0, #14]
471; CHECK-NEXT:    lsls r6, r6, #16
472; CHECK-NEXT:    str r6, [r1, #28]
473; CHECK-NEXT:    lsls r5, r5, #16
474; CHECK-NEXT:    str r5, [r1, #24]
475; CHECK-NEXT:    lsls r4, r4, #16
476; CHECK-NEXT:    str r4, [r1, #20]
477; CHECK-NEXT:    lsls r2, r2, #16
478; CHECK-NEXT:    str r2, [r1, #16]
479; CHECK-NEXT:    ldrh r2, [r0, #16]
480; CHECK-NEXT:    ldrh r4, [r0, #18]
481; CHECK-NEXT:    ldrh r5, [r0, #20]
482; CHECK-NEXT:    ldrh r6, [r0, #22]
483; CHECK-NEXT:    lsls r6, r6, #16
484; CHECK-NEXT:    str r6, [r1, #44]
485; CHECK-NEXT:    lsls r5, r5, #16
486; CHECK-NEXT:    str r5, [r1, #40]
487; CHECK-NEXT:    lsls r4, r4, #16
488; CHECK-NEXT:    str r4, [r1, #36]
489; CHECK-NEXT:    lsls r2, r2, #16
490; CHECK-NEXT:    str r2, [r1, #32]
491; CHECK-NEXT:    ldrh r2, [r0, #24]
492; CHECK-NEXT:    ldrh r4, [r0, #26]
493; CHECK-NEXT:    ldrh r5, [r0, #28]
494; CHECK-NEXT:    ldrh r6, [r0, #30]
495; CHECK-NEXT:    lsls r6, r6, #16
496; CHECK-NEXT:    str r6, [r1, #60]
497; CHECK-NEXT:    lsls r5, r5, #16
498; CHECK-NEXT:    str r5, [r1, #56]
499; CHECK-NEXT:    lsls r4, r4, #16
500; CHECK-NEXT:    str r4, [r1, #52]
501; CHECK-NEXT:    lsls r2, r2, #16
502; CHECK-NEXT:    str r2, [r1, #48]
503; CHECK-NEXT:    adds r1, #64
504; CHECK-NEXT:    adds r0, #32
505; CHECK-NEXT:    subs r3, r3, #4
506; CHECK-NEXT:    bne .LBB1_5
507; CHECK-NEXT:  .LBB1_6: @ %while.end
508; CHECK-NEXT:    movs r2, #3
509; CHECK-NEXT:    ands r7, r2
510; CHECK-NEXT:    beq .LBB1_10
511; CHECK-NEXT:  @ %bb.7: @ %while.body12
512; CHECK-NEXT:    ldrh r2, [r0]
513; CHECK-NEXT:    lsls r2, r2, #16
514; CHECK-NEXT:    str r2, [r1]
515; CHECK-NEXT:    cmp r7, #1
516; CHECK-NEXT:    beq .LBB1_10
517; CHECK-NEXT:  @ %bb.8: @ %while.body12.1
518; CHECK-NEXT:    ldrh r2, [r0, #2]
519; CHECK-NEXT:    lsls r2, r2, #16
520; CHECK-NEXT:    str r2, [r1, #4]
521; CHECK-NEXT:    cmp r7, #2
522; CHECK-NEXT:    beq .LBB1_10
523; CHECK-NEXT:  @ %bb.9: @ %while.body12.2
524; CHECK-NEXT:    ldrh r0, [r0, #4]
525; CHECK-NEXT:    lsls r0, r0, #16
526; CHECK-NEXT:    str r0, [r1, #8]
527; CHECK-NEXT:  .LBB1_10: @ %while.end17
528; CHECK-NEXT:    add sp, #8
529; CHECK-NEXT:    pop {r4, r5, r6, r7, pc}
530; CHECK-NEXT:  .LBB1_11: @ %while.body.prol.1
531; CHECK-NEXT:    ldrh r2, [r0, #8]
532; CHECK-NEXT:    ldrh r4, [r0, #10]
533; CHECK-NEXT:    ldrh r6, [r0, #12]
534; CHECK-NEXT:    ldrh r7, [r0, #14]
535; CHECK-NEXT:    lsls r7, r7, #16
536; CHECK-NEXT:    lsls r6, r6, #16
537; CHECK-NEXT:    lsls r4, r4, #16
538; CHECK-NEXT:    lsls r2, r2, #16
539; CHECK-NEXT:    str r2, [r1, #16]
540; CHECK-NEXT:    str r4, [r1, #20]
541; CHECK-NEXT:    str r6, [r1, #24]
542; CHECK-NEXT:    str r7, [r1, #28]
543; CHECK-NEXT:    cmp r5, #2
544; CHECK-NEXT:    bne .LBB1_13
545; CHECK-NEXT:  @ %bb.12:
546; CHECK-NEXT:    subs r3, r3, #2
547; CHECK-NEXT:    adds r1, #32
548; CHECK-NEXT:    adds r0, #16
549; CHECK-NEXT:    b .LBB1_14
550; CHECK-NEXT:  .LBB1_13: @ %while.body.prol.2
551; CHECK-NEXT:    ldrh r2, [r0, #16]
552; CHECK-NEXT:    ldrh r4, [r0, #18]
553; CHECK-NEXT:    ldrh r5, [r0, #20]
554; CHECK-NEXT:    ldrh r6, [r0, #22]
555; CHECK-NEXT:    lsls r6, r6, #16
556; CHECK-NEXT:    lsls r5, r5, #16
557; CHECK-NEXT:    lsls r4, r4, #16
558; CHECK-NEXT:    lsls r2, r2, #16
559; CHECK-NEXT:    mov r7, r1
560; CHECK-NEXT:    adds r7, #32
561; CHECK-NEXT:    stm r7!, {r2, r4, r5, r6}
562; CHECK-NEXT:    subs r3, r3, #3
563; CHECK-NEXT:    adds r1, #48
564; CHECK-NEXT:    adds r0, #24
565; CHECK-NEXT:  .LBB1_14: @ %while.body.prol.loopexit
566; CHECK-NEXT:    ldr r7, [sp, #4] @ 4-byte Reload
567; CHECK-NEXT:    ldr r2, [sp] @ 4-byte Reload
568; CHECK-NEXT:    cmp r2, #3
569; CHECK-NEXT:    bhs .LBB1_5
570; CHECK-NEXT:    b .LBB1_6
571entry:
572  %cmp.not18 = icmp ult i32 %blockSize, 4
573  br i1 %cmp.not18, label %while.end, label %while.body.preheader
574
575while.body.preheader:                             ; preds = %entry
576  %shr = lshr i32 %blockSize, 2
577  %0 = add nsw i32 %shr, -1
578  %xtraiter = and i32 %shr, 3
579  %lcmp.mod.not = icmp eq i32 %xtraiter, 0
580  br i1 %lcmp.mod.not, label %while.body.prol.loopexit, label %while.body.prol
581
582while.body.prol:                                  ; preds = %while.body.preheader
583  %arrayidx.i.prol = getelementptr inbounds i16, ptr %pSrc, i32 1
584  %1 = load i16, ptr %arrayidx.i.prol, align 2
585  %conv.i.prol = sext i16 %1 to i32
586  %shl.i.prol = shl nsw i32 %conv.i.prol, 16
587  %2 = load i16, ptr %pSrc, align 2
588  %conv22.i.prol = zext i16 %2 to i32
589  %add.ptr2.prol = getelementptr inbounds i16, ptr %pSrc, i32 4
590  %add.ptr3.prol = getelementptr inbounds i16, ptr %pSrc, i32 2
591  %arrayidx.i13.prol = getelementptr inbounds i16, ptr %pSrc, i32 3
592  %3 = load i16, ptr %arrayidx.i13.prol, align 2
593  %conv.i14.prol = sext i16 %3 to i32
594  %shl.i15.prol = shl nsw i32 %conv.i14.prol, 16
595  %4 = load i16, ptr %add.ptr3.prol, align 2
596  %conv22.i16.prol = zext i16 %4 to i32
597  %shl.prol = shl nuw i32 %conv22.i.prol, 16
598  %shl5.prol = shl nuw i32 %conv22.i16.prol, 16
599  %incdec.ptr.prol = getelementptr inbounds i32, ptr %pDst, i32 1
600  store i32 %shl.prol, ptr %pDst, align 4
601  %incdec.ptr7.prol = getelementptr inbounds i32, ptr %pDst, i32 2
602  store i32 %shl.i.prol, ptr %incdec.ptr.prol, align 4
603  %incdec.ptr8.prol = getelementptr inbounds i32, ptr %pDst, i32 3
604  store i32 %shl5.prol, ptr %incdec.ptr7.prol, align 4
605  %incdec.ptr9.prol = getelementptr inbounds i32, ptr %pDst, i32 4
606  store i32 %shl.i15.prol, ptr %incdec.ptr8.prol, align 4
607  %dec.prol = add nsw i32 %shr, -1
608  %prol.iter.cmp.not = icmp eq i32 %xtraiter, 1
609  br i1 %prol.iter.cmp.not, label %while.body.prol.loopexit, label %while.body.prol.1
610
611while.body.prol.1:                                ; preds = %while.body.prol
612  %arrayidx.i.prol.1 = getelementptr inbounds i16, ptr %pSrc, i32 5
613  %5 = load i16, ptr %arrayidx.i.prol.1, align 2
614  %conv.i.prol.1 = sext i16 %5 to i32
615  %shl.i.prol.1 = shl nsw i32 %conv.i.prol.1, 16
616  %6 = load i16, ptr %add.ptr2.prol, align 2
617  %conv22.i.prol.1 = zext i16 %6 to i32
618  %add.ptr2.prol.1 = getelementptr inbounds i16, ptr %pSrc, i32 8
619  %add.ptr3.prol.1 = getelementptr inbounds i16, ptr %pSrc, i32 6
620  %arrayidx.i13.prol.1 = getelementptr inbounds i16, ptr %pSrc, i32 7
621  %7 = load i16, ptr %arrayidx.i13.prol.1, align 2
622  %conv.i14.prol.1 = sext i16 %7 to i32
623  %shl.i15.prol.1 = shl nsw i32 %conv.i14.prol.1, 16
624  %8 = load i16, ptr %add.ptr3.prol.1, align 2
625  %conv22.i16.prol.1 = zext i16 %8 to i32
626  %shl.prol.1 = shl nuw i32 %conv22.i.prol.1, 16
627  %shl5.prol.1 = shl nuw i32 %conv22.i16.prol.1, 16
628  %incdec.ptr.prol.1 = getelementptr inbounds i32, ptr %pDst, i32 5
629  store i32 %shl.prol.1, ptr %incdec.ptr9.prol, align 4
630  %incdec.ptr7.prol.1 = getelementptr inbounds i32, ptr %pDst, i32 6
631  store i32 %shl.i.prol.1, ptr %incdec.ptr.prol.1, align 4
632  %incdec.ptr8.prol.1 = getelementptr inbounds i32, ptr %pDst, i32 7
633  store i32 %shl5.prol.1, ptr %incdec.ptr7.prol.1, align 4
634  %incdec.ptr9.prol.1 = getelementptr inbounds i32, ptr %pDst, i32 8
635  store i32 %shl.i15.prol.1, ptr %incdec.ptr8.prol.1, align 4
636  %dec.prol.1 = add nsw i32 %shr, -2
637  %prol.iter.cmp.1.not = icmp eq i32 %xtraiter, 2
638  br i1 %prol.iter.cmp.1.not, label %while.body.prol.loopexit, label %while.body.prol.2
639
640while.body.prol.2:                                ; preds = %while.body.prol.1
641  %arrayidx.i.prol.2 = getelementptr inbounds i16, ptr %pSrc, i32 9
642  %9 = load i16, ptr %arrayidx.i.prol.2, align 2
643  %conv.i.prol.2 = sext i16 %9 to i32
644  %shl.i.prol.2 = shl nsw i32 %conv.i.prol.2, 16
645  %10 = load i16, ptr %add.ptr2.prol.1, align 2
646  %conv22.i.prol.2 = zext i16 %10 to i32
647  %add.ptr2.prol.2 = getelementptr inbounds i16, ptr %pSrc, i32 12
648  %add.ptr3.prol.2 = getelementptr inbounds i16, ptr %pSrc, i32 10
649  %arrayidx.i13.prol.2 = getelementptr inbounds i16, ptr %pSrc, i32 11
650  %11 = load i16, ptr %arrayidx.i13.prol.2, align 2
651  %conv.i14.prol.2 = sext i16 %11 to i32
652  %shl.i15.prol.2 = shl nsw i32 %conv.i14.prol.2, 16
653  %12 = load i16, ptr %add.ptr3.prol.2, align 2
654  %conv22.i16.prol.2 = zext i16 %12 to i32
655  %shl.prol.2 = shl nuw i32 %conv22.i.prol.2, 16
656  %shl5.prol.2 = shl nuw i32 %conv22.i16.prol.2, 16
657  %incdec.ptr.prol.2 = getelementptr inbounds i32, ptr %pDst, i32 9
658  store i32 %shl.prol.2, ptr %incdec.ptr9.prol.1, align 4
659  %incdec.ptr7.prol.2 = getelementptr inbounds i32, ptr %pDst, i32 10
660  store i32 %shl.i.prol.2, ptr %incdec.ptr.prol.2, align 4
661  %incdec.ptr8.prol.2 = getelementptr inbounds i32, ptr %pDst, i32 11
662  store i32 %shl5.prol.2, ptr %incdec.ptr7.prol.2, align 4
663  %incdec.ptr9.prol.2 = getelementptr inbounds i32, ptr %pDst, i32 12
664  store i32 %shl.i15.prol.2, ptr %incdec.ptr8.prol.2, align 4
665  %dec.prol.2 = add nsw i32 %shr, -3
666  br label %while.body.prol.loopexit
667
; Join point after zero to three peeled prologue steps. The phis select the
; source pointer, destination pointer and remaining count that match the
; incoming edge, then the block either skips or enters the unrolled main loop.
668while.body.prol.loopexit:                         ; preds = %while.body.prol, %while.body.prol.1, %while.body.prol.2, %while.body.preheader
  ; The two *.lcssa.unr phis are used only by while.end, on the edge that skips
  ; the main loop; their value on the %while.body.preheader edge is undef.
  ; NOTE(review): this looks safe only if the preheader edge always implies
  ; %0 >= 3 (so while.body runs) - %0 and %xtraiter are defined outside this
  ; view, confirm.
669  %add.ptr2.lcssa.unr = phi ptr [ undef, %while.body.preheader ], [ %add.ptr2.prol, %while.body.prol ], [ %add.ptr2.prol.1, %while.body.prol.1 ], [ %add.ptr2.prol.2, %while.body.prol.2 ]
670  %incdec.ptr9.lcssa.unr = phi ptr [ undef, %while.body.preheader ], [ %incdec.ptr9.prol, %while.body.prol ], [ %incdec.ptr9.prol.1, %while.body.prol.1 ], [ %incdec.ptr9.prol.2, %while.body.prol.2 ]
  ; Start values for the main loop: unmodified %pDst / %shr / %pSrc when no
  ; prologue step ran, otherwise the values left by the last prologue step.
671  %pDst.addr.021.unr = phi ptr [ %pDst, %while.body.preheader ], [ %incdec.ptr9.prol, %while.body.prol ], [ %incdec.ptr9.prol.1, %while.body.prol.1 ], [ %incdec.ptr9.prol.2, %while.body.prol.2 ]
672  %blkCnt.020.unr = phi i32 [ %shr, %while.body.preheader ], [ %dec.prol, %while.body.prol ], [ %dec.prol.1, %while.body.prol.1 ], [ %dec.prol.2, %while.body.prol.2 ]
673  %pIn.019.unr = phi ptr [ %pSrc, %while.body.preheader ], [ %add.ptr2.prol, %while.body.prol ], [ %add.ptr2.prol.1, %while.body.prol.1 ], [ %add.ptr2.prol.2, %while.body.prol.2 ]
  ; Unsigned test on %0: below 3 goes straight to while.end, otherwise the
  ; main loop is entered.
674  %13 = icmp ult i32 %0, 3
675  br i1 %13, label %while.end, label %while.body
676
; Main loop, unrolled by four. One trip reads 16 consecutive i16 samples from
; %pIn.019 and writes 16 consecutive i32 results to %pDst.addr.021, where
; result[k] = sample[k] << 16. The counter %blkCnt.020 drops by 4 per trip and
; the loop exits when it reaches zero.
677while.body:                                       ; preds = %while.body.prol.loopexit, %while.body
678  %pDst.addr.021 = phi ptr [ %incdec.ptr9.3, %while.body ], [ %pDst.addr.021.unr, %while.body.prol.loopexit ]
679  %blkCnt.020 = phi i32 [ %dec.3, %while.body ], [ %blkCnt.020.unr, %while.body.prol.loopexit ]
680  %pIn.019 = phi ptr [ %add.ptr2.3, %while.body ], [ %pIn.019.unr, %while.body.prol.loopexit ]
  ; --- Unrolled copy 0: source elements 0..3 -> destination elements 0..3.
  ; Odd elements are sign-extended and even elements zero-extended; the shift
  ; by 16 discards the extension bits, so both forms store (sample << 16).
681  %arrayidx.i = getelementptr inbounds i16, ptr %pIn.019, i32 1
682  %14 = load i16, ptr %arrayidx.i, align 2
683  %conv.i = sext i16 %14 to i32
684  %shl.i = shl nsw i32 %conv.i, 16
685  %15 = load i16, ptr %pIn.019, align 2
686  %conv22.i = zext i16 %15 to i32
687  %add.ptr2 = getelementptr inbounds i16, ptr %pIn.019, i32 4
688  %add.ptr3 = getelementptr inbounds i16, ptr %pIn.019, i32 2
689  %arrayidx.i13 = getelementptr inbounds i16, ptr %pIn.019, i32 3
690  %16 = load i16, ptr %arrayidx.i13, align 2
691  %conv.i14 = sext i16 %16 to i32
692  %shl.i15 = shl nsw i32 %conv.i14, 16
693  %17 = load i16, ptr %add.ptr3, align 2
694  %conv22.i16 = zext i16 %17 to i32
695  %shl = shl nuw i32 %conv22.i, 16
696  %shl5 = shl nuw i32 %conv22.i16, 16
  ; Each store uses the pointer computed one step earlier, so the GEP on the
  ; line above a store is the address for the next store.
697  %incdec.ptr = getelementptr inbounds i32, ptr %pDst.addr.021, i32 1
698  store i32 %shl, ptr %pDst.addr.021, align 4
699  %incdec.ptr7 = getelementptr inbounds i32, ptr %pDst.addr.021, i32 2
700  store i32 %shl.i, ptr %incdec.ptr, align 4
701  %incdec.ptr8 = getelementptr inbounds i32, ptr %pDst.addr.021, i32 3
702  store i32 %shl5, ptr %incdec.ptr7, align 4
703  %incdec.ptr9 = getelementptr inbounds i32, ptr %pDst.addr.021, i32 4
704  store i32 %shl.i15, ptr %incdec.ptr8, align 4
  ; --- Unrolled copy 1: source elements 4..7 -> destination elements 4..7.
705  %arrayidx.i.1 = getelementptr inbounds i16, ptr %pIn.019, i32 5
706  %18 = load i16, ptr %arrayidx.i.1, align 2
707  %conv.i.1 = sext i16 %18 to i32
708  %shl.i.1 = shl nsw i32 %conv.i.1, 16
709  %19 = load i16, ptr %add.ptr2, align 2
710  %conv22.i.1 = zext i16 %19 to i32
711  %add.ptr2.1 = getelementptr inbounds i16, ptr %pIn.019, i32 8
712  %add.ptr3.1 = getelementptr inbounds i16, ptr %pIn.019, i32 6
713  %arrayidx.i13.1 = getelementptr inbounds i16, ptr %pIn.019, i32 7
714  %20 = load i16, ptr %arrayidx.i13.1, align 2
715  %conv.i14.1 = sext i16 %20 to i32
716  %shl.i15.1 = shl nsw i32 %conv.i14.1, 16
717  %21 = load i16, ptr %add.ptr3.1, align 2
718  %conv22.i16.1 = zext i16 %21 to i32
719  %shl.1 = shl nuw i32 %conv22.i.1, 16
720  %shl5.1 = shl nuw i32 %conv22.i16.1, 16
721  %incdec.ptr.1 = getelementptr inbounds i32, ptr %pDst.addr.021, i32 5
722  store i32 %shl.1, ptr %incdec.ptr9, align 4
723  %incdec.ptr7.1 = getelementptr inbounds i32, ptr %pDst.addr.021, i32 6
724  store i32 %shl.i.1, ptr %incdec.ptr.1, align 4
725  %incdec.ptr8.1 = getelementptr inbounds i32, ptr %pDst.addr.021, i32 7
726  store i32 %shl5.1, ptr %incdec.ptr7.1, align 4
727  %incdec.ptr9.1 = getelementptr inbounds i32, ptr %pDst.addr.021, i32 8
728  store i32 %shl.i15.1, ptr %incdec.ptr8.1, align 4
  ; --- Unrolled copy 2: source elements 8..11 -> destination elements 8..11.
729  %arrayidx.i.2 = getelementptr inbounds i16, ptr %pIn.019, i32 9
730  %22 = load i16, ptr %arrayidx.i.2, align 2
731  %conv.i.2 = sext i16 %22 to i32
732  %shl.i.2 = shl nsw i32 %conv.i.2, 16
733  %23 = load i16, ptr %add.ptr2.1, align 2
734  %conv22.i.2 = zext i16 %23 to i32
735  %add.ptr2.2 = getelementptr inbounds i16, ptr %pIn.019, i32 12
736  %add.ptr3.2 = getelementptr inbounds i16, ptr %pIn.019, i32 10
737  %arrayidx.i13.2 = getelementptr inbounds i16, ptr %pIn.019, i32 11
738  %24 = load i16, ptr %arrayidx.i13.2, align 2
739  %conv.i14.2 = sext i16 %24 to i32
740  %shl.i15.2 = shl nsw i32 %conv.i14.2, 16
741  %25 = load i16, ptr %add.ptr3.2, align 2
742  %conv22.i16.2 = zext i16 %25 to i32
743  %shl.2 = shl nuw i32 %conv22.i.2, 16
744  %shl5.2 = shl nuw i32 %conv22.i16.2, 16
745  %incdec.ptr.2 = getelementptr inbounds i32, ptr %pDst.addr.021, i32 9
746  store i32 %shl.2, ptr %incdec.ptr9.1, align 4
747  %incdec.ptr7.2 = getelementptr inbounds i32, ptr %pDst.addr.021, i32 10
748  store i32 %shl.i.2, ptr %incdec.ptr.2, align 4
749  %incdec.ptr8.2 = getelementptr inbounds i32, ptr %pDst.addr.021, i32 11
750  store i32 %shl5.2, ptr %incdec.ptr7.2, align 4
751  %incdec.ptr9.2 = getelementptr inbounds i32, ptr %pDst.addr.021, i32 12
752  store i32 %shl.i15.2, ptr %incdec.ptr8.2, align 4
  ; --- Unrolled copy 3: source elements 12..15 -> destination elements 12..15.
753  %arrayidx.i.3 = getelementptr inbounds i16, ptr %pIn.019, i32 13
754  %26 = load i16, ptr %arrayidx.i.3, align 2
755  %conv.i.3 = sext i16 %26 to i32
756  %shl.i.3 = shl nsw i32 %conv.i.3, 16
757  %27 = load i16, ptr %add.ptr2.2, align 2
758  %conv22.i.3 = zext i16 %27 to i32
  ; %add.ptr2.3 (source + 16 elements) is the next trip's %pIn.019.
759  %add.ptr2.3 = getelementptr inbounds i16, ptr %pIn.019, i32 16
760  %add.ptr3.3 = getelementptr inbounds i16, ptr %pIn.019, i32 14
761  %arrayidx.i13.3 = getelementptr inbounds i16, ptr %pIn.019, i32 15
762  %28 = load i16, ptr %arrayidx.i13.3, align 2
763  %conv.i14.3 = sext i16 %28 to i32
764  %shl.i15.3 = shl nsw i32 %conv.i14.3, 16
765  %29 = load i16, ptr %add.ptr3.3, align 2
766  %conv22.i16.3 = zext i16 %29 to i32
767  %shl.3 = shl nuw i32 %conv22.i.3, 16
768  %shl5.3 = shl nuw i32 %conv22.i16.3, 16
769  %incdec.ptr.3 = getelementptr inbounds i32, ptr %pDst.addr.021, i32 13
770  store i32 %shl.3, ptr %incdec.ptr9.2, align 4
771  %incdec.ptr7.3 = getelementptr inbounds i32, ptr %pDst.addr.021, i32 14
772  store i32 %shl.i.3, ptr %incdec.ptr.3, align 4
773  %incdec.ptr8.3 = getelementptr inbounds i32, ptr %pDst.addr.021, i32 15
774  store i32 %shl5.3, ptr %incdec.ptr7.3, align 4
  ; %incdec.ptr9.3 (destination + 16 elements) is the next trip's %pDst.addr.021.
775  %incdec.ptr9.3 = getelementptr inbounds i32, ptr %pDst.addr.021, i32 16
776  store i32 %shl.i15.3, ptr %incdec.ptr8.3, align 4
  ; Latch: four original iterations were consumed; leave once the count is zero.
777  %dec.3 = add nsw i32 %blkCnt.020, -4
778  %cmp.not.3 = icmp eq i32 %dec.3, 0
779  br i1 %cmp.not.3, label %while.end, label %while.body
780
; Exit of the four-samples-at-a-time phase. The phis pick the current source
; and destination pointers for each way of arriving here: untouched arguments
; from %entry, the prologue results when the main loop was skipped, or the
; pointers advanced by the last main-loop trip.
781while.end:                                        ; preds = %while.body.prol.loopexit, %while.body, %entry
782  %pIn.0.lcssa = phi ptr [ %pSrc, %entry ], [ %add.ptr2.lcssa.unr, %while.body.prol.loopexit ], [ %add.ptr2.3, %while.body ]
783  %pDst.addr.0.lcssa = phi ptr [ %pDst, %entry ], [ %incdec.ptr9.lcssa.unr, %while.body.prol.loopexit ], [ %incdec.ptr9.3, %while.body ]
  ; %rem = %blockSize & 3 is the number of leftover samples (0..3) handled one
  ; at a time by while.body12*; zero means there is nothing left to convert.
784  %rem = and i32 %blockSize, 3
785  %cmp11.not23 = icmp eq i32 %rem, 0
786  br i1 %cmp11.not23, label %while.end17, label %while.body12
787
; First leftover sample (reached only when %rem != 0): load one i16 from
; %pIn.0.lcssa, sign-extend it, shift left by 16 and store the i32 result at
; %pDst.addr.0.lcssa.
788while.body12:                                     ; preds = %while.end
789  %30 = load i16, ptr %pIn.0.lcssa, align 2
790  %conv = sext i16 %30 to i32
791  %shl14 = shl nsw i32 %conv, 16
792  store i32 %shl14, ptr %pDst.addr.0.lcssa, align 4
  ; Finished if exactly one sample was left over; otherwise handle the second.
793  %cmp11.not = icmp eq i32 %rem, 1
794  br i1 %cmp11.not, label %while.end17, label %while.body12.1
795
; Second leftover sample (reached only when %rem is not 0 or 1): same
; load / sign-extend / shift-left-16 / store sequence, one element further
; along both the source and destination pointers.
796while.body12.1:                                   ; preds = %while.body12
797  %incdec.ptr15 = getelementptr inbounds i32, ptr %pDst.addr.0.lcssa, i32 1
798  %incdec.ptr13 = getelementptr inbounds i16, ptr %pIn.0.lcssa, i32 1
799  %31 = load i16, ptr %incdec.ptr13, align 2
800  %conv.1 = sext i16 %31 to i32
801  %shl14.1 = shl nsw i32 %conv.1, 16
802  store i32 %shl14.1, ptr %incdec.ptr15, align 4
  ; Finished if exactly two samples were left over; otherwise handle the third.
803  %cmp11.not.1 = icmp eq i32 %rem, 2
804  br i1 %cmp11.not.1, label %while.end17, label %while.body12.2
805
; Third and last leftover sample. %rem is (%blockSize & 3) and the values
; 0, 1 and 2 have already branched away, so it is 3 here and no further
; comparison is needed: convert element 2 and fall through to the return.
806while.body12.2:                                   ; preds = %while.body12.1
807  %incdec.ptr15.1 = getelementptr inbounds i32, ptr %pDst.addr.0.lcssa, i32 2
808  %incdec.ptr13.1 = getelementptr inbounds i16, ptr %pIn.0.lcssa, i32 2
809  %32 = load i16, ptr %incdec.ptr13.1, align 2
810  %conv.2 = sext i16 %32 to i32
811  %shl14.2 = shl nsw i32 %conv.2, 16
812  store i32 %shl14.2, ptr %incdec.ptr15.1, align 4
813  br label %while.end17
814
; Common exit: reached when no leftover samples remain or after the last one
; has been stored. The function returns nothing; its results are the stores
; made through the destination pointer.
815while.end17:                                      ; preds = %while.body12, %while.body12.1, %while.body12.2, %while.end
816  ret void
817}
818