; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py

; RUN: llc -mtriple=thumbv8m.base-arm-none-eabi < %s | FileCheck %s

; Widens 16-bit samples read from %pSrc into 32-bit words written to %pDst;
; every stored value is the loaded halfword shifted left by 16.
;
; Shape of the IR below:
;   * entry jumps straight to the tail when %blockSize is below 4.
;   * while.body.prol, .prol.1 and .prol.2 are up to three peeled iterations of
;     four samples each, selected by (%blockSize >> 2) & 3.
;   * while.body converts sixteen samples per trip and counts %blkCnt.021 down
;     by 4 until it reaches zero.
;   * while.body12, .1 and .2 convert the remaining (%blockSize & 3) samples.
;
; Within each group of four, the source halfwords are loaded in ascending
; address order. The expected llc output that follows the define line is
; produced by the update script named in the NOTE above; regenerate it with
; that script instead of editing it by hand.
define void @arm_q15_to_q31(ptr nocapture noundef readonly %pSrc, ptr nocapture noundef writeonly %pDst, i32 noundef %blockSize) {
; CHECK-LABEL: arm_q15_to_q31:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .save {r4, r5, r6, r7, lr}
; CHECK-NEXT:    push {r4, r5, r6, r7, lr}
; CHECK-NEXT:    .pad #8
; CHECK-NEXT:    sub sp, #8
; CHECK-NEXT:    mov r7, r2
; CHECK-NEXT:    lsrs r3, r2, #2
; CHECK-NEXT:    beq .LBB0_6
; CHECK-NEXT:  @ %bb.1: @ %while.body.preheader
; CHECK-NEXT:    movs r5, #3
; CHECK-NEXT:    ands r5, r3
; CHECK-NEXT:    subs r2, r3, #1
; CHECK-NEXT:    cbz r5, .LBB0_4
; CHECK-NEXT:  @ %bb.2: @ %while.body.prol
; CHECK-NEXT:    str r2, [sp] @ 4-byte Spill
; CHECK-NEXT:    str r7, [sp, #4] @ 4-byte Spill
; CHECK-NEXT:    ldrh r2, [r0]
; CHECK-NEXT:    ldrh r7, [r0, #2]
; CHECK-NEXT:    ldrh r4, [r0, #4]
; CHECK-NEXT:    ldrh r6, [r0, #6]
; CHECK-NEXT:    lsls r6, r6, #16
; CHECK-NEXT:    lsls r4, r4, #16
; CHECK-NEXT:    lsls r7, r7, #16
; CHECK-NEXT:    lsls r2, r2, #16
; CHECK-NEXT:    stm r1!, {r2, r7}
; CHECK-NEXT:    str r4, [r1]
; CHECK-NEXT:    str r6, [r1, #4]
; CHECK-NEXT:    subs r1, #8
; CHECK-NEXT:    cmp r5, #1
; CHECK-NEXT:    bne .LBB0_11
; CHECK-NEXT:  @ %bb.3:
; CHECK-NEXT:    adds r1, #16
; CHECK-NEXT:    adds r0, #8
; CHECK-NEXT:    ldr r2, [sp] @ 4-byte Reload
; CHECK-NEXT:    mov r3, r2
; CHECK-NEXT:    ldr r7, [sp, #4] @ 4-byte Reload
; CHECK-NEXT:  .LBB0_4: @ %while.body.prol.loopexit
; CHECK-NEXT:    cmp r2, #3
; CHECK-NEXT:    blo .LBB0_6
; CHECK-NEXT:  .LBB0_5: @ %while.body
; CHECK-NEXT:    @ =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    ldrh r2, [r0]
; CHECK-NEXT:    ldrh r4, [r0, #2]
; CHECK-NEXT:    ldrh r5, [r0, #4]
; CHECK-NEXT:    ldrh r6, [r0, #6]
; CHECK-NEXT:    lsls r6, r6, #16
; CHECK-NEXT:    str r6, [r1, #12]
; CHECK-NEXT:    lsls r5, r5, #16
; CHECK-NEXT:    str r5, [r1, #8]
; CHECK-NEXT:    lsls r4, r4, #16
; CHECK-NEXT:    str r4, [r1, #4]
; CHECK-NEXT:    lsls r2, r2, #16
; CHECK-NEXT:    str r2, [r1]
; CHECK-NEXT:    ldrh r2, [r0, #8]
; CHECK-NEXT:    ldrh r4, [r0, #10]
; CHECK-NEXT:    ldrh r5, [r0, #12]
; CHECK-NEXT:    ldrh r6, [r0, #14]
; CHECK-NEXT:    lsls r6, r6, #16
; CHECK-NEXT:    str r6, [r1, #28]
; CHECK-NEXT:    lsls r5, r5, #16
; CHECK-NEXT:    str r5, [r1, #24]
; CHECK-NEXT:    lsls r4, r4, #16
; CHECK-NEXT:    str r4, [r1, #20]
; CHECK-NEXT:    lsls r2, r2, #16
; CHECK-NEXT:    str r2, [r1, #16]
; CHECK-NEXT:    ldrh r2, [r0, #16]
; CHECK-NEXT:    ldrh r4, [r0, #18]
; CHECK-NEXT:    ldrh r5, [r0, #20]
; CHECK-NEXT:    ldrh r6, [r0, #22]
; CHECK-NEXT:    lsls r6, r6, #16
; CHECK-NEXT:    str r6, [r1, #44]
; CHECK-NEXT:    lsls r5, r5, #16
; CHECK-NEXT:    str r5, [r1, #40]
; CHECK-NEXT:    lsls r4, r4, #16
; CHECK-NEXT:    str r4, [r1, #36]
; CHECK-NEXT:    lsls r2, r2, #16
; CHECK-NEXT:    str r2, [r1, #32]
; CHECK-NEXT:    ldrh r2, [r0, #24]
; CHECK-NEXT:    ldrh r4, [r0, #26]
; CHECK-NEXT:    ldrh r5, [r0, #28]
; CHECK-NEXT:    ldrh r6, [r0, #30]
; CHECK-NEXT:    lsls r6, r6, #16
; CHECK-NEXT:    str r6, [r1, #60]
; CHECK-NEXT:    lsls r5, r5, #16
; CHECK-NEXT:    str r5, [r1, #56]
; CHECK-NEXT:    lsls r4, r4, #16
; CHECK-NEXT:    str r4, [r1, #52]
; CHECK-NEXT:    lsls r2, r2, #16
; CHECK-NEXT:    str r2, [r1, #48]
; CHECK-NEXT:    adds r1, #64
; CHECK-NEXT:    adds r0, #32
; CHECK-NEXT:    subs r3, r3, #4
; CHECK-NEXT:    bne .LBB0_5
; CHECK-NEXT:  .LBB0_6: @ %while.end
; CHECK-NEXT:    movs r2, #3
; CHECK-NEXT:    ands r7, r2
; CHECK-NEXT:    beq .LBB0_10
; CHECK-NEXT:  @ %bb.7: @ %while.body12
; CHECK-NEXT:    ldrh r2, [r0]
; CHECK-NEXT:    lsls r2, r2, #16
; CHECK-NEXT:    str r2, [r1]
; CHECK-NEXT:    cmp r7, #1
; CHECK-NEXT:    beq .LBB0_10
; CHECK-NEXT:  @ %bb.8: @ %while.body12.1
; CHECK-NEXT:    ldrh r2, [r0, #2]
; CHECK-NEXT:    lsls r2, r2, #16
; CHECK-NEXT:    str r2, [r1, #4]
; CHECK-NEXT:    cmp r7, #2
; CHECK-NEXT:    beq .LBB0_10
; CHECK-NEXT:  @ %bb.9: @ %while.body12.2
; CHECK-NEXT:    ldrh r0, [r0, #4]
; CHECK-NEXT:    lsls r0, r0, #16
; CHECK-NEXT:    str r0, [r1, #8]
; CHECK-NEXT:  .LBB0_10: @ %while.end17
; CHECK-NEXT:    add sp, #8
; CHECK-NEXT:    pop {r4, r5, r6, r7, pc}
; CHECK-NEXT:  .LBB0_11: @ %while.body.prol.1
; CHECK-NEXT:    ldrh r2, [r0, #8]
; CHECK-NEXT:    ldrh r4, [r0, #10]
; CHECK-NEXT:    ldrh r6, [r0, #12]
; CHECK-NEXT:    ldrh r7, [r0, #14]
; CHECK-NEXT:    lsls r7, r7, #16
; CHECK-NEXT:    lsls r6, r6, #16
; CHECK-NEXT:    lsls r4, r4, #16
; CHECK-NEXT:    lsls r2, r2, #16
; CHECK-NEXT:    str r2, [r1, #16]
; CHECK-NEXT:    str r4, [r1, #20]
; CHECK-NEXT:    str r6, [r1, #24]
; CHECK-NEXT:    str r7, [r1, #28]
; CHECK-NEXT:    cmp r5, #2
; CHECK-NEXT:    bne .LBB0_13
; CHECK-NEXT:  @ %bb.12:
; CHECK-NEXT:    subs r3, r3, #2
; CHECK-NEXT:    adds r1, #32
; CHECK-NEXT:    adds r0, #16
; CHECK-NEXT:    b .LBB0_14
; CHECK-NEXT:  .LBB0_13: @ %while.body.prol.2
; CHECK-NEXT:    ldrh r2, [r0, #16]
; CHECK-NEXT:    ldrh r4, [r0, #18]
; CHECK-NEXT:    ldrh r5, [r0, #20]
; CHECK-NEXT:    ldrh r6, [r0, #22]
; CHECK-NEXT:    lsls r6, r6, #16
; CHECK-NEXT:    lsls r5, r5, #16
; CHECK-NEXT:    lsls r4, r4, #16
; CHECK-NEXT:    lsls r2, r2, #16
; CHECK-NEXT:    mov r7, r1
; CHECK-NEXT:    adds r7, #32
; CHECK-NEXT:    stm r7!, {r2, r4, r5, r6}
; CHECK-NEXT:    subs r3, r3, #3
; CHECK-NEXT:    adds r1, #48
; CHECK-NEXT:    adds r0, #24
; CHECK-NEXT:  .LBB0_14: @ %while.body.prol.loopexit
; CHECK-NEXT:    ldr r7, [sp, #4] @ 4-byte Reload
; CHECK-NEXT:    ldr r2, [sp] @ 4-byte Reload
; CHECK-NEXT:    cmp r2, #3
; CHECK-NEXT:    bhs .LBB0_5
; CHECK-NEXT:    b .LBB0_6
entry:
  %cmp.not19 = icmp ult i32 %blockSize, 4
  br i1 %cmp.not19, label %while.end, label %while.body.preheader

; %shr is the number of four-sample groups; %xtraiter is how many of them are
; peeled into the prologue blocks before the sixteen-sample loop.
while.body.preheader:                             ; preds = %entry
  %shr = lshr i32 %blockSize, 2
  %0 = add nsw i32 %shr, -1
  %xtraiter = and i32 %shr, 3
  %lcmp.mod.not = icmp eq i32 %xtraiter, 0
  br i1 %lcmp.mod.not, label %while.body.prol.loopexit, label %while.body.prol

; Peeled group 1: source halfwords 0..3 -> destination words 0..3.
while.body.prol:                                  ; preds = %while.body.preheader
  %pIn.0.val.prol = load i16, ptr %pSrc, align 2
  %1 = getelementptr i8, ptr %pSrc, i32 2
  %pIn.0.val13.prol = load i16, ptr %1, align 2
  %conv.i.prol = sext i16 %pIn.0.val13.prol to i32
  %shl.i.prol = shl nsw i32 %conv.i.prol, 16
  %conv22.i.prol = zext i16 %pIn.0.val.prol to i32
  %add.ptr2.prol = getelementptr inbounds i16, ptr %pSrc, i32 4
  %add.ptr3.prol = getelementptr inbounds i16, ptr %pSrc, i32 2
  %add.ptr3.val.prol = load i16, ptr %add.ptr3.prol, align 2
  %2 = getelementptr i16, ptr %pSrc, i32 3
  %add.ptr3.val14.prol = load i16, ptr %2, align 2
  %conv.i15.prol = sext i16 %add.ptr3.val14.prol to i32
  %shl.i16.prol = shl nsw i32 %conv.i15.prol, 16
  %conv22.i17.prol = zext i16 %add.ptr3.val.prol to i32
  %shl.prol = shl nuw i32 %conv22.i.prol, 16
  %shl5.prol = shl nuw i32 %conv22.i17.prol, 16
  %incdec.ptr.prol = getelementptr inbounds i32, ptr %pDst, i32 1
  store i32 %shl.prol, ptr %pDst, align 4
  %incdec.ptr7.prol = getelementptr inbounds i32, ptr %pDst, i32 2
  store i32 %shl.i.prol, ptr %incdec.ptr.prol, align 4
  %incdec.ptr8.prol = getelementptr inbounds i32, ptr %pDst, i32 3
  store i32 %shl5.prol, ptr %incdec.ptr7.prol, align 4
  %incdec.ptr9.prol = getelementptr inbounds i32, ptr %pDst, i32 4
  store i32 %shl.i16.prol, ptr %incdec.ptr8.prol, align 4
  %dec.prol = add nsw i32 %shr, -1
  %prol.iter.cmp.not = icmp eq i32 %xtraiter, 1
  br i1 %prol.iter.cmp.not, label %while.body.prol.loopexit, label %while.body.prol.1

; Peeled group 2: source halfwords 4..7 -> destination words 4..7.
while.body.prol.1:                                ; preds = %while.body.prol
  %pIn.0.val.prol.1 = load i16, ptr %add.ptr2.prol, align 2
  %3 = getelementptr i16, ptr %pSrc, i32 5
  %pIn.0.val13.prol.1 = load i16, ptr %3, align 2
  %conv.i.prol.1 = sext i16 %pIn.0.val13.prol.1 to i32
  %shl.i.prol.1 = shl nsw i32 %conv.i.prol.1, 16
  %conv22.i.prol.1 = zext i16 %pIn.0.val.prol.1 to i32
  %add.ptr2.prol.1 = getelementptr inbounds i16, ptr %pSrc, i32 8
  %add.ptr3.prol.1 = getelementptr inbounds i16, ptr %pSrc, i32 6
  %add.ptr3.val.prol.1 = load i16, ptr %add.ptr3.prol.1, align 2
  %4 = getelementptr i16, ptr %pSrc, i32 7
  %add.ptr3.val14.prol.1 = load i16, ptr %4, align 2
  %conv.i15.prol.1 = sext i16 %add.ptr3.val14.prol.1 to i32
  %shl.i16.prol.1 = shl nsw i32 %conv.i15.prol.1, 16
  %conv22.i17.prol.1 = zext i16 %add.ptr3.val.prol.1 to i32
  %shl.prol.1 = shl nuw i32 %conv22.i.prol.1, 16
  %shl5.prol.1 = shl nuw i32 %conv22.i17.prol.1, 16
  %incdec.ptr.prol.1 = getelementptr inbounds i32, ptr %pDst, i32 5
  store i32 %shl.prol.1, ptr %incdec.ptr9.prol, align 4
  %incdec.ptr7.prol.1 = getelementptr inbounds i32, ptr %pDst, i32 6
  store i32 %shl.i.prol.1, ptr %incdec.ptr.prol.1, align 4
  %incdec.ptr8.prol.1 = getelementptr inbounds i32, ptr %pDst, i32 7
  store i32 %shl5.prol.1, ptr %incdec.ptr7.prol.1, align 4
  %incdec.ptr9.prol.1 = getelementptr inbounds i32, ptr %pDst, i32 8
  store i32 %shl.i16.prol.1, ptr %incdec.ptr8.prol.1, align 4
  %dec.prol.1 = add nsw i32 %shr, -2
  %prol.iter.cmp.1.not = icmp eq i32 %xtraiter, 2
  br i1 %prol.iter.cmp.1.not, label %while.body.prol.loopexit, label %while.body.prol.2

; Peeled group 3: source halfwords 8..11 -> destination words 8..11.
while.body.prol.2:                                ; preds = %while.body.prol.1
  %pIn.0.val.prol.2 = load i16, ptr %add.ptr2.prol.1, align 2
  %5 = getelementptr i16, ptr %pSrc, i32 9
  %pIn.0.val13.prol.2 = load i16, ptr %5, align 2
  %conv.i.prol.2 = sext i16 %pIn.0.val13.prol.2 to i32
  %shl.i.prol.2 = shl nsw i32 %conv.i.prol.2, 16
  %conv22.i.prol.2 = zext i16 %pIn.0.val.prol.2 to i32
  %add.ptr2.prol.2 = getelementptr inbounds i16, ptr %pSrc, i32 12
  %add.ptr3.prol.2 = getelementptr inbounds i16, ptr %pSrc, i32 10
  %add.ptr3.val.prol.2 = load i16, ptr %add.ptr3.prol.2, align 2
  %6 = getelementptr i16, ptr %pSrc, i32 11
  %add.ptr3.val14.prol.2 = load i16, ptr %6, align 2
  %conv.i15.prol.2 = sext i16 %add.ptr3.val14.prol.2 to i32
  %shl.i16.prol.2 = shl nsw i32 %conv.i15.prol.2, 16
  %conv22.i17.prol.2 = zext i16 %add.ptr3.val.prol.2 to i32
  %shl.prol.2 = shl nuw i32 %conv22.i.prol.2, 16
  %shl5.prol.2 = shl nuw i32 %conv22.i17.prol.2, 16
  %incdec.ptr.prol.2 = getelementptr inbounds i32, ptr %pDst, i32 9
  store i32 %shl.prol.2, ptr %incdec.ptr9.prol.1, align 4
  %incdec.ptr7.prol.2 = getelementptr inbounds i32, ptr %pDst, i32 10
  store i32 %shl.i.prol.2, ptr %incdec.ptr.prol.2, align 4
  %incdec.ptr8.prol.2 = getelementptr inbounds i32, ptr %pDst, i32 11
  store i32 %shl5.prol.2, ptr %incdec.ptr7.prol.2, align 4
  %incdec.ptr9.prol.2 = getelementptr inbounds i32, ptr %pDst, i32 12
  store i32 %shl.i16.prol.2, ptr %incdec.ptr8.prol.2, align 4
  %dec.prol.2 = add nsw i32 %shr, -3
  br label %while.body.prol.loopexit

; Merge point after the peeled groups. The two *.lcssa.unr phis take undef on
; the edge from while.body.preheader; that edge is taken only when %xtraiter is
; 0, and since %shr is at least 1 there, %shr is then at least 4, so %0 >= 3 and
; control continues to while.body rather than to while.end.
while.body.prol.loopexit:                         ; preds = %while.body.prol, %while.body.prol.1, %while.body.prol.2, %while.body.preheader
  %add.ptr2.lcssa.unr = phi ptr [ undef, %while.body.preheader ], [ %add.ptr2.prol, %while.body.prol ], [ %add.ptr2.prol.1, %while.body.prol.1 ], [ %add.ptr2.prol.2, %while.body.prol.2 ]
  %incdec.ptr9.lcssa.unr = phi ptr [ undef, %while.body.preheader ], [ %incdec.ptr9.prol, %while.body.prol ], [ %incdec.ptr9.prol.1, %while.body.prol.1 ], [ %incdec.ptr9.prol.2, %while.body.prol.2 ]
  %pDst.addr.022.unr = phi ptr [ %pDst, %while.body.preheader ], [ %incdec.ptr9.prol, %while.body.prol ], [ %incdec.ptr9.prol.1, %while.body.prol.1 ], [ %incdec.ptr9.prol.2, %while.body.prol.2 ]
  %blkCnt.021.unr = phi i32 [ %shr, %while.body.preheader ], [ %dec.prol, %while.body.prol ], [ %dec.prol.1, %while.body.prol.1 ], [ %dec.prol.2, %while.body.prol.2 ]
  %pIn.020.unr = phi ptr [ %pSrc, %while.body.preheader ], [ %add.ptr2.prol, %while.body.prol ], [ %add.ptr2.prol.1, %while.body.prol.1 ], [ %add.ptr2.prol.2, %while.body.prol.2 ]
  %7 = icmp ult i32 %0, 3
  br i1 %7, label %while.end, label %while.body

; Main loop: four groups of four samples (sixteen halfwords in, sixteen words
; out) per trip; both pointers and the group counter advance at the bottom.
while.body:                                       ; preds = %while.body.prol.loopexit, %while.body
  %pDst.addr.022 = phi ptr [ %incdec.ptr9.3, %while.body ], [ %pDst.addr.022.unr, %while.body.prol.loopexit ]
  %blkCnt.021 = phi i32 [ %dec.3, %while.body ], [ %blkCnt.021.unr, %while.body.prol.loopexit ]
  %pIn.020 = phi ptr [ %add.ptr2.3, %while.body ], [ %pIn.020.unr, %while.body.prol.loopexit ]
  %pIn.0.val = load i16, ptr %pIn.020, align 2
  %8 = getelementptr i8, ptr %pIn.020, i32 2
  %pIn.0.val13 = load i16, ptr %8, align 2
  %conv.i = sext i16 %pIn.0.val13 to i32
  %shl.i = shl nsw i32 %conv.i, 16
  %conv22.i = zext i16 %pIn.0.val to i32
  %add.ptr2 = getelementptr inbounds i16, ptr %pIn.020, i32 4
  %add.ptr3 = getelementptr inbounds i16, ptr %pIn.020, i32 2
  %add.ptr3.val = load i16, ptr %add.ptr3, align 2
  %9 = getelementptr i16, ptr %pIn.020, i32 3
  %add.ptr3.val14 = load i16, ptr %9, align 2
  %conv.i15 = sext i16 %add.ptr3.val14 to i32
  %shl.i16 = shl nsw i32 %conv.i15, 16
  %conv22.i17 = zext i16 %add.ptr3.val to i32
  %shl = shl nuw i32 %conv22.i, 16
  %shl5 = shl nuw i32 %conv22.i17, 16
  %incdec.ptr = getelementptr inbounds i32, ptr %pDst.addr.022, i32 1
  store i32 %shl, ptr %pDst.addr.022, align 4
  %incdec.ptr7 = getelementptr inbounds i32, ptr %pDst.addr.022, i32 2
  store i32 %shl.i, ptr %incdec.ptr, align 4
  %incdec.ptr8 = getelementptr inbounds i32, ptr %pDst.addr.022, i32 3
  store i32 %shl5, ptr %incdec.ptr7, align 4
  %incdec.ptr9 = getelementptr inbounds i32, ptr %pDst.addr.022, i32 4
  store i32 %shl.i16, ptr %incdec.ptr8, align 4
  %pIn.0.val.1 = load i16, ptr %add.ptr2, align 2
  %10 = getelementptr i16, ptr %pIn.020, i32 5
  %pIn.0.val13.1 = load i16, ptr %10, align 2
  %conv.i.1 = sext i16 %pIn.0.val13.1 to i32
  %shl.i.1 = shl nsw i32 %conv.i.1, 16
  %conv22.i.1 = zext i16 %pIn.0.val.1 to i32
  %add.ptr2.1 = getelementptr inbounds i16, ptr %pIn.020, i32 8
  %add.ptr3.1 = getelementptr inbounds i16, ptr %pIn.020, i32 6
  %add.ptr3.val.1 = load i16, ptr %add.ptr3.1, align 2
  %11 = getelementptr i16, ptr %pIn.020, i32 7
  %add.ptr3.val14.1 = load i16, ptr %11, align 2
  %conv.i15.1 = sext i16 %add.ptr3.val14.1 to i32
  %shl.i16.1 = shl nsw i32 %conv.i15.1, 16
  %conv22.i17.1 = zext i16 %add.ptr3.val.1 to i32
  %shl.1 = shl nuw i32 %conv22.i.1, 16
  %shl5.1 = shl nuw i32 %conv22.i17.1, 16
  %incdec.ptr.1 = getelementptr inbounds i32, ptr %pDst.addr.022, i32 5
  store i32 %shl.1, ptr %incdec.ptr9, align 4
  %incdec.ptr7.1 = getelementptr inbounds i32, ptr %pDst.addr.022, i32 6
  store i32 %shl.i.1, ptr %incdec.ptr.1, align 4
  %incdec.ptr8.1 = getelementptr inbounds i32, ptr %pDst.addr.022, i32 7
  store i32 %shl5.1, ptr %incdec.ptr7.1, align 4
  %incdec.ptr9.1 = getelementptr inbounds i32, ptr %pDst.addr.022, i32 8
  store i32 %shl.i16.1, ptr %incdec.ptr8.1, align 4
  %pIn.0.val.2 = load i16, ptr %add.ptr2.1, align 2
  %12 = getelementptr i16, ptr %pIn.020, i32 9
  %pIn.0.val13.2 = load i16, ptr %12, align 2
  %conv.i.2 = sext i16 %pIn.0.val13.2 to i32
  %shl.i.2 = shl nsw i32 %conv.i.2, 16
  %conv22.i.2 = zext i16 %pIn.0.val.2 to i32
  %add.ptr2.2 = getelementptr inbounds i16, ptr %pIn.020, i32 12
  %add.ptr3.2 = getelementptr inbounds i16, ptr %pIn.020, i32 10
  %add.ptr3.val.2 = load i16, ptr %add.ptr3.2, align 2
  %13 = getelementptr i16, ptr %pIn.020, i32 11
  %add.ptr3.val14.2 = load i16, ptr %13, align 2
  %conv.i15.2 = sext i16 %add.ptr3.val14.2 to i32
  %shl.i16.2 = shl nsw i32 %conv.i15.2, 16
  %conv22.i17.2 = zext i16 %add.ptr3.val.2 to i32
  %shl.2 = shl nuw i32 %conv22.i.2, 16
  %shl5.2 = shl nuw i32 %conv22.i17.2, 16
  %incdec.ptr.2 = getelementptr inbounds i32, ptr %pDst.addr.022, i32 9
  store i32 %shl.2, ptr %incdec.ptr9.1, align 4
  %incdec.ptr7.2 = getelementptr inbounds i32, ptr %pDst.addr.022, i32 10
  store i32 %shl.i.2, ptr %incdec.ptr.2, align 4
  %incdec.ptr8.2 = getelementptr inbounds i32, ptr %pDst.addr.022, i32 11
  store i32 %shl5.2, ptr %incdec.ptr7.2, align 4
  %incdec.ptr9.2 = getelementptr inbounds i32, ptr %pDst.addr.022, i32 12
  store i32 %shl.i16.2, ptr %incdec.ptr8.2, align 4
  %pIn.0.val.3 = load i16, ptr %add.ptr2.2, align 2
  %14 = getelementptr i16, ptr %pIn.020, i32 13
  %pIn.0.val13.3 = load i16, ptr %14, align 2
  %conv.i.3 = sext i16 %pIn.0.val13.3 to i32
  %shl.i.3 = shl nsw i32 %conv.i.3, 16
  %conv22.i.3 = zext i16 %pIn.0.val.3 to i32
  %add.ptr2.3 = getelementptr inbounds i16, ptr %pIn.020, i32 16
  %add.ptr3.3 = getelementptr inbounds i16, ptr %pIn.020, i32 14
  %add.ptr3.val.3 = load i16, ptr %add.ptr3.3, align 2
  %15 = getelementptr i16, ptr %pIn.020, i32 15
  %add.ptr3.val14.3 = load i16, ptr %15, align 2
  %conv.i15.3 = sext i16 %add.ptr3.val14.3 to i32
  %shl.i16.3 = shl nsw i32 %conv.i15.3, 16
  %conv22.i17.3 = zext i16 %add.ptr3.val.3 to i32
  %shl.3 = shl nuw i32 %conv22.i.3, 16
  %shl5.3 = shl nuw i32 %conv22.i17.3, 16
  %incdec.ptr.3 = getelementptr inbounds i32, ptr %pDst.addr.022, i32 13
  store i32 %shl.3, ptr %incdec.ptr9.2, align 4
  %incdec.ptr7.3 = getelementptr inbounds i32, ptr %pDst.addr.022, i32 14
  store i32 %shl.i.3, ptr %incdec.ptr.3, align 4
  %incdec.ptr8.3 = getelementptr inbounds i32, ptr %pDst.addr.022, i32 15
  store i32 %shl5.3, ptr %incdec.ptr7.3, align 4
  %incdec.ptr9.3 = getelementptr inbounds i32, ptr %pDst.addr.022, i32 16
  store i32 %shl.i16.3, ptr %incdec.ptr8.3, align 4
  %dec.3 = add nsw i32 %blkCnt.021, -4
  %cmp.not.3 = icmp eq i32 %dec.3, 0
  br i1 %cmp.not.3, label %while.end, label %while.body

; Tail: %rem = %blockSize & 3 leftover samples, converted one at a time.
while.end:                                        ; preds = %while.body.prol.loopexit, %while.body, %entry
  %pIn.0.lcssa = phi ptr [ %pSrc, %entry ], [ %add.ptr2.lcssa.unr, %while.body.prol.loopexit ], [ %add.ptr2.3, %while.body ]
  %pDst.addr.0.lcssa = phi ptr [ %pDst, %entry ], [ %incdec.ptr9.lcssa.unr, %while.body.prol.loopexit ], [ %incdec.ptr9.3, %while.body ]
  %rem = and i32 %blockSize, 3
  %cmp11.not24 = icmp eq i32 %rem, 0
  br i1 %cmp11.not24, label %while.end17, label %while.body12

while.body12:                                     ; preds = %while.end
  %16 = load i16, ptr %pIn.0.lcssa, align 2
  %conv = sext i16 %16 to i32
  %shl14 = shl nsw i32 %conv, 16
  store i32 %shl14, ptr %pDst.addr.0.lcssa, align 4
  %cmp11.not = icmp eq i32 %rem, 1
  br i1 %cmp11.not, label %while.end17, label %while.body12.1

while.body12.1:                                   ; preds = %while.body12
  %incdec.ptr15 = getelementptr inbounds i32, ptr %pDst.addr.0.lcssa, i32 1
  %incdec.ptr13 = getelementptr inbounds i16, ptr %pIn.0.lcssa, i32 1
  %17 = load i16, ptr %incdec.ptr13, align 2
  %conv.1 = sext i16 %17 to i32
  %shl14.1 = shl nsw i32 %conv.1, 16
  store i32 %shl14.1, ptr %incdec.ptr15, align 4
  %cmp11.not.1 = icmp eq i32 %rem, 2
  br i1 %cmp11.not.1, label %while.end17, label %while.body12.2

while.body12.2:                                   ; preds = %while.body12.1
  %incdec.ptr15.1 = getelementptr inbounds i32, ptr %pDst.addr.0.lcssa, i32 2
  %incdec.ptr13.1 = getelementptr inbounds i16, ptr %pIn.0.lcssa, i32 2
  %18 = load i16, ptr %incdec.ptr13.1, align 2
  %conv.2 = sext i16 %18 to i32
  %shl14.2 = shl nsw i32 %conv.2, 16
  store i32 %shl14.2, ptr %incdec.ptr15.1, align 4
  br label %while.end17

while.end17:                                      ; preds = %while.body12, %while.body12.1, %while.body12.2, %while.end
  ret void
}

define void @arm_q15_to_q31_altorder(ptr nocapture noundef readonly %pSrc, ptr nocapture noundef writeonly %pDst, i32 noundef %blockSize) {
; CHECK-LABEL: arm_q15_to_q31_altorder:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .save {r4, r5, r6, r7, lr}
; CHECK-NEXT:    push {r4, r5, r6, r7, lr}
; CHECK-NEXT:    .pad #8
; CHECK-NEXT:    sub sp, #8
; CHECK-NEXT:    mov r7, r2
; CHECK-NEXT:    lsrs r3, r2, #2
; CHECK-NEXT:    beq .LBB1_6
;
CHECK-NEXT: @ %bb.1: @ %while.body.preheader 423; CHECK-NEXT: movs r5, #3 424; CHECK-NEXT: ands r5, r3 425; CHECK-NEXT: subs r2, r3, #1 426; CHECK-NEXT: cbz r5, .LBB1_4 427; CHECK-NEXT: @ %bb.2: @ %while.body.prol 428; CHECK-NEXT: str r2, [sp] @ 4-byte Spill 429; CHECK-NEXT: str r7, [sp, #4] @ 4-byte Spill 430; CHECK-NEXT: ldrh r2, [r0] 431; CHECK-NEXT: ldrh r7, [r0, #2] 432; CHECK-NEXT: ldrh r4, [r0, #4] 433; CHECK-NEXT: ldrh r6, [r0, #6] 434; CHECK-NEXT: lsls r6, r6, #16 435; CHECK-NEXT: lsls r4, r4, #16 436; CHECK-NEXT: lsls r7, r7, #16 437; CHECK-NEXT: lsls r2, r2, #16 438; CHECK-NEXT: stm r1!, {r2, r7} 439; CHECK-NEXT: str r4, [r1] 440; CHECK-NEXT: str r6, [r1, #4] 441; CHECK-NEXT: subs r1, #8 442; CHECK-NEXT: cmp r5, #1 443; CHECK-NEXT: bne .LBB1_11 444; CHECK-NEXT: @ %bb.3: 445; CHECK-NEXT: adds r1, #16 446; CHECK-NEXT: adds r0, #8 447; CHECK-NEXT: ldr r2, [sp] @ 4-byte Reload 448; CHECK-NEXT: mov r3, r2 449; CHECK-NEXT: ldr r7, [sp, #4] @ 4-byte Reload 450; CHECK-NEXT: .LBB1_4: @ %while.body.prol.loopexit 451; CHECK-NEXT: cmp r2, #3 452; CHECK-NEXT: blo .LBB1_6 453; CHECK-NEXT: .LBB1_5: @ %while.body 454; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 455; CHECK-NEXT: ldrh r2, [r0] 456; CHECK-NEXT: ldrh r4, [r0, #2] 457; CHECK-NEXT: ldrh r5, [r0, #4] 458; CHECK-NEXT: ldrh r6, [r0, #6] 459; CHECK-NEXT: lsls r6, r6, #16 460; CHECK-NEXT: str r6, [r1, #12] 461; CHECK-NEXT: lsls r5, r5, #16 462; CHECK-NEXT: str r5, [r1, #8] 463; CHECK-NEXT: lsls r4, r4, #16 464; CHECK-NEXT: str r4, [r1, #4] 465; CHECK-NEXT: lsls r2, r2, #16 466; CHECK-NEXT: str r2, [r1] 467; CHECK-NEXT: ldrh r2, [r0, #8] 468; CHECK-NEXT: ldrh r4, [r0, #10] 469; CHECK-NEXT: ldrh r5, [r0, #12] 470; CHECK-NEXT: ldrh r6, [r0, #14] 471; CHECK-NEXT: lsls r6, r6, #16 472; CHECK-NEXT: str r6, [r1, #28] 473; CHECK-NEXT: lsls r5, r5, #16 474; CHECK-NEXT: str r5, [r1, #24] 475; CHECK-NEXT: lsls r4, r4, #16 476; CHECK-NEXT: str r4, [r1, #20] 477; CHECK-NEXT: lsls r2, r2, #16 478; CHECK-NEXT: str r2, [r1, 
#16] 479; CHECK-NEXT: ldrh r2, [r0, #16] 480; CHECK-NEXT: ldrh r4, [r0, #18] 481; CHECK-NEXT: ldrh r5, [r0, #20] 482; CHECK-NEXT: ldrh r6, [r0, #22] 483; CHECK-NEXT: lsls r6, r6, #16 484; CHECK-NEXT: str r6, [r1, #44] 485; CHECK-NEXT: lsls r5, r5, #16 486; CHECK-NEXT: str r5, [r1, #40] 487; CHECK-NEXT: lsls r4, r4, #16 488; CHECK-NEXT: str r4, [r1, #36] 489; CHECK-NEXT: lsls r2, r2, #16 490; CHECK-NEXT: str r2, [r1, #32] 491; CHECK-NEXT: ldrh r2, [r0, #24] 492; CHECK-NEXT: ldrh r4, [r0, #26] 493; CHECK-NEXT: ldrh r5, [r0, #28] 494; CHECK-NEXT: ldrh r6, [r0, #30] 495; CHECK-NEXT: lsls r6, r6, #16 496; CHECK-NEXT: str r6, [r1, #60] 497; CHECK-NEXT: lsls r5, r5, #16 498; CHECK-NEXT: str r5, [r1, #56] 499; CHECK-NEXT: lsls r4, r4, #16 500; CHECK-NEXT: str r4, [r1, #52] 501; CHECK-NEXT: lsls r2, r2, #16 502; CHECK-NEXT: str r2, [r1, #48] 503; CHECK-NEXT: adds r1, #64 504; CHECK-NEXT: adds r0, #32 505; CHECK-NEXT: subs r3, r3, #4 506; CHECK-NEXT: bne .LBB1_5 507; CHECK-NEXT: .LBB1_6: @ %while.end 508; CHECK-NEXT: movs r2, #3 509; CHECK-NEXT: ands r7, r2 510; CHECK-NEXT: beq .LBB1_10 511; CHECK-NEXT: @ %bb.7: @ %while.body12 512; CHECK-NEXT: ldrh r2, [r0] 513; CHECK-NEXT: lsls r2, r2, #16 514; CHECK-NEXT: str r2, [r1] 515; CHECK-NEXT: cmp r7, #1 516; CHECK-NEXT: beq .LBB1_10 517; CHECK-NEXT: @ %bb.8: @ %while.body12.1 518; CHECK-NEXT: ldrh r2, [r0, #2] 519; CHECK-NEXT: lsls r2, r2, #16 520; CHECK-NEXT: str r2, [r1, #4] 521; CHECK-NEXT: cmp r7, #2 522; CHECK-NEXT: beq .LBB1_10 523; CHECK-NEXT: @ %bb.9: @ %while.body12.2 524; CHECK-NEXT: ldrh r0, [r0, #4] 525; CHECK-NEXT: lsls r0, r0, #16 526; CHECK-NEXT: str r0, [r1, #8] 527; CHECK-NEXT: .LBB1_10: @ %while.end17 528; CHECK-NEXT: add sp, #8 529; CHECK-NEXT: pop {r4, r5, r6, r7, pc} 530; CHECK-NEXT: .LBB1_11: @ %while.body.prol.1 531; CHECK-NEXT: ldrh r2, [r0, #8] 532; CHECK-NEXT: ldrh r4, [r0, #10] 533; CHECK-NEXT: ldrh r6, [r0, #12] 534; CHECK-NEXT: ldrh r7, [r0, #14] 535; CHECK-NEXT: lsls r7, r7, #16 536; CHECK-NEXT: lsls 
r6, r6, #16 537; CHECK-NEXT: lsls r4, r4, #16 538; CHECK-NEXT: lsls r2, r2, #16 539; CHECK-NEXT: str r2, [r1, #16] 540; CHECK-NEXT: str r4, [r1, #20] 541; CHECK-NEXT: str r6, [r1, #24] 542; CHECK-NEXT: str r7, [r1, #28] 543; CHECK-NEXT: cmp r5, #2 544; CHECK-NEXT: bne .LBB1_13 545; CHECK-NEXT: @ %bb.12: 546; CHECK-NEXT: subs r3, r3, #2 547; CHECK-NEXT: adds r1, #32 548; CHECK-NEXT: adds r0, #16 549; CHECK-NEXT: b .LBB1_14 550; CHECK-NEXT: .LBB1_13: @ %while.body.prol.2 551; CHECK-NEXT: ldrh r2, [r0, #16] 552; CHECK-NEXT: ldrh r4, [r0, #18] 553; CHECK-NEXT: ldrh r5, [r0, #20] 554; CHECK-NEXT: ldrh r6, [r0, #22] 555; CHECK-NEXT: lsls r6, r6, #16 556; CHECK-NEXT: lsls r5, r5, #16 557; CHECK-NEXT: lsls r4, r4, #16 558; CHECK-NEXT: lsls r2, r2, #16 559; CHECK-NEXT: mov r7, r1 560; CHECK-NEXT: adds r7, #32 561; CHECK-NEXT: stm r7!, {r2, r4, r5, r6} 562; CHECK-NEXT: subs r3, r3, #3 563; CHECK-NEXT: adds r1, #48 564; CHECK-NEXT: adds r0, #24 565; CHECK-NEXT: .LBB1_14: @ %while.body.prol.loopexit 566; CHECK-NEXT: ldr r7, [sp, #4] @ 4-byte Reload 567; CHECK-NEXT: ldr r2, [sp] @ 4-byte Reload 568; CHECK-NEXT: cmp r2, #3 569; CHECK-NEXT: bhs .LBB1_5 570; CHECK-NEXT: b .LBB1_6 571entry: 572 %cmp.not18 = icmp ult i32 %blockSize, 4 573 br i1 %cmp.not18, label %while.end, label %while.body.preheader 574 575while.body.preheader: ; preds = %entry 576 %shr = lshr i32 %blockSize, 2 577 %0 = add nsw i32 %shr, -1 578 %xtraiter = and i32 %shr, 3 579 %lcmp.mod.not = icmp eq i32 %xtraiter, 0 580 br i1 %lcmp.mod.not, label %while.body.prol.loopexit, label %while.body.prol 581 582while.body.prol: ; preds = %while.body.preheader 583 %arrayidx.i.prol = getelementptr inbounds i16, ptr %pSrc, i32 1 584 %1 = load i16, ptr %arrayidx.i.prol, align 2 585 %conv.i.prol = sext i16 %1 to i32 586 %shl.i.prol = shl nsw i32 %conv.i.prol, 16 587 %2 = load i16, ptr %pSrc, align 2 588 %conv22.i.prol = zext i16 %2 to i32 589 %add.ptr2.prol = getelementptr inbounds i16, ptr %pSrc, i32 4 590 %add.ptr3.prol = 
getelementptr inbounds i16, ptr %pSrc, i32 2 591 %arrayidx.i13.prol = getelementptr inbounds i16, ptr %pSrc, i32 3 592 %3 = load i16, ptr %arrayidx.i13.prol, align 2 593 %conv.i14.prol = sext i16 %3 to i32 594 %shl.i15.prol = shl nsw i32 %conv.i14.prol, 16 595 %4 = load i16, ptr %add.ptr3.prol, align 2 596 %conv22.i16.prol = zext i16 %4 to i32 597 %shl.prol = shl nuw i32 %conv22.i.prol, 16 598 %shl5.prol = shl nuw i32 %conv22.i16.prol, 16 599 %incdec.ptr.prol = getelementptr inbounds i32, ptr %pDst, i32 1 600 store i32 %shl.prol, ptr %pDst, align 4 601 %incdec.ptr7.prol = getelementptr inbounds i32, ptr %pDst, i32 2 602 store i32 %shl.i.prol, ptr %incdec.ptr.prol, align 4 603 %incdec.ptr8.prol = getelementptr inbounds i32, ptr %pDst, i32 3 604 store i32 %shl5.prol, ptr %incdec.ptr7.prol, align 4 605 %incdec.ptr9.prol = getelementptr inbounds i32, ptr %pDst, i32 4 606 store i32 %shl.i15.prol, ptr %incdec.ptr8.prol, align 4 607 %dec.prol = add nsw i32 %shr, -1 608 %prol.iter.cmp.not = icmp eq i32 %xtraiter, 1 609 br i1 %prol.iter.cmp.not, label %while.body.prol.loopexit, label %while.body.prol.1 610 611while.body.prol.1: ; preds = %while.body.prol 612 %arrayidx.i.prol.1 = getelementptr inbounds i16, ptr %pSrc, i32 5 613 %5 = load i16, ptr %arrayidx.i.prol.1, align 2 614 %conv.i.prol.1 = sext i16 %5 to i32 615 %shl.i.prol.1 = shl nsw i32 %conv.i.prol.1, 16 616 %6 = load i16, ptr %add.ptr2.prol, align 2 617 %conv22.i.prol.1 = zext i16 %6 to i32 618 %add.ptr2.prol.1 = getelementptr inbounds i16, ptr %pSrc, i32 8 619 %add.ptr3.prol.1 = getelementptr inbounds i16, ptr %pSrc, i32 6 620 %arrayidx.i13.prol.1 = getelementptr inbounds i16, ptr %pSrc, i32 7 621 %7 = load i16, ptr %arrayidx.i13.prol.1, align 2 622 %conv.i14.prol.1 = sext i16 %7 to i32 623 %shl.i15.prol.1 = shl nsw i32 %conv.i14.prol.1, 16 624 %8 = load i16, ptr %add.ptr3.prol.1, align 2 625 %conv22.i16.prol.1 = zext i16 %8 to i32 626 %shl.prol.1 = shl nuw i32 %conv22.i.prol.1, 16 627 %shl5.prol.1 = shl nuw i32 
%conv22.i16.prol.1, 16 628 %incdec.ptr.prol.1 = getelementptr inbounds i32, ptr %pDst, i32 5 629 store i32 %shl.prol.1, ptr %incdec.ptr9.prol, align 4 630 %incdec.ptr7.prol.1 = getelementptr inbounds i32, ptr %pDst, i32 6 631 store i32 %shl.i.prol.1, ptr %incdec.ptr.prol.1, align 4 632 %incdec.ptr8.prol.1 = getelementptr inbounds i32, ptr %pDst, i32 7 633 store i32 %shl5.prol.1, ptr %incdec.ptr7.prol.1, align 4 634 %incdec.ptr9.prol.1 = getelementptr inbounds i32, ptr %pDst, i32 8 635 store i32 %shl.i15.prol.1, ptr %incdec.ptr8.prol.1, align 4 636 %dec.prol.1 = add nsw i32 %shr, -2 637 %prol.iter.cmp.1.not = icmp eq i32 %xtraiter, 2 638 br i1 %prol.iter.cmp.1.not, label %while.body.prol.loopexit, label %while.body.prol.2 639 640while.body.prol.2: ; preds = %while.body.prol.1 641 %arrayidx.i.prol.2 = getelementptr inbounds i16, ptr %pSrc, i32 9 642 %9 = load i16, ptr %arrayidx.i.prol.2, align 2 643 %conv.i.prol.2 = sext i16 %9 to i32 644 %shl.i.prol.2 = shl nsw i32 %conv.i.prol.2, 16 645 %10 = load i16, ptr %add.ptr2.prol.1, align 2 646 %conv22.i.prol.2 = zext i16 %10 to i32 647 %add.ptr2.prol.2 = getelementptr inbounds i16, ptr %pSrc, i32 12 648 %add.ptr3.prol.2 = getelementptr inbounds i16, ptr %pSrc, i32 10 649 %arrayidx.i13.prol.2 = getelementptr inbounds i16, ptr %pSrc, i32 11 650 %11 = load i16, ptr %arrayidx.i13.prol.2, align 2 651 %conv.i14.prol.2 = sext i16 %11 to i32 652 %shl.i15.prol.2 = shl nsw i32 %conv.i14.prol.2, 16 653 %12 = load i16, ptr %add.ptr3.prol.2, align 2 654 %conv22.i16.prol.2 = zext i16 %12 to i32 655 %shl.prol.2 = shl nuw i32 %conv22.i.prol.2, 16 656 %shl5.prol.2 = shl nuw i32 %conv22.i16.prol.2, 16 657 %incdec.ptr.prol.2 = getelementptr inbounds i32, ptr %pDst, i32 9 658 store i32 %shl.prol.2, ptr %incdec.ptr9.prol.1, align 4 659 %incdec.ptr7.prol.2 = getelementptr inbounds i32, ptr %pDst, i32 10 660 store i32 %shl.i.prol.2, ptr %incdec.ptr.prol.2, align 4 661 %incdec.ptr8.prol.2 = getelementptr inbounds i32, ptr %pDst, i32 11 662 store 
i32 %shl5.prol.2, ptr %incdec.ptr7.prol.2, align 4 663 %incdec.ptr9.prol.2 = getelementptr inbounds i32, ptr %pDst, i32 12 664 store i32 %shl.i15.prol.2, ptr %incdec.ptr8.prol.2, align 4 665 %dec.prol.2 = add nsw i32 %shr, -3 666 br label %while.body.prol.loopexit 667 668while.body.prol.loopexit: ; preds = %while.body.prol, %while.body.prol.1, %while.body.prol.2, %while.body.preheader 669 %add.ptr2.lcssa.unr = phi ptr [ undef, %while.body.preheader ], [ %add.ptr2.prol, %while.body.prol ], [ %add.ptr2.prol.1, %while.body.prol.1 ], [ %add.ptr2.prol.2, %while.body.prol.2 ] 670 %incdec.ptr9.lcssa.unr = phi ptr [ undef, %while.body.preheader ], [ %incdec.ptr9.prol, %while.body.prol ], [ %incdec.ptr9.prol.1, %while.body.prol.1 ], [ %incdec.ptr9.prol.2, %while.body.prol.2 ] 671 %pDst.addr.021.unr = phi ptr [ %pDst, %while.body.preheader ], [ %incdec.ptr9.prol, %while.body.prol ], [ %incdec.ptr9.prol.1, %while.body.prol.1 ], [ %incdec.ptr9.prol.2, %while.body.prol.2 ] 672 %blkCnt.020.unr = phi i32 [ %shr, %while.body.preheader ], [ %dec.prol, %while.body.prol ], [ %dec.prol.1, %while.body.prol.1 ], [ %dec.prol.2, %while.body.prol.2 ] 673 %pIn.019.unr = phi ptr [ %pSrc, %while.body.preheader ], [ %add.ptr2.prol, %while.body.prol ], [ %add.ptr2.prol.1, %while.body.prol.1 ], [ %add.ptr2.prol.2, %while.body.prol.2 ] 674 %13 = icmp ult i32 %0, 3 675 br i1 %13, label %while.end, label %while.body 676 677while.body: ; preds = %while.body.prol.loopexit, %while.body 678 %pDst.addr.021 = phi ptr [ %incdec.ptr9.3, %while.body ], [ %pDst.addr.021.unr, %while.body.prol.loopexit ] 679 %blkCnt.020 = phi i32 [ %dec.3, %while.body ], [ %blkCnt.020.unr, %while.body.prol.loopexit ] 680 %pIn.019 = phi ptr [ %add.ptr2.3, %while.body ], [ %pIn.019.unr, %while.body.prol.loopexit ] 681 %arrayidx.i = getelementptr inbounds i16, ptr %pIn.019, i32 1 682 %14 = load i16, ptr %arrayidx.i, align 2 683 %conv.i = sext i16 %14 to i32 684 %shl.i = shl nsw i32 %conv.i, 16 685 %15 = load i16, ptr %pIn.019, 
align 2 686 %conv22.i = zext i16 %15 to i32 687 %add.ptr2 = getelementptr inbounds i16, ptr %pIn.019, i32 4 688 %add.ptr3 = getelementptr inbounds i16, ptr %pIn.019, i32 2 689 %arrayidx.i13 = getelementptr inbounds i16, ptr %pIn.019, i32 3 690 %16 = load i16, ptr %arrayidx.i13, align 2 691 %conv.i14 = sext i16 %16 to i32 692 %shl.i15 = shl nsw i32 %conv.i14, 16 693 %17 = load i16, ptr %add.ptr3, align 2 694 %conv22.i16 = zext i16 %17 to i32 695 %shl = shl nuw i32 %conv22.i, 16 696 %shl5 = shl nuw i32 %conv22.i16, 16 697 %incdec.ptr = getelementptr inbounds i32, ptr %pDst.addr.021, i32 1 698 store i32 %shl, ptr %pDst.addr.021, align 4 699 %incdec.ptr7 = getelementptr inbounds i32, ptr %pDst.addr.021, i32 2 700 store i32 %shl.i, ptr %incdec.ptr, align 4 701 %incdec.ptr8 = getelementptr inbounds i32, ptr %pDst.addr.021, i32 3 702 store i32 %shl5, ptr %incdec.ptr7, align 4 703 %incdec.ptr9 = getelementptr inbounds i32, ptr %pDst.addr.021, i32 4 704 store i32 %shl.i15, ptr %incdec.ptr8, align 4 705 %arrayidx.i.1 = getelementptr inbounds i16, ptr %pIn.019, i32 5 706 %18 = load i16, ptr %arrayidx.i.1, align 2 707 %conv.i.1 = sext i16 %18 to i32 708 %shl.i.1 = shl nsw i32 %conv.i.1, 16 709 %19 = load i16, ptr %add.ptr2, align 2 710 %conv22.i.1 = zext i16 %19 to i32 711 %add.ptr2.1 = getelementptr inbounds i16, ptr %pIn.019, i32 8 712 %add.ptr3.1 = getelementptr inbounds i16, ptr %pIn.019, i32 6 713 %arrayidx.i13.1 = getelementptr inbounds i16, ptr %pIn.019, i32 7 714 %20 = load i16, ptr %arrayidx.i13.1, align 2 715 %conv.i14.1 = sext i16 %20 to i32 716 %shl.i15.1 = shl nsw i32 %conv.i14.1, 16 717 %21 = load i16, ptr %add.ptr3.1, align 2 718 %conv22.i16.1 = zext i16 %21 to i32 719 %shl.1 = shl nuw i32 %conv22.i.1, 16 720 %shl5.1 = shl nuw i32 %conv22.i16.1, 16 721 %incdec.ptr.1 = getelementptr inbounds i32, ptr %pDst.addr.021, i32 5 722 store i32 %shl.1, ptr %incdec.ptr9, align 4 723 %incdec.ptr7.1 = getelementptr inbounds i32, ptr %pDst.addr.021, i32 6 724 store i32 
%shl.i.1, ptr %incdec.ptr.1, align 4 725 %incdec.ptr8.1 = getelementptr inbounds i32, ptr %pDst.addr.021, i32 7 726 store i32 %shl5.1, ptr %incdec.ptr7.1, align 4 727 %incdec.ptr9.1 = getelementptr inbounds i32, ptr %pDst.addr.021, i32 8 728 store i32 %shl.i15.1, ptr %incdec.ptr8.1, align 4 729 %arrayidx.i.2 = getelementptr inbounds i16, ptr %pIn.019, i32 9 730 %22 = load i16, ptr %arrayidx.i.2, align 2 731 %conv.i.2 = sext i16 %22 to i32 732 %shl.i.2 = shl nsw i32 %conv.i.2, 16 733 %23 = load i16, ptr %add.ptr2.1, align 2 734 %conv22.i.2 = zext i16 %23 to i32 735 %add.ptr2.2 = getelementptr inbounds i16, ptr %pIn.019, i32 12 736 %add.ptr3.2 = getelementptr inbounds i16, ptr %pIn.019, i32 10 737 %arrayidx.i13.2 = getelementptr inbounds i16, ptr %pIn.019, i32 11 738 %24 = load i16, ptr %arrayidx.i13.2, align 2 739 %conv.i14.2 = sext i16 %24 to i32 740 %shl.i15.2 = shl nsw i32 %conv.i14.2, 16 741 %25 = load i16, ptr %add.ptr3.2, align 2 742 %conv22.i16.2 = zext i16 %25 to i32 743 %shl.2 = shl nuw i32 %conv22.i.2, 16 744 %shl5.2 = shl nuw i32 %conv22.i16.2, 16 745 %incdec.ptr.2 = getelementptr inbounds i32, ptr %pDst.addr.021, i32 9 746 store i32 %shl.2, ptr %incdec.ptr9.1, align 4 747 %incdec.ptr7.2 = getelementptr inbounds i32, ptr %pDst.addr.021, i32 10 748 store i32 %shl.i.2, ptr %incdec.ptr.2, align 4 749 %incdec.ptr8.2 = getelementptr inbounds i32, ptr %pDst.addr.021, i32 11 750 store i32 %shl5.2, ptr %incdec.ptr7.2, align 4 751 %incdec.ptr9.2 = getelementptr inbounds i32, ptr %pDst.addr.021, i32 12 752 store i32 %shl.i15.2, ptr %incdec.ptr8.2, align 4 753 %arrayidx.i.3 = getelementptr inbounds i16, ptr %pIn.019, i32 13 754 %26 = load i16, ptr %arrayidx.i.3, align 2 755 %conv.i.3 = sext i16 %26 to i32 756 %shl.i.3 = shl nsw i32 %conv.i.3, 16 757 %27 = load i16, ptr %add.ptr2.2, align 2 758 %conv22.i.3 = zext i16 %27 to i32 759 %add.ptr2.3 = getelementptr inbounds i16, ptr %pIn.019, i32 16 760 %add.ptr3.3 = getelementptr inbounds i16, ptr %pIn.019, i32 14 761 
%arrayidx.i13.3 = getelementptr inbounds i16, ptr %pIn.019, i32 15 762 %28 = load i16, ptr %arrayidx.i13.3, align 2 763 %conv.i14.3 = sext i16 %28 to i32 764 %shl.i15.3 = shl nsw i32 %conv.i14.3, 16 765 %29 = load i16, ptr %add.ptr3.3, align 2 766 %conv22.i16.3 = zext i16 %29 to i32 767 %shl.3 = shl nuw i32 %conv22.i.3, 16 768 %shl5.3 = shl nuw i32 %conv22.i16.3, 16 769 %incdec.ptr.3 = getelementptr inbounds i32, ptr %pDst.addr.021, i32 13 770 store i32 %shl.3, ptr %incdec.ptr9.2, align 4 771 %incdec.ptr7.3 = getelementptr inbounds i32, ptr %pDst.addr.021, i32 14 772 store i32 %shl.i.3, ptr %incdec.ptr.3, align 4 773 %incdec.ptr8.3 = getelementptr inbounds i32, ptr %pDst.addr.021, i32 15 774 store i32 %shl5.3, ptr %incdec.ptr7.3, align 4 775 %incdec.ptr9.3 = getelementptr inbounds i32, ptr %pDst.addr.021, i32 16 776 store i32 %shl.i15.3, ptr %incdec.ptr8.3, align 4 777 %dec.3 = add nsw i32 %blkCnt.020, -4 778 %cmp.not.3 = icmp eq i32 %dec.3, 0 779 br i1 %cmp.not.3, label %while.end, label %while.body 780 781while.end: ; preds = %while.body.prol.loopexit, %while.body, %entry 782 %pIn.0.lcssa = phi ptr [ %pSrc, %entry ], [ %add.ptr2.lcssa.unr, %while.body.prol.loopexit ], [ %add.ptr2.3, %while.body ] 783 %pDst.addr.0.lcssa = phi ptr [ %pDst, %entry ], [ %incdec.ptr9.lcssa.unr, %while.body.prol.loopexit ], [ %incdec.ptr9.3, %while.body ] 784 %rem = and i32 %blockSize, 3 785 %cmp11.not23 = icmp eq i32 %rem, 0 786 br i1 %cmp11.not23, label %while.end17, label %while.body12 787 788while.body12: ; preds = %while.end 789 %30 = load i16, ptr %pIn.0.lcssa, align 2 790 %conv = sext i16 %30 to i32 791 %shl14 = shl nsw i32 %conv, 16 792 store i32 %shl14, ptr %pDst.addr.0.lcssa, align 4 793 %cmp11.not = icmp eq i32 %rem, 1 794 br i1 %cmp11.not, label %while.end17, label %while.body12.1 795 796while.body12.1: ; preds = %while.body12 797 %incdec.ptr15 = getelementptr inbounds i32, ptr %pDst.addr.0.lcssa, i32 1 798 %incdec.ptr13 = getelementptr inbounds i16, ptr %pIn.0.lcssa, i32 1 
799 %31 = load i16, ptr %incdec.ptr13, align 2 800 %conv.1 = sext i16 %31 to i32 801 %shl14.1 = shl nsw i32 %conv.1, 16 802 store i32 %shl14.1, ptr %incdec.ptr15, align 4 803 %cmp11.not.1 = icmp eq i32 %rem, 2 804 br i1 %cmp11.not.1, label %while.end17, label %while.body12.2 805 806while.body12.2: ; preds = %while.body12.1 807 %incdec.ptr15.1 = getelementptr inbounds i32, ptr %pDst.addr.0.lcssa, i32 2 808 %incdec.ptr13.1 = getelementptr inbounds i16, ptr %pIn.0.lcssa, i32 2 809 %32 = load i16, ptr %incdec.ptr13.1, align 2 810 %conv.2 = sext i16 %32 to i32 811 %shl14.2 = shl nsw i32 %conv.2, 16 812 store i32 %shl14.2, ptr %incdec.ptr15.1, align 4 813 br label %while.end17 814 815while.end17: ; preds = %while.body12, %while.body12.1, %while.body12.2, %while.end 816 ret void 817} 818