; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=thumbv7s-none-eabi %s -o - | FileCheck %s

declare <8 x i8> @llvm.arm.neon.vtbl2(<8 x i8> %shuffle.i.i307, <8 x i8> %shuffle.i27.i308, <8 x i8> %vtbl2.i25.i)

; Motivating example: both i64 lanes of a <2 x i64> load are bitcast to
; <8 x i8> and handed straight to vtbl2. The bitcasts would force the values
; to go through the GPRs, whereas they are defined on VPRs and used on VPRs.
; The expected output below keeps the table in d16/d17 with no GPR transfer.
;
define void @motivatingExample(ptr %addr, ptr %addr2) {
; CHECK-LABEL: motivatingExample:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vld1.64 {d16, d17}, [r0]
; CHECK-NEXT:    vldr d18, [r1]
; CHECK-NEXT:    vtbl.8 d16, {d16, d17}, d18
; CHECK-NEXT:    vstr d16, [r1]
; CHECK-NEXT:    bx lr
  ; 128-bit table from %addr, 64-bit third vtbl2 operand from %addr2.
  %table.q = load <2 x i64>, ptr %addr
  %sel.d = load <8 x i8>, ptr %addr2
  ; Split the table into its two 64-bit lanes using constant indices.
  %lane0 = extractelement <2 x i64> %table.q, i32 0
  %lane1 = extractelement <2 x i64> %table.q, i32 1
  ; Reinterpret each scalar lane as a byte vector for the intrinsic.
  %tbl.lo = bitcast i64 %lane0 to <8 x i8>
  %tbl.hi = bitcast i64 %lane1 to <8 x i8>
  %lookup = tail call <8 x i8> @llvm.arm.neon.vtbl2(<8 x i8> %tbl.lo, <8 x i8> %tbl.hi, <8 x i8> %sel.d)
  ; The result overwrites the vector that was read from %addr2.
  store <8 x i8> %lookup, ptr %addr2
  ret void
}

; Check that we do not perform the transformation for a dynamic index.
define void @dynamicIndex(ptr %addr, ptr %addr2, i32 %index) {
; CHECK-LABEL: dynamicIndex:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    .save {r4, r6, r7, lr}
; CHECK-NEXT:    push {r4, r6, r7, lr}
; CHECK-NEXT:    .setfp r7, sp, #8
; CHECK-NEXT:    add r7, sp, #8
; CHECK-NEXT:    .pad #16
; CHECK-NEXT:    sub sp, #16
; CHECK-NEXT:    mov r4, sp
; CHECK-NEXT:    bfc r4, #0, #4
; CHECK-NEXT:    mov sp, r4
; CHECK-NEXT:    vld1.64 {d16, d17}, [r0]
; CHECK-NEXT:    adds r0, r2, r2
; CHECK-NEXT:    and r2, r0, #3
; CHECK-NEXT:    adds r0, #1
; CHECK-NEXT:    mov r12, sp
; CHECK-NEXT:    and r0, r0, #3
; CHECK-NEXT:    lsls r2, r2, #2
; CHECK-NEXT:    mov r3, r12
; CHECK-NEXT:    vst1.64 {d16, d17}, [r3:128], r2
; CHECK-NEXT:    orr.w r0, r12, r0, lsl #2
; CHECK-NEXT:    sub.w r4, r7, #8
; CHECK-NEXT:    ldr r2, [r3]
; CHECK-NEXT:    ldr r0, [r0]
; CHECK-NEXT:    vldr d18, [r1]
; CHECK-NEXT:    vmov d16, r2, r0
; CHECK-NEXT:    vtbl.8 d16, {d16, d17}, d18
; CHECK-NEXT:    vstr d16, [r1]
; CHECK-NEXT:    mov sp, r4
; CHECK-NEXT:    pop {r4, r6, r7, pc}
  ; 128-bit table from %addr, 64-bit third vtbl2 operand from %addr2.
  %table.q = load <2 x i64>, ptr %addr
  %sel.d = load <8 x i8>, ptr %addr2
  ; The first lane is chosen at run time by %index; the second lane still
  ; uses the constant index 1. In the expected output above, the vector is
  ; stored to a stack slot and the dynamic lane is reloaded with integer
  ; loads, then moved back into d16 with vmov.
  %lane.dyn = extractelement <2 x i64> %table.q, i32 %index
  %lane1 = extractelement <2 x i64> %table.q, i32 1
  ; Reinterpret each scalar lane as a byte vector for the intrinsic.
  %tbl.lo = bitcast i64 %lane.dyn to <8 x i8>
  %tbl.hi = bitcast i64 %lane1 to <8 x i8>
  %lookup = tail call <8 x i8> @llvm.arm.neon.vtbl2(<8 x i8> %tbl.lo, <8 x i8> %tbl.hi, <8 x i8> %sel.d)
  ; The result overwrites the vector that was read from %addr2.
  store <8 x i8> %lookup, ptr %addr2
  ret void
}

; Check that we do not perform the transformation when there are several uses
; of the result of the bitcast.
define i64 @severalUses(ptr %addr, ptr %addr2) {
; CHECK-LABEL: severalUses:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vld1.64 {d16, d17}, [r0]
; CHECK-NEXT:    vmov r0, r2, d16
; CHECK-NEXT:    vldr d18, [r1]
; CHECK-NEXT:    vtbl.8 d16, {d16, d17}, d18
; CHECK-NEXT:    vstr d16, [r1]
; CHECK-NEXT:    mov r1, r2
; CHECK-NEXT:    bx lr
  ; 128-bit table from %addr, 64-bit third vtbl2 operand from %addr2.
  %table.q = load <2 x i64>, ptr %addr
  %sel.d = load <8 x i8>, ptr %addr2
  ; Lane 0 has two consumers: the bitcast feeding vtbl2 and the i64 return
  ; value. The expected output above therefore also copies d16 into core
  ; registers (vmov r0, r2, d16 followed by mov r1, r2).
  %lane0 = extractelement <2 x i64> %table.q, i32 0
  %lane1 = extractelement <2 x i64> %table.q, i32 1
  ; Reinterpret each scalar lane as a byte vector for the intrinsic.
  %tbl.lo = bitcast i64 %lane0 to <8 x i8>
  %tbl.hi = bitcast i64 %lane1 to <8 x i8>
  %lookup = tail call <8 x i8> @llvm.arm.neon.vtbl2(<8 x i8> %tbl.lo, <8 x i8> %tbl.hi, <8 x i8> %sel.d)
  ; The result overwrites the vector that was read from %addr2.
  store <8 x i8> %lookup, ptr %addr2
  ; Return the scalar lane itself, which is the extra use.
  ret i64 %lane0
}