; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=arm-apple-ios -mattr=+neon | FileCheck %s

; Check that a bitwise select of the form (or (and A, Mask), (and C, ~Mask))
; with a constant mask is selected to NEON vbsl (d registers) or vbit
; (q registers).

define <8 x i8> @v_bsli8(ptr %A, ptr %B, ptr %C) nounwind {
; CHECK-LABEL: v_bsli8:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vmov.i8 d16, #0x3
; CHECK-NEXT:    vldr d17, [r2]
; CHECK-NEXT:    vldr d18, [r0]
; CHECK-NEXT:    vbsl d16, d18, d17
; CHECK-NEXT:    vmov r0, r1, d16
; CHECK-NEXT:    mov pc, lr
  %tmp1 = load <8 x i8>, ptr %A
  %tmp2 = load <8 x i8>, ptr %B
  %tmp3 = load <8 x i8>, ptr %C
  %tmp4 = and <8 x i8> %tmp1, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
  %tmp6 = and <8 x i8> %tmp3, <i8 -4, i8 -4, i8 -4, i8 -4, i8 -4, i8 -4, i8 -4, i8 -4>
  %tmp7 = or <8 x i8> %tmp4, %tmp6
  ret <8 x i8> %tmp7
}

define <4 x i16> @v_bsli16(ptr %A, ptr %B, ptr %C) nounwind {
; CHECK-LABEL: v_bsli16:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vmov.i16 d16, #0x3
; CHECK-NEXT:    vldr d17, [r2]
; CHECK-NEXT:    vldr d18, [r0]
; CHECK-NEXT:    vbsl d16, d18, d17
; CHECK-NEXT:    vmov r0, r1, d16
; CHECK-NEXT:    mov pc, lr
  %tmp1 = load <4 x i16>, ptr %A
  %tmp2 = load <4 x i16>, ptr %B
  %tmp3 = load <4 x i16>, ptr %C
  %tmp4 = and <4 x i16> %tmp1, <i16 3, i16 3, i16 3, i16 3>
  %tmp6 = and <4 x i16> %tmp3, <i16 -4, i16 -4, i16 -4, i16 -4>
  %tmp7 = or <4 x i16> %tmp4, %tmp6
  ret <4 x i16> %tmp7
}

define <2 x i32> @v_bsli32(ptr %A, ptr %B, ptr %C) nounwind {
; CHECK-LABEL: v_bsli32:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vmov.i32 d16, #0x3
; CHECK-NEXT:    vldr d17, [r2]
; CHECK-NEXT:    vldr d18, [r0]
; CHECK-NEXT:    vbsl d16, d18, d17
; CHECK-NEXT:    vmov r0, r1, d16
; CHECK-NEXT:    mov pc, lr
  %tmp1 = load <2 x i32>, ptr %A
  %tmp2 = load <2 x i32>, ptr %B
  %tmp3 = load <2 x i32>, ptr %C
  %tmp4 = and <2 x i32> %tmp1, <i32 3, i32 3>
  %tmp6 = and <2 x i32> %tmp3, <i32 -4, i32 -4>
  %tmp7 = or <2 x i32> %tmp4, %tmp6
  ret <2 x i32> %tmp7
}

define <1 x i64> @v_bsli64(ptr %A, ptr %B, ptr %C) nounwind {
; CHECK-LABEL: v_bsli64:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vldr d17, [r2]
; CHECK-NEXT:    vldr d16, LCPI3_0
; CHECK-NEXT:    vldr d18, [r0]
; CHECK-NEXT:    vbsl d16, d18, d17
; CHECK-NEXT:    vmov r0, r1, d16
; CHECK-NEXT:    mov pc, lr
  %tmp1 = load <1 x i64>, ptr %A
  %tmp2 = load <1 x i64>, ptr %B
  %tmp3 = load <1 x i64>, ptr %C
  %tmp4 = and <1 x i64> %tmp1, <i64 3>
  %tmp6 = and <1 x i64> %tmp3, <i64 -4>
  %tmp7 = or <1 x i64> %tmp4, %tmp6
  ret <1 x i64> %tmp7
}

define <16 x i8> @v_bslQi8(ptr %A, ptr %B, ptr %C) nounwind {
; CHECK-LABEL: v_bslQi8:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vld1.32 {d16, d17}, [r2]
; CHECK-NEXT:    vmov.i8 q9, #0x3
; CHECK-NEXT:    vld1.32 {d20, d21}, [r0]
; CHECK-NEXT:    vbit q8, q10, q9
; CHECK-NEXT:    vmov r0, r1, d16
; CHECK-NEXT:    vmov r2, r3, d17
; CHECK-NEXT:    mov pc, lr
  %tmp1 = load <16 x i8>, ptr %A
  %tmp2 = load <16 x i8>, ptr %B
  %tmp3 = load <16 x i8>, ptr %C
  %tmp4 = and <16 x i8> %tmp1, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
  %tmp6 = and <16 x i8> %tmp3, <i8 -4, i8 -4, i8 -4, i8 -4, i8 -4, i8 -4, i8 -4, i8 -4, i8 -4, i8 -4, i8 -4, i8 -4, i8 -4, i8 -4, i8 -4, i8 -4>
  %tmp7 = or <16 x i8> %tmp4, %tmp6
  ret <16 x i8> %tmp7
}

define <8 x i16> @v_bslQi16(ptr %A, ptr %B, ptr %C) nounwind {
; CHECK-LABEL: v_bslQi16:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vld1.32 {d16, d17}, [r2]
; CHECK-NEXT:    vmov.i16 q9, #0x3
; CHECK-NEXT:    vld1.32 {d20, d21}, [r0]
; CHECK-NEXT:    vbit q8, q10, q9
; CHECK-NEXT:    vmov r0, r1, d16
; CHECK-NEXT:    vmov r2, r3, d17
; CHECK-NEXT:    mov pc, lr
  %tmp1 = load <8 x i16>, ptr %A
  %tmp2 = load <8 x i16>, ptr %B
  %tmp3 = load <8 x i16>, ptr %C
  %tmp4 = and <8 x i16> %tmp1, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
  %tmp6 = and <8 x i16> %tmp3, <i16 -4, i16 -4, i16 -4, i16 -4, i16 -4, i16 -4, i16 -4, i16 -4>
  %tmp7 = or <8 x i16> %tmp4, %tmp6
  ret <8 x i16> %tmp7
}

define <4 x i32> @v_bslQi32(ptr %A, ptr %B, ptr %C) nounwind {
; CHECK-LABEL: v_bslQi32:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vld1.32 {d16, d17}, [r2]
; CHECK-NEXT:    vmov.i32 q9, #0x3
; CHECK-NEXT:    vld1.32 {d20, d21}, [r0]
; CHECK-NEXT:    vbit q8, q10, q9
; CHECK-NEXT:    vmov r0, r1, d16
; CHECK-NEXT:    vmov r2, r3, d17
; CHECK-NEXT:    mov pc, lr
  %tmp1 = load <4 x i32>, ptr %A
  %tmp2 = load <4 x i32>, ptr %B
  %tmp3 = load <4 x i32>, ptr %C
  %tmp4 = and <4 x i32> %tmp1, <i32 3, i32 3, i32 3, i32 3>
  %tmp6 = and <4 x i32> %tmp3, <i32 -4, i32 -4, i32 -4, i32 -4>
  %tmp7 = or <4 x i32> %tmp4, %tmp6
  ret <4 x i32> %tmp7
}

define <2 x i64> @v_bslQi64(ptr %A, ptr %B, ptr %C) nounwind {
; CHECK-LABEL: v_bslQi64:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vld1.32 {d16, d17}, [r2]
; CHECK-NEXT:    vld1.32 {d18, d19}, [r0]
; CHECK-NEXT:    adr r0, LCPI7_0
; CHECK-NEXT:    vld1.64 {d20, d21}, [r0:128]
; CHECK-NEXT:    vbit q8, q9, q10
; CHECK-NEXT:    vmov r0, r1, d16
; CHECK-NEXT:    vmov r2, r3, d17
; CHECK-NEXT:    mov pc, lr
  %tmp1 = load <2 x i64>, ptr %A
  %tmp2 = load <2 x i64>, ptr %B
  %tmp3 = load <2 x i64>, ptr %C
  %tmp4 = and <2 x i64> %tmp1, <i64 3, i64 3>
  %tmp6 = and <2 x i64> %tmp3, <i64 -4, i64 -4>
  %tmp7 = or <2 x i64> %tmp4, %tmp6
  ret <2 x i64> %tmp7
}