; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=armv7-eabihf -mattr=+neon -verify-machineinstrs %s -o - | FileCheck %s

define <8 x i8> @v_bsli8(ptr %A, ptr %B, ptr %C) nounwind {
; CHECK-LABEL: v_bsli8:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vldr d18, [r0]
; CHECK-NEXT:    vldr d16, [r2]
; CHECK-NEXT:    vorr d0, d18, d18
; CHECK-NEXT:    vldr d17, [r1]
; CHECK-NEXT:    vbsl d0, d17, d16
; CHECK-NEXT:    bx lr
  %tmp1 = load <8 x i8>, ptr %A
  %tmp2 = load <8 x i8>, ptr %B
  %tmp3 = load <8 x i8>, ptr %C
  %tmp4 = and <8 x i8> %tmp1, %tmp2
  %tmp5 = xor <8 x i8> %tmp1, < i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1 >
  %tmp6 = and <8 x i8> %tmp5, %tmp3
  %tmp7 = or <8 x i8> %tmp4, %tmp6
  ret <8 x i8> %tmp7
}

define <4 x i16> @v_bsli16(ptr %A, ptr %B, ptr %C) nounwind {
; CHECK-LABEL: v_bsli16:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vldr d18, [r0]
; CHECK-NEXT:    vldr d16, [r2]
; CHECK-NEXT:    vorr d0, d18, d18
; CHECK-NEXT:    vldr d17, [r1]
; CHECK-NEXT:    vbsl d0, d17, d16
; CHECK-NEXT:    bx lr
  %tmp1 = load <4 x i16>, ptr %A
  %tmp2 = load <4 x i16>, ptr %B
  %tmp3 = load <4 x i16>, ptr %C
  %tmp4 = and <4 x i16> %tmp1, %tmp2
  %tmp5 = xor <4 x i16> %tmp1, < i16 -1, i16 -1, i16 -1, i16 -1 >
  %tmp6 = and <4 x i16> %tmp5, %tmp3
  %tmp7 = or <4 x i16> %tmp4, %tmp6
  ret <4 x i16> %tmp7
}

define <2 x i32> @v_bsli32(ptr %A, ptr %B, ptr %C) nounwind {
; CHECK-LABEL: v_bsli32:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vldr d18, [r0]
; CHECK-NEXT:    vldr d16, [r2]
; CHECK-NEXT:    vorr d0, d18, d18
; CHECK-NEXT:    vldr d17, [r1]
; CHECK-NEXT:    vbsl d0, d17, d16
; CHECK-NEXT:    bx lr
  %tmp1 = load <2 x i32>, ptr %A
  %tmp2 = load <2 x i32>, ptr %B
  %tmp3 = load <2 x i32>, ptr %C
  %tmp4 = and <2 x i32> %tmp1, %tmp2
  %tmp5 = xor <2 x i32> %tmp1, < i32 -1, i32 -1 >
  %tmp6 = and <2 x i32> %tmp5, %tmp3
  %tmp7 = or <2 x i32> %tmp4, %tmp6
  ret <2 x i32> %tmp7
}

define <1 x i64> @v_bsli64(ptr %A, ptr %B, ptr %C) nounwind {
; CHECK-LABEL: v_bsli64:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vldr d18, [r0]
; CHECK-NEXT:    vldr d16, [r2]
; CHECK-NEXT:    vorr d0, d18, d18
; CHECK-NEXT:    vldr d17, [r1]
; CHECK-NEXT:    vbsl d0, d17, d16
; CHECK-NEXT:    bx lr
  %tmp1 = load <1 x i64>, ptr %A
  %tmp2 = load <1 x i64>, ptr %B
  %tmp3 = load <1 x i64>, ptr %C
  %tmp4 = and <1 x i64> %tmp1, %tmp2
  %tmp5 = xor <1 x i64> %tmp1, < i64 -1 >
  %tmp6 = and <1 x i64> %tmp5, %tmp3
  %tmp7 = or <1 x i64> %tmp4, %tmp6
  ret <1 x i64> %tmp7
}

define <16 x i8> @v_bslQi8(ptr %A, ptr %B, ptr %C) nounwind {
; CHECK-LABEL: v_bslQi8:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vld1.64 {d20, d21}, [r0]
; CHECK-NEXT:    vorr q0, q10, q10
; CHECK-NEXT:    vld1.64 {d16, d17}, [r2]
; CHECK-NEXT:    vld1.64 {d18, d19}, [r1]
; CHECK-NEXT:    vbsl q0, q9, q8
; CHECK-NEXT:    bx lr
  %tmp1 = load <16 x i8>, ptr %A
  %tmp2 = load <16 x i8>, ptr %B
  %tmp3 = load <16 x i8>, ptr %C
  %tmp4 = and <16 x i8> %tmp1, %tmp2
  %tmp5 = xor <16 x i8> %tmp1, < i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1 >
  %tmp6 = and <16 x i8> %tmp5, %tmp3
  %tmp7 = or <16 x i8> %tmp4, %tmp6
  ret <16 x i8> %tmp7
}

define <8 x i16> @v_bslQi16(ptr %A, ptr %B, ptr %C) nounwind {
; CHECK-LABEL: v_bslQi16:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vld1.64 {d20, d21}, [r0]
; CHECK-NEXT:    vorr q0, q10, q10
; CHECK-NEXT:    vld1.64 {d16, d17}, [r2]
; CHECK-NEXT:    vld1.64 {d18, d19}, [r1]
; CHECK-NEXT:    vbsl q0, q9, q8
; CHECK-NEXT:    bx lr
  %tmp1 = load <8 x i16>, ptr %A
  %tmp2 = load <8 x i16>, ptr %B
  %tmp3 = load <8 x i16>, ptr %C
  %tmp4 = and <8 x i16> %tmp1, %tmp2
  %tmp5 = xor <8 x i16> %tmp1, < i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1 >
  %tmp6 = and <8 x i16> %tmp5, %tmp3
  %tmp7 = or <8 x i16> %tmp4, %tmp6
  ret <8 x i16> %tmp7
}

define <4 x i32> @v_bslQi32(ptr %A, ptr %B, ptr %C) nounwind {
; CHECK-LABEL: v_bslQi32:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vld1.64 {d20, d21}, [r0]
; CHECK-NEXT:    vorr q0, q10, q10
; CHECK-NEXT:    vld1.64 {d16, d17}, [r2]
; CHECK-NEXT:    vld1.64 {d18, d19}, [r1]
; CHECK-NEXT:    vbsl q0, q9, q8
; CHECK-NEXT:    bx lr
  %tmp1 = load <4 x i32>, ptr %A
  %tmp2 = load <4 x i32>, ptr %B
  %tmp3 = load <4 x i32>, ptr %C
  %tmp4 = and <4 x i32> %tmp1, %tmp2
  %tmp5 = xor <4 x i32> %tmp1, < i32 -1, i32 -1, i32 -1, i32 -1 >
  %tmp6 = and <4 x i32> %tmp5, %tmp3
  %tmp7 = or <4 x i32> %tmp4, %tmp6
  ret <4 x i32> %tmp7
}

define <2 x i64> @v_bslQi64(ptr %A, ptr %B, ptr %C) nounwind {
; CHECK-LABEL: v_bslQi64:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vld1.64 {d20, d21}, [r0]
; CHECK-NEXT:    vorr q0, q10, q10
; CHECK-NEXT:    vld1.64 {d16, d17}, [r2]
; CHECK-NEXT:    vld1.64 {d18, d19}, [r1]
; CHECK-NEXT:    vbsl q0, q9, q8
; CHECK-NEXT:    bx lr
  %tmp1 = load <2 x i64>, ptr %A
  %tmp2 = load <2 x i64>, ptr %B
  %tmp3 = load <2 x i64>, ptr %C
  %tmp4 = and <2 x i64> %tmp1, %tmp2
  %tmp5 = xor <2 x i64> %tmp1, < i64 -1, i64 -1 >
  %tmp6 = and <2 x i64> %tmp5, %tmp3
  %tmp7 = or <2 x i64> %tmp4, %tmp6
  ret <2 x i64> %tmp7
}

define <8 x i8> @f1(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c) nounwind readnone optsize ssp {
; CHECK-LABEL: f1:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vbsl d0, d1, d2
; CHECK-NEXT:    bx lr
  %vbsl.i = tail call <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c) nounwind
  ret <8 x i8> %vbsl.i
}

define <4 x i16> @f2(<4 x i16> %a, <4 x i16> %b, <4 x i16> %c) nounwind readnone optsize ssp {
; CHECK-LABEL: f2:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vbsl d0, d1, d2
; CHECK-NEXT:    bx lr
  %vbsl3.i = tail call <4 x i16> @llvm.arm.neon.vbsl.v4i16(<4 x i16> %a, <4 x i16> %b, <4 x i16> %c) nounwind
  ret <4 x i16> %vbsl3.i
}

define <2 x i32> @f3(<2 x i32> %a, <2 x i32> %b, <2 x i32> %c) nounwind readnone optsize ssp {
; CHECK-LABEL: f3:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vbsl d0, d1, d2
; CHECK-NEXT:    bx lr
  %vbsl3.i = tail call <2 x i32> @llvm.arm.neon.vbsl.v2i32(<2 x i32> %a, <2 x i32> %b, <2 x i32> %c) nounwind
  ret <2 x i32> %vbsl3.i
}

define <2 x float> @f4(<2 x float> %a, <2 x float> %b, <2 x float> %c) nounwind readnone optsize ssp {
; CHECK-LABEL: f4:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vbsl d0, d1, d2
; CHECK-NEXT:    bx lr
  %vbsl4.i = tail call <2 x float> @llvm.arm.neon.vbsl.v2f32(<2 x float> %a, <2 x float> %b, <2 x float> %c) nounwind
  ret <2 x float> %vbsl4.i
}

define <16 x i8> @g1(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) nounwind readnone optsize ssp {
; CHECK-LABEL: g1:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vbsl q0, q1, q2
; CHECK-NEXT:    bx lr
  %vbsl.i = tail call <16 x i8> @llvm.arm.neon.vbsl.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) nounwind
  ret <16 x i8> %vbsl.i
}

define <8 x i16> @g2(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c) nounwind readnone optsize ssp {
; CHECK-LABEL: g2:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vbsl q0, q1, q2
; CHECK-NEXT:    bx lr
  %vbsl3.i = tail call <8 x i16> @llvm.arm.neon.vbsl.v8i16(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c) nounwind
  ret <8 x i16> %vbsl3.i
}

define <4 x i32> @g3(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) nounwind readnone optsize ssp {
; CHECK-LABEL: g3:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vbsl q0, q1, q2
; CHECK-NEXT:    bx lr
  %vbsl3.i = tail call <4 x i32> @llvm.arm.neon.vbsl.v4i32(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) nounwind
  ret <4 x i32> %vbsl3.i
}

define <4 x float> @g4(<4 x float> %a, <4 x float> %b, <4 x float> %c) nounwind readnone optsize ssp {
; CHECK-LABEL: g4:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vbsl q0, q1, q2
; CHECK-NEXT:    bx lr
  %vbsl4.i = tail call <4 x float> @llvm.arm.neon.vbsl.v4f32(<4 x float> %a, <4 x float> %b, <4 x float> %c) nounwind
  ret <4 x float> %vbsl4.i
}

define <1 x i64> @test_vbsl_s64(<1 x i64> %a, <1 x i64> %b, <1 x i64> %c) nounwind readnone optsize ssp {
; CHECK-LABEL: test_vbsl_s64:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vbsl d0, d1, d2
; CHECK-NEXT:    bx lr
  %vbsl3.i = tail call <1 x i64> @llvm.arm.neon.vbsl.v1i64(<1 x i64> %a, <1 x i64> %b, <1 x i64> %c) nounwind
  ret <1 x i64> %vbsl3.i
}

define <1 x i64> @test_vbsl_u64(<1 x i64> %a, <1 x i64> %b, <1 x i64> %c) nounwind readnone optsize ssp {
; CHECK-LABEL: test_vbsl_u64:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vbsl d0, d1, d2
; CHECK-NEXT:    bx lr
  %vbsl3.i = tail call <1 x i64> @llvm.arm.neon.vbsl.v1i64(<1 x i64> %a, <1 x i64> %b, <1 x i64> %c) nounwind
  ret <1 x i64> %vbsl3.i
}

define <2 x i64> @test_vbslq_s64(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c) nounwind readnone optsize ssp {
; CHECK-LABEL: test_vbslq_s64:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vbsl q0, q1, q2
; CHECK-NEXT:    bx lr
  %vbsl3.i = tail call <2 x i64> @llvm.arm.neon.vbsl.v2i64(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c) nounwind
  ret <2 x i64> %vbsl3.i
}

define <2 x i64> @test_vbslq_u64(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c) nounwind readnone optsize ssp {
; CHECK-LABEL: test_vbslq_u64:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vbsl q0, q1, q2
; CHECK-NEXT:    bx lr
  %vbsl3.i = tail call <2 x i64> @llvm.arm.neon.vbsl.v2i64(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c) nounwind
  ret <2 x i64> %vbsl3.i
}

define <8 x i8> @same_param_all(<8 x i8> %a, <8 x i8> %b) {
; CHECK-LABEL: same_param_all:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vmov.f64 d0, d1
; CHECK-NEXT:    bx lr
  %vbsl.i = tail call <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8> %b, <8 x i8> %b, <8 x i8> %b)
  ret <8 x i8> %vbsl.i
}

define <8 x i8> @same_param_12(<8 x i8> %a, <8 x i8> %b) {
; CHECK-LABEL: same_param_12:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vmov.f64 d0, d1
; CHECK-NEXT:    bx lr
  %vbsl.i = tail call <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8> %a, <8 x i8> %b, <8 x i8> %b)
  ret <8 x i8> %vbsl.i
}

define <8 x i8> @same_param_01(<8 x i8> %a, <8 x i8> %b) {
; CHECK-LABEL: same_param_01:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vbif d0, d1, d0
; CHECK-NEXT:    bx lr
  %vbsl.i = tail call <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8> %a, <8 x i8> %a, <8 x i8> %b)
  ret <8 x i8> %vbsl.i
}

declare <4 x i32> @llvm.arm.neon.vbsl.v4i32(<4 x i32>, <4 x i32>, <4 x i32>) nounwind readnone
declare <8 x i16> @llvm.arm.neon.vbsl.v8i16(<8 x i16>, <8 x i16>, <8 x i16>) nounwind readnone
declare <16 x i8> @llvm.arm.neon.vbsl.v16i8(<16 x i8>, <16 x i8>, <16 x i8>) nounwind readnone
declare <2 x i32> @llvm.arm.neon.vbsl.v2i32(<2 x i32>, <2 x i32>, <2 x i32>) nounwind readnone
declare <4 x i16> @llvm.arm.neon.vbsl.v4i16(<4 x i16>, <4 x i16>, <4 x i16>) nounwind readnone
declare <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8>, <8 x i8>, <8 x i8>) nounwind readnone
declare <2 x float> @llvm.arm.neon.vbsl.v2f32(<2 x float>, <2 x float>, <2 x float>) nounwind readnone
declare <4 x float> @llvm.arm.neon.vbsl.v4f32(<4 x float>, <4 x float>, <4 x float>) nounwind readnone
declare <2 x i64> @llvm.arm.neon.vbsl.v2i64(<2 x i64>, <2 x i64>, <2 x i64>) nounwind readnone
declare <1 x i64> @llvm.arm.neon.vbsl.v1i64(<1 x i64>, <1 x i64>, <1 x i64>) nounwind readnone