; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
; RUN: llc -verify-machineinstrs -o - %s -mtriple=aarch64-linux-gnu | FileCheck %s

; Every function in this file produces a 64-bit vector (or scalar) value and
; then widens it to 128 bits with the upper half filled with zeros. The
; expected assembly records whether a separate instruction is needed to zero
; the upper half.
; Presumably the cases without extra zeroing rely on AArch64 writes to a
; 64-bit vector register clearing bits 64-127 -- TODO confirm against the
; Arm architecture reference.

; The "#2" on the two declarations below refers to an attribute group that is
; not defined in the part of the file visible here.
declare <4 x i16> @llvm.aarch64.neon.vcvtfp2hf(<4 x float>) #2
declare <8 x i8> @llvm.aarch64.neon.tbl1.v8i8(<16 x i8>, <8 x i8>) #2

; Narrowing float -> half conversion through the vcvtfp2hf intrinsic. The
; shuffle keeps the four converted lanes and takes lanes 4-7 from a zero
; vector. Expected assembly is a single fcvtn followed by ret.
define <8 x half> @test1(<4 x float> noundef %a) {
; CHECK-LABEL: test1:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    fcvtn v0.4h, v0.4s
; CHECK-NEXT:    ret
entry:
  %vcvt_f16_f321.i = tail call <4 x i16> @llvm.aarch64.neon.vcvtfp2hf(<4 x float> %a)
  %0 = bitcast <4 x i16> %vcvt_f16_f321.i to <4 x half>
  %shuffle.i = shufflevector <4 x half> %0, <4 x half> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <8 x half> %shuffle.i
}

; Loads <8 x i16>, shifts each lane right by 4 and truncates to <8 x i8>
; (expected to become one shrn). The 8-byte result is padded with zeros to
; 16 bytes and used as the table operand of tbl1. Expected assembly feeds
; the shrn result straight into tbl with no zeroing instruction in between.
define <8 x i8> @test2(ptr nocapture noundef readonly %in, <8 x i8> noundef %idx) {
; CHECK-LABEL: test2:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    ldr q1, [x0]
; CHECK-NEXT:    shrn v1.8b, v1.8h, #4
; CHECK-NEXT:    tbl v0.8b, { v1.16b }, v0.8b
; CHECK-NEXT:    ret
entry:
  %0 = load <8 x i16>, ptr %in, align 2
  %1 = lshr <8 x i16> %0, <i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4>
  %vshrn_n = trunc <8 x i16> %1 to <8 x i8>
  %vtbl1.i = shufflevector <8 x i8> %vshrn_n, <8 x i8> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %vtbl11.i = tail call <8 x i8> @llvm.aarch64.neon.tbl1.v8i8(<16 x i8> %vtbl1.i, <8 x i8> %idx)
  ret <8 x i8> %vtbl11.i
}

; Same IR body and same expected assembly as @test2; only the function name
; differs.
define <8 x i8> @tbl1v8i8(ptr nocapture noundef readonly %in, <8 x i8> noundef %idx) {
; CHECK-LABEL: tbl1v8i8:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    ldr q1, [x0]
; CHECK-NEXT:    shrn v1.8b, v1.8h, #4
; CHECK-NEXT:    tbl v0.8b, { v1.16b }, v0.8b
; CHECK-NEXT:    ret
entry:
  %0 = load <8 x i16>, ptr %in, align 2
  %1 = lshr <8 x i16> %0, <i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4>
  %vshrn_n = trunc <8 x i16> %1 to <8 x i8>
  %vtbl1.i = shufflevector <8 x i8> %vshrn_n, <8 x i8> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %vtbl11.i = tail call <8 x i8> @llvm.aarch64.neon.tbl1.v8i8(<16 x i8> %vtbl1.i, <8 x i8> %idx)
  ret <8 x i8> %vtbl11.i
}

; Pairwise add intrinsic on <4 x i16>, result widened to <8 x i16> with zero
; upper lanes. Expected assembly is a single addp followed by ret.
define <8 x i16> @addpv4i16(<4 x i16> noundef %a, <4 x i16> noundef %b) {
; CHECK-LABEL: addpv4i16:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    addp v0.4h, v0.4h, v1.4h
; CHECK-NEXT:    ret
entry:
  %vpadd_v2.i = tail call <4 x i16> @llvm.aarch64.neon.addp.v4i16(<4 x i16> %a, <4 x i16> %b)
  %shuffle.i = shufflevector <4 x i16> %vpadd_v2.i, <4 x i16> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <8 x i16> %shuffle.i
}

; Plain IR add (no intrinsic) on <4 x i16>, result widened with zero upper
; lanes. Expected assembly is a single add followed by ret.
define <8 x i16> @addv4i16(<4 x i16> noundef %a, <4 x i16> noundef %b) {
; CHECK-LABEL: addv4i16:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    add v0.4h, v1.4h, v0.4h
; CHECK-NEXT:    ret
entry:
  %add.i = add <4 x i16> %b, %a
  %shuffle.i = shufflevector <4 x i16> %add.i, <4 x i16> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <8 x i16> %shuffle.i
}

; rshrn intrinsic with shift amount 3, result widened from 8 to 16 bytes with
; zero upper lanes. Expected assembly is a single rshrn followed by ret.
; The %b parameter is not used by the body.
define <16 x i8> @rshrn(<8 x i16> noundef %a, <4 x i16> noundef %b) {
; CHECK-LABEL: rshrn:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    rshrn v0.8b, v0.8h, #3
; CHECK-NEXT:    ret
entry:
  %vrshrn_n1 = tail call <8 x i8> @llvm.aarch64.neon.rshrn.v8i8(<8 x i16> %a, i32 3)
  %shuffle.i = shufflevector <8 x i8> %vrshrn_n1, <8 x i8> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  ret <16 x i8> %shuffle.i
}

; Here the tbl1 intrinsic result itself (8 bytes) is widened to 16 bytes with
; zero upper lanes. Expected assembly is a single tbl followed by ret.
define <16 x i8> @tbl1(<16 x i8> %a, <8 x i8> %b) {
; CHECK-LABEL: tbl1:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    tbl v0.8b, { v0.16b }, v1.8b
; CHECK-NEXT:    ret
entry:
  %vtbl11 = tail call <8 x i8> @llvm.aarch64.neon.tbl1.v8i8(<16 x i8> %a, <8 x i8> %b)
  %shuffle.i = shufflevector <8 x i8> %vtbl11, <8 x i8> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  ret <16 x i8> %shuffle.i
}

; Scalar double add placed in lane 0 of a <2 x double> whose lane 1 is set to
; 0.0 via insertelement (not via a shuffle). Unlike the other functions, the
; expected assembly recorded below still materialises a zero vector with
; movi and inserts the fadd result into lane 0.
define <2 x double> @fadd(double noundef %x, double noundef %y) {
; CHECK-LABEL: fadd:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    movi v2.2d, #0000000000000000
; CHECK-NEXT:    fadd d0, d0, d1
; CHECK-NEXT:    mov v2.d[0], v0.d[0]
; CHECK-NEXT:    mov v0.16b, v2.16b
; CHECK-NEXT:    ret
entry:
  %add = fadd double %x, %y
  %vecinit1 = insertelement <2 x double> poison, double %add, i64 0
  %vecinit2 = insertelement <2 x double> %vecinit1, double 0.0, i64 1
  ret <2 x double> %vecinit2
}

; Bitwise select written out as (c & a) | (~a & d) on <4 x i16>, bitcast to
; <8 x i8> and widened to 16 bytes with zero upper lanes. Expected assembly
; is a single bsl followed by ret. The %b parameter is not used by the body.
define <16 x i8> @bsl(<4 x i16> noundef %a, <4 x i16> noundef %c, <4 x i16> noundef %d, <4 x i16> noundef %b) {
; CHECK-LABEL: bsl:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    bsl v0.8b, v1.8b, v2.8b
; CHECK-NEXT:    ret
entry:
  %vbsl3.i = and <4 x i16> %c, %a
  %0 = xor <4 x i16> %a, <i16 -1, i16 -1, i16 -1, i16 -1>
  %vbsl4.i = and <4 x i16> %0, %d
  %vbsl5.i = or <4 x i16> %vbsl4.i, %vbsl3.i
  %1 = bitcast <4 x i16> %vbsl5.i to <8 x i8>
  %shuffle.i = shufflevector <8 x i8> %1, <8 x i8> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  ret <16 x i8> %shuffle.i
}

; 8-byte vector load widened to 16 bytes with zero upper lanes. Expected
; assembly is a single 64-bit ldr into d0 followed by ret. The %b parameter
; is not used by the body.
define <16 x i8> @load(ptr %a, <8 x i8> %b) {
; CHECK-LABEL: load:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    ldr d0, [x0]
; CHECK-NEXT:    ret
entry:
  %vtbl11 = load <8 x i8>, ptr %a
  %shuffle.i = shufflevector <8 x i8> %vtbl11, <8 x i8> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  ret <16 x i8> %shuffle.i
}

; The insertzero_* functions below widen a 64-bit vector argument directly
; (no producing instruction in between) to 128 bits, taking the upper lanes
; from a zero vector. There is one function per 64-bit element type:
; i8, i16, i32, i64, half, bfloat, float and double. In every case the
; expected assembly is "fmov d0, d0" followed by ret.
; Presumably the fmov is what clears the upper 64 bits of the register --
; TODO confirm against the Arm architecture reference.

; <8 x i8> -> <16 x i8>, upper 8 lanes zero.
define <16 x i8> @insertzero_v8i8(<8 x i8> %a) {
; CHECK-LABEL: insertzero_v8i8:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    fmov d0, d0
; CHECK-NEXT:    ret
entry:
  %shuffle.i = shufflevector <8 x i8> %a, <8 x i8> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  ret <16 x i8> %shuffle.i
}

; <4 x i16> -> <8 x i16>, upper 4 lanes zero.
define <8 x i16> @insertzero_v4i16(<4 x i16> %a) {
; CHECK-LABEL: insertzero_v4i16:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    fmov d0, d0
; CHECK-NEXT:    ret
entry:
  %shuffle.i = shufflevector <4 x i16> %a, <4 x i16> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <8 x i16> %shuffle.i
}

; <2 x i32> -> <4 x i32>, upper 2 lanes zero.
define <4 x i32> @insertzero_v2i32(<2 x i32> %a) {
; CHECK-LABEL: insertzero_v2i32:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    fmov d0, d0
; CHECK-NEXT:    ret
entry:
  %shuffle.i = shufflevector <2 x i32> %a, <2 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  ret <4 x i32> %shuffle.i
}

; <1 x i64> -> <2 x i64>, upper lane zero.
define <2 x i64> @insertzero_v1i64(<1 x i64> %a) {
; CHECK-LABEL: insertzero_v1i64:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    fmov d0, d0
; CHECK-NEXT:    ret
entry:
  %shuffle.i = shufflevector <1 x i64> %a, <1 x i64> zeroinitializer, <2 x i32> <i32 0, i32 1>
  ret <2 x i64> %shuffle.i
}

; <4 x half> -> <8 x half>, upper 4 lanes zero.
define <8 x half> @insertzero_v4f16(<4 x half> %a) {
; CHECK-LABEL: insertzero_v4f16:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    fmov d0, d0
; CHECK-NEXT:    ret
entry:
  %shuffle.i = shufflevector <4 x half> %a, <4 x half> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <8 x half> %shuffle.i
}

; <4 x bfloat> -> <8 x bfloat>, upper 4 lanes zero.
define <8 x bfloat> @insertzero_v4bf16(<4 x bfloat> %a) {
; CHECK-LABEL: insertzero_v4bf16:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    fmov d0, d0
; CHECK-NEXT:    ret
entry:
  %shuffle.i = shufflevector <4 x bfloat> %a, <4 x bfloat> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <8 x bfloat> %shuffle.i
}

; <2 x float> -> <4 x float>, upper 2 lanes zero.
define <4 x float> @insertzero_v2f32(<2 x float> %a) {
; CHECK-LABEL: insertzero_v2f32:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    fmov d0, d0
; CHECK-NEXT:    ret
entry:
  %shuffle.i = shufflevector <2 x float> %a, <2 x float> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  ret <4 x float> %shuffle.i
}

; <1 x double> -> <2 x double>, upper lane zero.
define <2 x double> @insertzero_v1f64(<1 x double> %a) {
; CHECK-LABEL: insertzero_v1f64:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    fmov d0, d0
; CHECK-NEXT:    ret
entry:
  %shuffle.i = shufflevector <1 x double> %a, <1 x double> zeroinitializer, <2 x i32> <i32 0, i32 1>
  ret <2 x double> %shuffle.i
}



; Declarations for the intrinsics called by @rshrn and @addpv4i16.
declare <8 x i8> @llvm.aarch64.neon.rshrn.v8i8(<8 x i16>, i32)
declare <4 x i16> @llvm.aarch64.neon.addp.v4i16(<4 x i16>, <4 x i16>)