; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=arm64-eabi -mcpu=cyclone | FileCheck %s

define i32 @qadds(<4 x i32> %b, <4 x i32> %c) nounwind readnone optsize ssp {
; CHECK-LABEL: qadds:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sqadd s0, s0, s1
; CHECK-NEXT:    fmov w0, s0
; CHECK-NEXT:    ret
  %vecext = extractelement <4 x i32> %b, i32 0
  %vecext1 = extractelement <4 x i32> %c, i32 0
  %vqadd.i = tail call i32 @llvm.aarch64.neon.sqadd.i32(i32 %vecext, i32 %vecext1) nounwind
  ret i32 %vqadd.i
}

define i64 @qaddd(<2 x i64> %b, <2 x i64> %c) nounwind readnone optsize ssp {
; CHECK-LABEL: qaddd:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sqadd d0, d0, d1
; CHECK-NEXT:    fmov x0, d0
; CHECK-NEXT:    ret
  %vecext = extractelement <2 x i64> %b, i32 0
  %vecext1 = extractelement <2 x i64> %c, i32 0
  %vqadd.i = tail call i64 @llvm.aarch64.neon.sqadd.i64(i64 %vecext, i64 %vecext1) nounwind
  ret i64 %vqadd.i
}

define i32 @uqadds(<4 x i32> %b, <4 x i32> %c) nounwind readnone optsize ssp {
; CHECK-LABEL: uqadds:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uqadd s0, s0, s1
; CHECK-NEXT:    fmov w0, s0
; CHECK-NEXT:    ret
  %vecext = extractelement <4 x i32> %b, i32 0
  %vecext1 = extractelement <4 x i32> %c, i32 0
  %vqadd.i = tail call i32 @llvm.aarch64.neon.uqadd.i32(i32 %vecext, i32 %vecext1) nounwind
  ret i32 %vqadd.i
}

define i64 @uqaddd(<2 x i64> %b, <2 x i64> %c) nounwind readnone optsize ssp {
; CHECK-LABEL: uqaddd:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uqadd d0, d0, d1
; CHECK-NEXT:    fmov x0, d0
; CHECK-NEXT:    ret
  %vecext = extractelement <2 x i64> %b, i32 0
  %vecext1 = extractelement <2 x i64> %c, i32 0
  %vqadd.i = tail call i64 @llvm.aarch64.neon.uqadd.i64(i64 %vecext, i64 %vecext1) nounwind
  ret i64 %vqadd.i
}

declare i64 @llvm.aarch64.neon.uqadd.i64(i64, i64) nounwind readnone
declare i32 @llvm.aarch64.neon.uqadd.i32(i32, i32) nounwind readnone
declare i64 @llvm.aarch64.neon.sqadd.i64(i64, i64) nounwind readnone
declare i32 @llvm.aarch64.neon.sqadd.i32(i32, i32) nounwind readnone

define i32 @qsubs(<4 x i32> %b, <4 x i32> %c) nounwind readnone optsize ssp {
; CHECK-LABEL: qsubs:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sqsub s0, s0, s1
; CHECK-NEXT:    fmov w0, s0
; CHECK-NEXT:    ret
  %vecext = extractelement <4 x i32> %b, i32 0
  %vecext1 = extractelement <4 x i32> %c, i32 0
  %vqsub.i = tail call i32 @llvm.aarch64.neon.sqsub.i32(i32 %vecext, i32 %vecext1) nounwind
  ret i32 %vqsub.i
}

define i64 @qsubd(<2 x i64> %b, <2 x i64> %c) nounwind readnone optsize ssp {
; CHECK-LABEL: qsubd:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sqsub d0, d0, d1
; CHECK-NEXT:    fmov x0, d0
; CHECK-NEXT:    ret
  %vecext = extractelement <2 x i64> %b, i32 0
  %vecext1 = extractelement <2 x i64> %c, i32 0
  %vqsub.i = tail call i64 @llvm.aarch64.neon.sqsub.i64(i64 %vecext, i64 %vecext1) nounwind
  ret i64 %vqsub.i
}

define i32 @uqsubs(<4 x i32> %b, <4 x i32> %c) nounwind readnone optsize ssp {
; CHECK-LABEL: uqsubs:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uqsub s0, s0, s1
; CHECK-NEXT:    fmov w0, s0
; CHECK-NEXT:    ret
  %vecext = extractelement <4 x i32> %b, i32 0
  %vecext1 = extractelement <4 x i32> %c, i32 0
  %vqsub.i = tail call i32 @llvm.aarch64.neon.uqsub.i32(i32 %vecext, i32 %vecext1) nounwind
  ret i32 %vqsub.i
}

define i64 @uqsubd(<2 x i64> %b, <2 x i64> %c) nounwind readnone optsize ssp {
; CHECK-LABEL: uqsubd:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uqsub d0, d0, d1
; CHECK-NEXT:    fmov x0, d0
; CHECK-NEXT:    ret
  %vecext = extractelement <2 x i64> %b, i32 0
  %vecext1 = extractelement <2 x i64> %c, i32 0
  %vqsub.i = tail call i64 @llvm.aarch64.neon.uqsub.i64(i64 %vecext, i64 %vecext1) nounwind
  ret i64 %vqsub.i
}

declare i64 @llvm.aarch64.neon.uqsub.i64(i64, i64) nounwind readnone
declare i32 @llvm.aarch64.neon.uqsub.i32(i32, i32) nounwind readnone
declare i64 @llvm.aarch64.neon.sqsub.i64(i64, i64) nounwind readnone
declare i32 @llvm.aarch64.neon.sqsub.i32(i32, i32) nounwind readnone

define i32 @qabss(<4 x i32> %b, <4 x i32> %c) nounwind readnone {
; CHECK-LABEL: qabss:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sqabs s0, s0
; CHECK-NEXT:    fmov w0, s0
; CHECK-NEXT:    ret
  %vecext = extractelement <4 x i32> %b, i32 0
  %vqabs.i = tail call i32 @llvm.aarch64.neon.sqabs.i32(i32 %vecext) nounwind
  ret i32 %vqabs.i
}

define i64 @qabsd(<2 x i64> %b, <2 x i64> %c) nounwind readnone {
; CHECK-LABEL: qabsd:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sqabs d0, d0
; CHECK-NEXT:    fmov x0, d0
; CHECK-NEXT:    ret
  %vecext = extractelement <2 x i64> %b, i32 0
  %vqabs.i = tail call i64 @llvm.aarch64.neon.sqabs.i64(i64 %vecext) nounwind
  ret i64 %vqabs.i
}

define i32 @qnegs(<4 x i32> %b, <4 x i32> %c) nounwind readnone {
; CHECK-LABEL: qnegs:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sqneg s0, s0
; CHECK-NEXT:    fmov w0, s0
; CHECK-NEXT:    ret
  %vecext = extractelement <4 x i32> %b, i32 0
  %vqneg.i = tail call i32 @llvm.aarch64.neon.sqneg.i32(i32 %vecext) nounwind
  ret i32 %vqneg.i
}

define i64 @qnegd(<2 x i64> %b, <2 x i64> %c) nounwind readnone {
; CHECK-LABEL: qnegd:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sqneg d0, d0
; CHECK-NEXT:    fmov x0, d0
; CHECK-NEXT:    ret
  %vecext = extractelement <2 x i64> %b, i32 0
  %vqneg.i = tail call i64 @llvm.aarch64.neon.sqneg.i64(i64 %vecext) nounwind
  ret i64 %vqneg.i
}

declare i64 @llvm.aarch64.neon.sqneg.i64(i64) nounwind readnone
declare i32 @llvm.aarch64.neon.sqneg.i32(i32) nounwind readnone
declare i64 @llvm.aarch64.neon.sqabs.i64(i64) nounwind readnone
declare i32 @llvm.aarch64.neon.sqabs.i32(i32) nounwind readnone


define i32 @vqmovund(<2 x i64> %b) nounwind readnone {
; CHECK-LABEL: vqmovund:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sqxtun s0, d0
; CHECK-NEXT:    fmov w0, s0
; CHECK-NEXT:    ret
  %vecext = extractelement <2 x i64> %b, i32 0
  %vqmovun.i = tail call i32 @llvm.aarch64.neon.scalar.sqxtun.i32.i64(i64 %vecext) nounwind
  ret i32 %vqmovun.i
}

define i32 @vqmovnd_s(<2 x i64> %b) nounwind readnone {
; CHECK-LABEL: vqmovnd_s:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sqxtn s0, d0
; CHECK-NEXT:    fmov w0, s0
; CHECK-NEXT:    ret
  %vecext = extractelement <2 x i64> %b, i32 0
  %vqmovn.i = tail call i32 @llvm.aarch64.neon.scalar.sqxtn.i32.i64(i64 %vecext) nounwind
  ret i32 %vqmovn.i
}

define i32 @vqmovnd_u(<2 x i64> %b) nounwind readnone {
; CHECK-LABEL: vqmovnd_u:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uqxtn s0, d0
; CHECK-NEXT:    fmov w0, s0
; CHECK-NEXT:    ret
  %vecext = extractelement <2 x i64> %b, i32 0
  %vqmovn.i = tail call i32 @llvm.aarch64.neon.scalar.uqxtn.i32.i64(i64 %vecext) nounwind
  ret i32 %vqmovn.i
}

define i32 @uqxtn_ext(<4 x i32> noundef %a, <4 x i32> noundef %b, i32 %c, float %d, <2 x i64> %e) {
; CHECK-LABEL: uqxtn_ext:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    mov v0.d[0], v3.d[1]
; CHECK-NEXT:    uqxtn s0, d0
; CHECK-NEXT:    fmov w0, s0
; CHECK-NEXT:    ret
entry:
  %e1 = extractelement <2 x i64> %e, i64 1
  %r = tail call i32 @llvm.aarch64.neon.scalar.uqxtn.i32.i64(i64 %e1)
  ret i32 %r
}

define <4 x i32> @sqxtn_ins(<4 x i32> noundef %a, i64 %c) {
; CHECK-LABEL: sqxtn_ins:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    fmov d1, x0
; CHECK-NEXT:    sqxtn s1, d1
; CHECK-NEXT:    mov v0.s[3], v1.s[0]
; CHECK-NEXT:    ret
entry:
  %vqmovnd_s64.i = tail call i32 @llvm.aarch64.neon.scalar.sqxtn.i32.i64(i64 %c)
  %vecins = insertelement <4 x i32> %a, i32 %vqmovnd_s64.i, i64 3
  ret <4 x i32> %vecins
}

define <4 x i32> @sqxtun_insext(<4 x i32> noundef %a, <2 x i64> %e) {
; CHECK-LABEL: sqxtun_insext:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    mov v1.d[0], v1.d[1]
; CHECK-NEXT:    sqxtun s1, d1
; CHECK-NEXT:    mov v0.s[3], v1.s[0]
; CHECK-NEXT:    ret
entry:
  %c = extractelement <2 x i64> %e, i64 1
  %vqmovnd_s64.i = tail call i32 @llvm.aarch64.neon.scalar.sqxtun.i32.i64(i64 %c)
  %vecins = insertelement <4 x i32> %a, i32 %vqmovnd_s64.i, i64 3
  ret <4 x i32> %vecins
}

define <4 x i32> @saddluse(<4 x i32> noundef %a, <4 x i32> noundef %b, i32 %c, float %d, <2 x i64> %e) {
; CHECK-LABEL: saddluse:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    saddlv d1, v1.4s
; CHECK-NEXT:    sqxtn s1, d1
; CHECK-NEXT:    mov v0.s[1], v1.s[0]
; CHECK-NEXT:    ret
entry:
  %vaddlvq_s32.i = tail call i64 @llvm.aarch64.neon.saddlv.i64.v4i32(<4 x i32> %b)
  %vqmovnd_s64.i = tail call i32 @llvm.aarch64.neon.scalar.sqxtn.i32.i64(i64 %vaddlvq_s32.i)
  %vecins = insertelement <4 x i32> %a, i32 %vqmovnd_s64.i, i64 1
  ret <4 x i32> %vecins
}

declare i32 @llvm.aarch64.neon.scalar.uqxtn.i32.i64(i64) nounwind readnone
declare i32 @llvm.aarch64.neon.scalar.sqxtn.i32.i64(i64) nounwind readnone
declare i32 @llvm.aarch64.neon.scalar.sqxtun.i32.i64(i64) nounwind readnone
declare i64 @llvm.aarch64.neon.saddlv.i64.v4i32(<4 x i32>)