; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=arm-eabi -mattr=+v8.2a,+neon,+fullfp16 -float-abi=hard < %s | FileCheck %s

; Codegen test for half-precision NEON vector operations. Each function below
; wraps exactly one IR operation or llvm.arm.neon intrinsic on <4 x half> or
; <8 x half> (or the matching i16 vectors), and its assertions pin the single
; .f16 instruction llc emits for it: d-registers for the 64-bit form and
; q-registers for the 128-bit ("q"-suffixed) form, followed by the return.

%struct.float16x4x2_t = type { [2 x <4 x half>] }
%struct.float16x8x2_t = type { [2 x <8 x half>] }

; ---- Unary operations -------------------------------------------------------

define dso_local <4 x half> @test_vabs_f16(<4 x half> %a) {
; CHECK-LABEL: test_vabs_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vabs.f16 d0, d0
; CHECK-NEXT:    bx lr
entry:
  %vabs1.i = tail call <4 x half> @llvm.fabs.v4f16(<4 x half> %a)
  ret <4 x half> %vabs1.i
}

define dso_local <8 x half> @test_vabsq_f16(<8 x half> %a) {
; CHECK-LABEL: test_vabsq_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vabs.f16 q0, q0
; CHECK-NEXT:    bx lr
entry:
  %vabs1.i = tail call <8 x half> @llvm.fabs.v8f16(<8 x half> %a)
  ret <8 x half> %vabs1.i
}

; Ordered comparisons against zero; the i1 lanes are sign-extended to i16 so
; the whole compare+sext folds into one compare-with-#0 instruction.

define dso_local <4 x i16> @test_vceqz_f16(<4 x half> %a) {
; CHECK-LABEL: test_vceqz_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vceq.f16 d0, d0, #0
; CHECK-NEXT:    bx lr
entry:
  %0 = fcmp oeq <4 x half> %a, zeroinitializer
  %vceqz.i = sext <4 x i1> %0 to <4 x i16>
  ret <4 x i16> %vceqz.i
}

define dso_local <8 x i16> @test_vceqzq_f16(<8 x half> %a) {
; CHECK-LABEL: test_vceqzq_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vceq.f16 q0, q0, #0
; CHECK-NEXT:    bx lr
entry:
  %0 = fcmp oeq <8 x half> %a, zeroinitializer
  %vceqz.i = sext <8 x i1> %0 to <8 x i16>
  ret <8 x i16> %vceqz.i
}

define dso_local <4 x i16> @test_vcgez_f16(<4 x half> %a) {
; CHECK-LABEL: test_vcgez_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vcge.f16 d0, d0, #0
; CHECK-NEXT:    bx lr
entry:
  %0 = fcmp oge <4 x half> %a, zeroinitializer
  %vcgez.i = sext <4 x i1> %0 to <4 x i16>
  ret <4 x i16> %vcgez.i
}

define dso_local <8 x i16> @test_vcgezq_f16(<8 x half> %a) {
; CHECK-LABEL: test_vcgezq_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vcge.f16 q0, q0, #0
; CHECK-NEXT:    bx lr
entry:
  %0 = fcmp oge <8 x half> %a, zeroinitializer
  %vcgez.i = sext <8 x i1> %0 to <8 x i16>
  ret <8 x i16> %vcgez.i
}

define dso_local <4 x i16> @test_vcgtz_f16(<4 x half> %a) {
; CHECK-LABEL: test_vcgtz_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vcgt.f16 d0, d0, #0
; CHECK-NEXT:    bx lr
entry:
  %0 = fcmp ogt <4 x half> %a, zeroinitializer
  %vcgtz.i = sext <4 x i1> %0 to <4 x i16>
  ret <4 x i16> %vcgtz.i
}

define dso_local <8 x i16> @test_vcgtzq_f16(<8 x half> %a) {
; CHECK-LABEL: test_vcgtzq_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vcgt.f16 q0, q0, #0
; CHECK-NEXT:    bx lr
entry:
  %0 = fcmp ogt <8 x half> %a, zeroinitializer
  %vcgtz.i = sext <8 x i1> %0 to <8 x i16>
  ret <8 x i16> %vcgtz.i
}

define dso_local <4 x i16> @test_vclez_f16(<4 x half> %a) {
; CHECK-LABEL: test_vclez_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vcle.f16 d0, d0, #0
; CHECK-NEXT:    bx lr
entry:
  %0 = fcmp ole <4 x half> %a, zeroinitializer
  %vclez.i = sext <4 x i1> %0 to <4 x i16>
  ret <4 x i16> %vclez.i
}

define dso_local <8 x i16> @test_vclezq_f16(<8 x half> %a) {
; CHECK-LABEL: test_vclezq_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vcle.f16 q0, q0, #0
; CHECK-NEXT:    bx lr
entry:
  %0 = fcmp ole <8 x half> %a, zeroinitializer
  %vclez.i = sext <8 x i1> %0 to <8 x i16>
  ret <8 x i16> %vclez.i
}

define dso_local <4 x i16> @test_vcltz_f16(<4 x half> %a) {
; CHECK-LABEL: test_vcltz_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vclt.f16 d0, d0, #0
; CHECK-NEXT:    bx lr
entry:
  %0 = fcmp olt <4 x half> %a, zeroinitializer
  %vcltz.i = sext <4 x i1> %0 to <4 x i16>
  ret <4 x i16> %vcltz.i
}

define dso_local <8 x i16> @test_vcltzq_f16(<8 x half> %a) {
; CHECK-LABEL: test_vcltzq_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vclt.f16 q0, q0, #0
; CHECK-NEXT:    bx lr
entry:
  %0 = fcmp olt <8 x half> %a, zeroinitializer
  %vcltz.i = sext <8 x i1> %0 to <8 x i16>
  ret <8 x i16> %vcltz.i
}

; Integer <-> half conversions expressed with plain IR casts.

define dso_local <4 x half> @test_vcvt_f16_s16(<4 x i16> %a) {
; CHECK-LABEL: test_vcvt_f16_s16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vcvt.f16.s16 d0, d0
; CHECK-NEXT:    bx lr
entry:
  %vcvt.i = sitofp <4 x i16> %a to <4 x half>
  ret <4 x half> %vcvt.i
}

define dso_local <8 x half> @test_vcvtq_f16_s16(<8 x i16> %a) {
; CHECK-LABEL: test_vcvtq_f16_s16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vcvt.f16.s16 q0, q0
; CHECK-NEXT:    bx lr
entry:
  %vcvt.i = sitofp <8 x i16> %a to <8 x half>
  ret <8 x half> %vcvt.i
}

define dso_local <4 x half> @test_vcvt_f16_u16(<4 x i16> %a) {
; CHECK-LABEL: test_vcvt_f16_u16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vcvt.f16.u16 d0, d0
; CHECK-NEXT:    bx lr
entry:
  %vcvt.i = uitofp <4 x i16> %a to <4 x half>
  ret <4 x half> %vcvt.i
}

define dso_local <8 x half> @test_vcvtq_f16_u16(<8 x i16> %a) {
; CHECK-LABEL: test_vcvtq_f16_u16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vcvt.f16.u16 q0, q0
; CHECK-NEXT:    bx lr
entry:
  %vcvt.i = uitofp <8 x i16> %a to <8 x half>
  ret <8 x half> %vcvt.i
}

define dso_local <4 x i16> @test_vcvt_s16_f16(<4 x half> %a) {
; CHECK-LABEL: test_vcvt_s16_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vcvt.s16.f16 d0, d0
; CHECK-NEXT:    bx lr
entry:
  %vcvt.i = fptosi <4 x half> %a to <4 x i16>
  ret <4 x i16> %vcvt.i
}

define dso_local <8 x i16> @test_vcvtq_s16_f16(<8 x half> %a) {
; CHECK-LABEL: test_vcvtq_s16_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vcvt.s16.f16 q0, q0
; CHECK-NEXT:    bx lr
entry:
  %vcvt.i = fptosi <8 x half> %a to <8 x i16>
  ret <8 x i16> %vcvt.i
}

define dso_local <4 x i16> @test_vcvt_u16_f16(<4 x half> %a) {
; CHECK-LABEL: test_vcvt_u16_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vcvt.u16.f16 d0, d0
; CHECK-NEXT:    bx lr
entry:
  %vcvt.i = fptoui <4 x half> %a to <4 x i16>
  ret <4 x i16> %vcvt.i
}

define dso_local <8 x i16> @test_vcvtq_u16_f16(<8 x half> %a) {
; CHECK-LABEL: test_vcvtq_u16_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vcvt.u16.f16 q0, q0
; CHECK-NEXT:    bx lr
entry:
  %vcvt.i = fptoui <8 x half> %a to <8 x i16>
  ret <8 x i16> %vcvt.i
}

; Half -> integer conversions through the llvm.arm.neon.vcvt{a,m,n,p}{s,u}
; intrinsics; each maps to the vcvta/vcvtm/vcvtn/vcvtp instruction of the same
; letter.
; NOTE(review): this chunk has no q-register test for vcvta.u16.f16 alongside
; the other variants - confirm whether that omission is intentional.

define dso_local <4 x i16> @test_vcvta_s16_f16(<4 x half> %a) {
; CHECK-LABEL: test_vcvta_s16_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vcvta.s16.f16 d0, d0
; CHECK-NEXT:    bx lr
entry:
  %vcvta_s16_v1.i = tail call <4 x i16> @llvm.arm.neon.vcvtas.v4i16.v4f16(<4 x half> %a)
  ret <4 x i16> %vcvta_s16_v1.i
}

define dso_local <4 x i16> @test_vcvta_u16_f16(<4 x half> %a) {
; CHECK-LABEL: test_vcvta_u16_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vcvta.u16.f16 d0, d0
; CHECK-NEXT:    bx lr
entry:
  %vcvta_u16_v1.i = tail call <4 x i16> @llvm.arm.neon.vcvtau.v4i16.v4f16(<4 x half> %a)
  ret <4 x i16> %vcvta_u16_v1.i
}

define dso_local <8 x i16> @test_vcvtaq_s16_f16(<8 x half> %a) {
; CHECK-LABEL: test_vcvtaq_s16_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vcvta.s16.f16 q0, q0
; CHECK-NEXT:    bx lr
entry:
  %vcvtaq_s16_v1.i = tail call <8 x i16> @llvm.arm.neon.vcvtas.v8i16.v8f16(<8 x half> %a)
  ret <8 x i16> %vcvtaq_s16_v1.i
}

define dso_local <4 x i16> @test_vcvtm_s16_f16(<4 x half> %a) {
; CHECK-LABEL: test_vcvtm_s16_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vcvtm.s16.f16 d0, d0
; CHECK-NEXT:    bx lr
entry:
  %vcvtm_s16_v1.i = tail call <4 x i16> @llvm.arm.neon.vcvtms.v4i16.v4f16(<4 x half> %a)
  ret <4 x i16> %vcvtm_s16_v1.i
}

define dso_local <8 x i16> @test_vcvtmq_s16_f16(<8 x half> %a) {
; CHECK-LABEL: test_vcvtmq_s16_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vcvtm.s16.f16 q0, q0
; CHECK-NEXT:    bx lr
entry:
  %vcvtmq_s16_v1.i = tail call <8 x i16> @llvm.arm.neon.vcvtms.v8i16.v8f16(<8 x half> %a)
  ret <8 x i16> %vcvtmq_s16_v1.i
}

define dso_local <4 x i16> @test_vcvtm_u16_f16(<4 x half> %a) {
; CHECK-LABEL: test_vcvtm_u16_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vcvtm.u16.f16 d0, d0
; CHECK-NEXT:    bx lr
entry:
  %vcvtm_u16_v1.i = tail call <4 x i16> @llvm.arm.neon.vcvtmu.v4i16.v4f16(<4 x half> %a)
  ret <4 x i16> %vcvtm_u16_v1.i
}

define dso_local <8 x i16> @test_vcvtmq_u16_f16(<8 x half> %a) {
; CHECK-LABEL: test_vcvtmq_u16_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vcvtm.u16.f16 q0, q0
; CHECK-NEXT:    bx lr
entry:
  %vcvtmq_u16_v1.i = tail call <8 x i16> @llvm.arm.neon.vcvtmu.v8i16.v8f16(<8 x half> %a)
  ret <8 x i16> %vcvtmq_u16_v1.i
}

define dso_local <4 x i16> @test_vcvtn_s16_f16(<4 x half> %a) {
; CHECK-LABEL: test_vcvtn_s16_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vcvtn.s16.f16 d0, d0
; CHECK-NEXT:    bx lr
entry:
  %vcvtn_s16_v1.i = tail call <4 x i16> @llvm.arm.neon.vcvtns.v4i16.v4f16(<4 x half> %a)
  ret <4 x i16> %vcvtn_s16_v1.i
}

define dso_local <8 x i16> @test_vcvtnq_s16_f16(<8 x half> %a) {
; CHECK-LABEL: test_vcvtnq_s16_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vcvtn.s16.f16 q0, q0
; CHECK-NEXT:    bx lr
entry:
  %vcvtnq_s16_v1.i = tail call <8 x i16> @llvm.arm.neon.vcvtns.v8i16.v8f16(<8 x half> %a)
  ret <8 x i16> %vcvtnq_s16_v1.i
}

define dso_local <4 x i16> @test_vcvtn_u16_f16(<4 x half> %a) {
; CHECK-LABEL: test_vcvtn_u16_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vcvtn.u16.f16 d0, d0
; CHECK-NEXT:    bx lr
entry:
  %vcvtn_u16_v1.i = tail call <4 x i16> @llvm.arm.neon.vcvtnu.v4i16.v4f16(<4 x half> %a)
  ret <4 x i16> %vcvtn_u16_v1.i
}

define dso_local <8 x i16> @test_vcvtnq_u16_f16(<8 x half> %a) {
; CHECK-LABEL: test_vcvtnq_u16_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vcvtn.u16.f16 q0, q0
; CHECK-NEXT:    bx lr
entry:
  %vcvtnq_u16_v1.i = tail call <8 x i16> @llvm.arm.neon.vcvtnu.v8i16.v8f16(<8 x half> %a)
  ret <8 x i16> %vcvtnq_u16_v1.i
}

define dso_local <4 x i16> @test_vcvtp_s16_f16(<4 x half> %a) {
; CHECK-LABEL: test_vcvtp_s16_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vcvtp.s16.f16 d0, d0
; CHECK-NEXT:    bx lr
entry:
  %vcvtp_s16_v1.i = tail call <4 x i16> @llvm.arm.neon.vcvtps.v4i16.v4f16(<4 x half> %a)
  ret <4 x i16> %vcvtp_s16_v1.i
}

define dso_local <8 x i16> @test_vcvtpq_s16_f16(<8 x half> %a) {
; CHECK-LABEL: test_vcvtpq_s16_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vcvtp.s16.f16 q0, q0
; CHECK-NEXT:    bx lr
entry:
  %vcvtpq_s16_v1.i = tail call <8 x i16> @llvm.arm.neon.vcvtps.v8i16.v8f16(<8 x half> %a)
  ret <8 x i16> %vcvtpq_s16_v1.i
}

define dso_local <4 x i16> @test_vcvtp_u16_f16(<4 x half> %a) {
; CHECK-LABEL: test_vcvtp_u16_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vcvtp.u16.f16 d0, d0
; CHECK-NEXT:    bx lr
entry:
  %vcvtp_u16_v1.i = tail call <4 x i16> @llvm.arm.neon.vcvtpu.v4i16.v4f16(<4 x half> %a)
  ret <4 x i16> %vcvtp_u16_v1.i
}

define dso_local <8 x i16> @test_vcvtpq_u16_f16(<8 x half> %a) {
; CHECK-LABEL: test_vcvtpq_u16_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vcvtp.u16.f16 q0, q0
; CHECK-NEXT:    bx lr
entry:
  %vcvtpq_u16_v1.i = tail call <8 x i16> @llvm.arm.neon.vcvtpu.v8i16.v8f16(<8 x half> %a)
  ret <8 x i16> %vcvtpq_u16_v1.i
}

; Negation is written as a subtraction from -0.0 (half 0xH8000 has only the
; sign bit set) and is expected to be selected as a single vneg.

define dso_local <4 x half> @test_vneg_f16(<4 x half> %a) {
; CHECK-LABEL: test_vneg_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vneg.f16 d0, d0
; CHECK-NEXT:    bx lr
entry:
  %sub.i = fsub <4 x half> <half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000>, %a
  ret <4 x half> %sub.i
}

define dso_local <8 x half> @test_vnegq_f16(<8 x half> %a) {
; CHECK-LABEL: test_vnegq_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vneg.f16 q0, q0
; CHECK-NEXT:    bx lr
entry:
  %sub.i = fsub <8 x half> <half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000>, %a
  ret <8 x half> %sub.i
}

define dso_local <4 x half> @test_vrecpe_f16(<4 x half> %a) {
; CHECK-LABEL: test_vrecpe_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vrecpe.f16 d0, d0
; CHECK-NEXT:    bx lr
entry:
  %vrecpe_v1.i = tail call <4 x half> @llvm.arm.neon.vrecpe.v4f16(<4 x half> %a)
  ret <4 x half> %vrecpe_v1.i
}

define dso_local <8 x half> @test_vrecpeq_f16(<8 x half> %a) {
; CHECK-LABEL: test_vrecpeq_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vrecpe.f16 q0, q0
; CHECK-NEXT:    bx lr
entry:
  %vrecpeq_v1.i = tail call <8 x half> @llvm.arm.neon.vrecpe.v8f16(<8 x half> %a)
  ret <8 x half> %vrecpeq_v1.i
}

; Rounding: each llvm.arm.neon.vrint{z,a,m,n,p,x} intrinsic maps to the vrint
; instruction with the same suffix letter.

define dso_local <4 x half> @test_vrnd_f16(<4 x half> %a) {
; CHECK-LABEL: test_vrnd_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vrintz.f16 d0, d0
; CHECK-NEXT:    bx lr
entry:
  %vrnd_v1.i = tail call <4 x half> @llvm.arm.neon.vrintz.v4f16(<4 x half> %a)
  ret <4 x half> %vrnd_v1.i
}

define dso_local <8 x half> @test_vrndq_f16(<8 x half> %a) {
; CHECK-LABEL: test_vrndq_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vrintz.f16 q0, q0
; CHECK-NEXT:    bx lr
entry:
  %vrndq_v1.i = tail call <8 x half> @llvm.arm.neon.vrintz.v8f16(<8 x half> %a)
  ret <8 x half> %vrndq_v1.i
}

define dso_local <4 x half> @test_vrnda_f16(<4 x half> %a) {
; CHECK-LABEL: test_vrnda_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vrinta.f16 d0, d0
; CHECK-NEXT:    bx lr
entry:
  %vrnda_v1.i = tail call <4 x half> @llvm.arm.neon.vrinta.v4f16(<4 x half> %a)
  ret <4 x half> %vrnda_v1.i
}

define dso_local <8 x half> @test_vrndaq_f16(<8 x half> %a) {
; CHECK-LABEL: test_vrndaq_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vrinta.f16 q0, q0
; CHECK-NEXT:    bx lr
entry:
  %vrndaq_v1.i = tail call <8 x half> @llvm.arm.neon.vrinta.v8f16(<8 x half> %a)
  ret <8 x half> %vrndaq_v1.i
}

define dso_local <4 x half> @test_vrndm_f16(<4 x half> %a) {
; CHECK-LABEL: test_vrndm_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vrintm.f16 d0, d0
; CHECK-NEXT:    bx lr
entry:
  %vrndm_v1.i = tail call <4 x half> @llvm.arm.neon.vrintm.v4f16(<4 x half> %a)
  ret <4 x half> %vrndm_v1.i
}

define dso_local <8 x half> @test_vrndmq_f16(<8 x half> %a) {
; CHECK-LABEL: test_vrndmq_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vrintm.f16 q0, q0
; CHECK-NEXT:    bx lr
entry:
  %vrndmq_v1.i = tail call <8 x half> @llvm.arm.neon.vrintm.v8f16(<8 x half> %a)
  ret <8 x half> %vrndmq_v1.i
}

define dso_local <4 x half> @test_vrndn_f16(<4 x half> %a) {
; CHECK-LABEL: test_vrndn_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vrintn.f16 d0, d0
; CHECK-NEXT:    bx lr
entry:
  %vrndn_v1.i = tail call <4 x half> @llvm.arm.neon.vrintn.v4f16(<4 x half> %a)
  ret <4 x half> %vrndn_v1.i
}

define dso_local <8 x half> @test_vrndnq_f16(<8 x half> %a) {
; CHECK-LABEL: test_vrndnq_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vrintn.f16 q0, q0
; CHECK-NEXT:    bx lr
entry:
  %vrndnq_v1.i = tail call <8 x half> @llvm.arm.neon.vrintn.v8f16(<8 x half> %a)
  ret <8 x half> %vrndnq_v1.i
}

define dso_local <4 x half> @test_vrndp_f16(<4 x half> %a) {
; CHECK-LABEL: test_vrndp_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vrintp.f16 d0, d0
; CHECK-NEXT:    bx lr
entry:
  %vrndp_v1.i = tail call <4 x half> @llvm.arm.neon.vrintp.v4f16(<4 x half> %a)
  ret <4 x half> %vrndp_v1.i
}

define dso_local <8 x half> @test_vrndpq_f16(<8 x half> %a) {
; CHECK-LABEL: test_vrndpq_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vrintp.f16 q0, q0
; CHECK-NEXT:    bx lr
entry:
  %vrndpq_v1.i = tail call <8 x half> @llvm.arm.neon.vrintp.v8f16(<8 x half> %a)
  ret <8 x half> %vrndpq_v1.i
}

define dso_local <4 x half> @test_vrndx_f16(<4 x half> %a) {
; CHECK-LABEL: test_vrndx_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vrintx.f16 d0, d0
; CHECK-NEXT:    bx lr
entry:
  %vrndx_v1.i = tail call <4 x half> @llvm.arm.neon.vrintx.v4f16(<4 x half> %a)
  ret <4 x half> %vrndx_v1.i
}

define dso_local <8 x half> @test_vrndxq_f16(<8 x half> %a) {
; CHECK-LABEL: test_vrndxq_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vrintx.f16 q0, q0
; CHECK-NEXT:    bx lr
entry:
  %vrndxq_v1.i = tail call <8 x half> @llvm.arm.neon.vrintx.v8f16(<8 x half> %a)
  ret <8 x half> %vrndxq_v1.i
}

define dso_local <4 x half> @test_vrsqrte_f16(<4 x half> %a) {
; CHECK-LABEL: test_vrsqrte_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vrsqrte.f16 d0, d0
; CHECK-NEXT:    bx lr
entry:
  %vrsqrte_v1.i = tail call <4 x half> @llvm.arm.neon.vrsqrte.v4f16(<4 x half> %a)
  ret <4 x half> %vrsqrte_v1.i
}

define dso_local <8 x half> @test_vrsqrteq_f16(<8 x half> %a) {
; CHECK-LABEL: test_vrsqrteq_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vrsqrte.f16 q0, q0
; CHECK-NEXT:    bx lr
entry:
  %vrsqrteq_v1.i = tail call <8 x half> @llvm.arm.neon.vrsqrte.v8f16(<8 x half> %a)
  ret <8 x half> %vrsqrteq_v1.i
}

; ---- Binary operations ------------------------------------------------------

define dso_local <4 x half> @test_vadd_f16(<4 x half> %a, <4 x half> %b) {
; CHECK-LABEL: test_vadd_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vadd.f16 d0, d0, d1
; CHECK-NEXT:    bx lr
entry:
  %add.i = fadd <4 x half> %a, %b
  ret <4 x half> %add.i
}

define dso_local <8 x half> @test_vaddq_f16(<8 x half> %a, <8 x half> %b) {
; CHECK-LABEL: test_vaddq_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vadd.f16 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %add.i = fadd <8 x half> %a, %b
  ret <8 x half> %add.i
}

define dso_local <4 x half> @test_vabd_f16(<4 x half> %a, <4 x half> %b) {
; CHECK-LABEL: test_vabd_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vabd.f16 d0, d0, d1
; CHECK-NEXT:    bx lr
entry:
  %vabd_v2.i = tail call <4 x half> @llvm.arm.neon.vabds.v4f16(<4 x half> %a, <4 x half> %b)
  ret <4 x half> %vabd_v2.i
}

define dso_local <8 x half> @test_vabdq_f16(<8 x half> %a, <8 x half> %b) {
; CHECK-LABEL: test_vabdq_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vabd.f16 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %vabdq_v2.i = tail call <8 x half> @llvm.arm.neon.vabds.v8f16(<8 x half> %a, <8 x half> %b)
  ret <8 x half> %vabdq_v2.i
}

; Absolute-value comparisons. The "le" tests below call the vacge intrinsic
; with %b and %a swapped, so the expected instruction is vacge with the source
; registers in reverse order.

define dso_local <4 x i16> @test_vcage_f16(<4 x half> %a, <4 x half> %b) {
; CHECK-LABEL: test_vcage_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vacge.f16 d0, d0, d1
; CHECK-NEXT:    bx lr
entry:
  %vcage_v2.i = tail call <4 x i16> @llvm.arm.neon.vacge.v4i16.v4f16(<4 x half> %a, <4 x half> %b)
  ret <4 x i16> %vcage_v2.i
}

define dso_local <8 x i16> @test_vcageq_f16(<8 x half> %a, <8 x half> %b) {
; CHECK-LABEL: test_vcageq_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vacge.f16 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %vcageq_v2.i = tail call <8 x i16> @llvm.arm.neon.vacge.v8i16.v8f16(<8 x half> %a, <8 x half> %b)
  ret <8 x i16> %vcageq_v2.i
}

define dso_local <4 x i16> @test_vcagt_f16(<4 x half> %a, <4 x half> %b) {
; CHECK-LABEL: test_vcagt_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vacgt.f16 d0, d0, d1
; CHECK-NEXT:    bx lr
entry:
  %vcagt_v2.i = tail call <4 x i16> @llvm.arm.neon.vacgt.v4i16.v4f16(<4 x half> %a, <4 x half> %b)
  ret <4 x i16> %vcagt_v2.i
}

define dso_local <8 x i16> @test_vcagtq_f16(<8 x half> %a, <8 x half> %b) {
; CHECK-LABEL: test_vcagtq_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vacgt.f16 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %vcagtq_v2.i = tail call <8 x i16> @llvm.arm.neon.vacgt.v8i16.v8f16(<8 x half> %a, <8 x half> %b)
  ret <8 x i16> %vcagtq_v2.i
}

define dso_local <4 x i16> @test_vcale_f16(<4 x half> %a, <4 x half> %b) {
; CHECK-LABEL: test_vcale_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vacge.f16 d0, d1, d0
; CHECK-NEXT:    bx lr
entry:
  %vcale_v2.i = tail call <4 x i16> @llvm.arm.neon.vacge.v4i16.v4f16(<4 x half> %b, <4 x half> %a)
  ret <4 x i16> %vcale_v2.i
}

define dso_local <8 x i16> @test_vcaleq_f16(<8 x half> %a, <8 x half> %b) {
; CHECK-LABEL: test_vcaleq_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vacge.f16 q0, q1, q0
; CHECK-NEXT:    bx lr
entry:
  %vcaleq_v2.i = tail call <8 x i16> @llvm.arm.neon.vacge.v8i16.v8f16(<8 x half> %b, <8 x half> %a)
  ret <8 x i16> %vcaleq_v2.i
}

; Ordered register-register comparisons. The ole/olt tests expect vcge/vcgt
; with the two source registers swapped rather than a separate instruction.

define dso_local <4 x i16> @test_vceq_f16(<4 x half> %a, <4 x half> %b) {
; CHECK-LABEL: test_vceq_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vceq.f16 d0, d0, d1
; CHECK-NEXT:    bx lr
entry:
  %cmp.i = fcmp oeq <4 x half> %a, %b
  %sext.i = sext <4 x i1> %cmp.i to <4 x i16>
  ret <4 x i16> %sext.i
}

define dso_local <8 x i16> @test_vceqq_f16(<8 x half> %a, <8 x half> %b) {
; CHECK-LABEL: test_vceqq_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vceq.f16 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %cmp.i = fcmp oeq <8 x half> %a, %b
  %sext.i = sext <8 x i1> %cmp.i to <8 x i16>
  ret <8 x i16> %sext.i
}

define dso_local <4 x i16> @test_vcge_f16(<4 x half> %a, <4 x half> %b) {
; CHECK-LABEL: test_vcge_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vcge.f16 d0, d0, d1
; CHECK-NEXT:    bx lr
entry:
  %cmp.i = fcmp oge <4 x half> %a, %b
  %sext.i = sext <4 x i1> %cmp.i to <4 x i16>
  ret <4 x i16> %sext.i
}

define dso_local <8 x i16> @test_vcgeq_f16(<8 x half> %a, <8 x half> %b) {
; CHECK-LABEL: test_vcgeq_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vcge.f16 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %cmp.i = fcmp oge <8 x half> %a, %b
  %sext.i = sext <8 x i1> %cmp.i to <8 x i16>
  ret <8 x i16> %sext.i
}

define dso_local <4 x i16> @test_vcgt_f16(<4 x half> %a, <4 x half> %b) {
; CHECK-LABEL: test_vcgt_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vcgt.f16 d0, d0, d1
; CHECK-NEXT:    bx lr
entry:
  %cmp.i = fcmp ogt <4 x half> %a, %b
  %sext.i = sext <4 x i1> %cmp.i to <4 x i16>
  ret <4 x i16> %sext.i
}

define dso_local <8 x i16> @test_vcgtq_f16(<8 x half> %a, <8 x half> %b) {
; CHECK-LABEL: test_vcgtq_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vcgt.f16 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %cmp.i = fcmp ogt <8 x half> %a, %b
  %sext.i = sext <8 x i1> %cmp.i to <8 x i16>
  ret <8 x i16> %sext.i
}

define dso_local <4 x i16> @test_vcle_f16(<4 x half> %a, <4 x half> %b) {
; CHECK-LABEL: test_vcle_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vcge.f16 d0, d1, d0
; CHECK-NEXT:    bx lr
entry:
  %cmp.i = fcmp ole <4 x half> %a, %b
  %sext.i = sext <4 x i1> %cmp.i to <4 x i16>
  ret <4 x i16> %sext.i
}

define dso_local <8 x i16> @test_vcleq_f16(<8 x half> %a, <8 x half> %b) {
; CHECK-LABEL: test_vcleq_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vcge.f16 q0, q1, q0
; CHECK-NEXT:    bx lr
entry:
  %cmp.i = fcmp ole <8 x half> %a, %b
  %sext.i = sext <8 x i1> %cmp.i to <8 x i16>
  ret <8 x i16> %sext.i
}

define dso_local <4 x i16> @test_vclt_f16(<4 x half> %a, <4 x half> %b) {
; CHECK-LABEL: test_vclt_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vcgt.f16 d0, d1, d0
; CHECK-NEXT:    bx lr
entry:
  %cmp.i = fcmp olt <4 x half> %a, %b
  %sext.i = sext <4 x i1> %cmp.i to <4 x i16>
  ret <4 x i16> %sext.i
}

define dso_local <8 x i16> @test_vcltq_f16(<8 x half> %a, <8 x half> %b) {
; CHECK-LABEL: test_vcltq_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vcgt.f16 q0, q1, q0
; CHECK-NEXT:    bx lr
entry:
  %cmp.i = fcmp olt <8 x half> %a, %b
  %sext.i = sext <8 x i1> %cmp.i to <8 x i16>
  ret <8 x i16> %sext.i
}

; Fixed-point conversions: the i32 2 passed to each intrinsic is expected to
; appear as the #2 immediate on the emitted vcvt.

define dso_local <4 x half> @test_vcvt_n_f16_s16(<4 x i16> %a) {
; CHECK-LABEL: test_vcvt_n_f16_s16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vcvt.f16.s16 d0, d0, #2
; CHECK-NEXT:    bx lr
entry:
  %vcvt_n1 = tail call <4 x half> @llvm.arm.neon.vcvtfxs2fp.v4f16.v4i16(<4 x i16> %a, i32 2)
  ret <4 x half> %vcvt_n1
}

declare <4 x half> @llvm.arm.neon.vcvtfxs2fp.v4f16.v4i16(<4 x i16>, i32) #2

define dso_local <8 x half> @test_vcvtq_n_f16_s16(<8 x i16> %a) {
; CHECK-LABEL: test_vcvtq_n_f16_s16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vcvt.f16.s16 q0, q0, #2
; CHECK-NEXT:    bx lr
entry:
  %vcvt_n1 = tail call <8 x half> @llvm.arm.neon.vcvtfxs2fp.v8f16.v8i16(<8 x i16> %a, i32 2)
  ret <8 x half> %vcvt_n1
}

declare <8 x half> @llvm.arm.neon.vcvtfxs2fp.v8f16.v8i16(<8 x i16>, i32) #2

define dso_local <4 x half> @test_vcvt_n_f16_u16(<4 x i16> %a) {
; CHECK-LABEL: test_vcvt_n_f16_u16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vcvt.f16.u16 d0, d0, #2
; CHECK-NEXT:    bx lr
entry:
  %vcvt_n1 = tail call <4 x half> @llvm.arm.neon.vcvtfxu2fp.v4f16.v4i16(<4 x i16> %a, i32 2)
  ret <4 x half> %vcvt_n1
}

declare <4 x half> @llvm.arm.neon.vcvtfxu2fp.v4f16.v4i16(<4 x i16>, i32) #2

define dso_local <8 x half> @test_vcvtq_n_f16_u16(<8 x i16> %a) {
; CHECK-LABEL: test_vcvtq_n_f16_u16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vcvt.f16.u16 q0, q0, #2
; CHECK-NEXT:    bx lr
entry:
  %vcvt_n1 = tail call <8 x half> @llvm.arm.neon.vcvtfxu2fp.v8f16.v8i16(<8 x i16> %a, i32 2)
  ret <8 x half> %vcvt_n1
}

declare <8 x half> @llvm.arm.neon.vcvtfxu2fp.v8f16.v8i16(<8 x i16>, i32) #2

define dso_local <4 x i16> @test_vcvt_n_s16_f16(<4 x half> %a) {
; CHECK-LABEL: test_vcvt_n_s16_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vcvt.s16.f16 d0, d0, #2
; CHECK-NEXT:    bx lr
entry:
  %vcvt_n1 = tail call <4 x i16> @llvm.arm.neon.vcvtfp2fxs.v4i16.v4f16(<4 x half> %a, i32 2)
  ret <4 x i16> %vcvt_n1
}

declare <4 x i16> @llvm.arm.neon.vcvtfp2fxs.v4i16.v4f16(<4 x half>, i32) #2

define dso_local <8 x i16> @test_vcvtq_n_s16_f16(<8 x half> %a) {
; CHECK-LABEL: test_vcvtq_n_s16_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vcvt.s16.f16 q0, q0, #2
; CHECK-NEXT:    bx lr
entry:
  %vcvt_n1 = tail call <8 x i16> @llvm.arm.neon.vcvtfp2fxs.v8i16.v8f16(<8 x half> %a, i32 2)
  ret <8 x i16> %vcvt_n1
}

declare <8 x i16> @llvm.arm.neon.vcvtfp2fxs.v8i16.v8f16(<8 x half>, i32) #2

define dso_local <4 x i16> @test_vcvt_n_u16_f16(<4 x half> %a) {
; CHECK-LABEL: test_vcvt_n_u16_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vcvt.u16.f16 d0, d0, #2
; CHECK-NEXT:    bx lr
entry:
  %vcvt_n1 = tail call <4 x i16> @llvm.arm.neon.vcvtfp2fxu.v4i16.v4f16(<4 x half> %a, i32 2)
  ret <4 x i16> %vcvt_n1
}

declare <4 x i16> @llvm.arm.neon.vcvtfp2fxu.v4i16.v4f16(<4 x half>, i32) #2

define dso_local <8 x i16> @test_vcvtq_n_u16_f16(<8 x half> %a) {
; CHECK-LABEL: test_vcvtq_n_u16_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vcvt.u16.f16 q0, q0, #2
; CHECK-NEXT:    bx lr
entry:
  %vcvt_n1 = tail call <8 x i16> @llvm.arm.neon.vcvtfp2fxu.v8i16.v8f16(<8 x half> %a, i32 2)
  ret <8 x i16> %vcvt_n1
}

declare <8 x i16> @llvm.arm.neon.vcvtfp2fxu.v8i16.v8f16(<8 x half>, i32) #2

; Minimum / maximum, in both the vmax/vmin and the vmaxnm/vminnm flavours.

define dso_local <4 x half> @test_vmax_f16(<4 x half> %a, <4 x half> %b) {
; CHECK-LABEL: test_vmax_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmax.f16 d0, d0, d1
; CHECK-NEXT:    bx lr
entry:
  %vmax_v2.i = tail call <4 x half> @llvm.arm.neon.vmaxs.v4f16(<4 x half> %a, <4 x half> %b)
  ret <4 x half> %vmax_v2.i
}

define dso_local <8 x half> @test_vmaxq_f16(<8 x half> %a, <8 x half> %b) {
; CHECK-LABEL: test_vmaxq_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmax.f16 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %vmaxq_v2.i = tail call <8 x half> @llvm.arm.neon.vmaxs.v8f16(<8 x half> %a, <8 x half> %b)
  ret <8 x half> %vmaxq_v2.i
}

define dso_local <4 x half> @test_vmaxnm_f16(<4 x half> %a, <4 x half> %b) {
; CHECK-LABEL: test_vmaxnm_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmaxnm.f16 d0, d0, d1
; CHECK-NEXT:    bx lr
entry:
  %vmaxnm_v2.i = tail call <4 x half> @llvm.arm.neon.vmaxnm.v4f16(<4 x half> %a, <4 x half> %b)
  ret <4 x half> %vmaxnm_v2.i
}

define dso_local <8 x half> @test_vmaxnmq_f16(<8 x half> %a, <8 x half> %b) {
; CHECK-LABEL: test_vmaxnmq_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmaxnm.f16 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %vmaxnmq_v2.i = tail call <8 x half> @llvm.arm.neon.vmaxnm.v8f16(<8 x half> %a, <8 x half> %b)
  ret <8 x half> %vmaxnmq_v2.i
}

define dso_local <4 x half> @test_vmin_f16(<4 x half> %a, <4 x half> %b) {
; CHECK-LABEL: test_vmin_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmin.f16 d0, d0, d1
; CHECK-NEXT:    bx lr
entry:
  %vmin_v2.i = tail call <4 x half> @llvm.arm.neon.vmins.v4f16(<4 x half> %a, <4 x half> %b)
  ret <4 x half> %vmin_v2.i
}

define dso_local <8 x half> @test_vminq_f16(<8 x half> %a, <8 x half> %b) {
; CHECK-LABEL: test_vminq_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmin.f16 q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %vminq_v2.i = tail call <8 x half> @llvm.arm.neon.vmins.v8f16(<8 x half> %a, <8 x half> %b)
  ret <8 x half> %vminq_v2.i
}

define dso_local <4 x half> @test_vminnm_f16(<4 x half> %a, <4 x half> %b) {
; CHECK-LABEL: test_vminnm_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vminnm.f16 d0, d0, d1
; CHECK-NEXT:    bx lr
entry:
  %vminnm_v2.i = tail call <4 x half> @llvm.arm.neon.vminnm.v4f16(<4 x half> %a, <4 x half> %b)
  ret <4 x half> %vminnm_v2.i
}

define
dso_local <8 x half> @test_vminnmq_f16(<8 x half> %a, <8 x half> %b) { 982; CHECK-LABEL: test_vminnmq_f16: 983; CHECK: @ %bb.0: @ %entry 984; CHECK-NEXT: vminnm.f16 q0, q0, q1 985; CHECK-NEXT: bx lr 986entry: 987 %vminnmq_v2.i = tail call <8 x half> @llvm.arm.neon.vminnm.v8f16(<8 x half> %a, <8 x half> %b) 988 ret <8 x half> %vminnmq_v2.i 989} 990 991define dso_local <4 x half> @test_vmul_f16(<4 x half> %a, <4 x half> %b) { 992; CHECKLABEL: test_vmul_f16: 993; CHECK-LABEL: test_vmul_f16: 994; CHECK: @ %bb.0: @ %entry 995; CHECK-NEXT: vmul.f16 d0, d0, d1 996; CHECK-NEXT: bx lr 997entry: 998 %mul.i = fmul <4 x half> %a, %b 999 ret <4 x half> %mul.i 1000} 1001 1002define dso_local <8 x half> @test_vmulq_f16(<8 x half> %a, <8 x half> %b) { 1003; CHECKLABEL: test_vmulq_f16: 1004; CHECK-LABEL: test_vmulq_f16: 1005; CHECK: @ %bb.0: @ %entry 1006; CHECK-NEXT: vmul.f16 q0, q0, q1 1007; CHECK-NEXT: bx lr 1008entry: 1009 %mul.i = fmul <8 x half> %a, %b 1010 ret <8 x half> %mul.i 1011} 1012 1013define dso_local <4 x half> @test_vpadd_f16(<4 x half> %a, <4 x half> %b) { 1014; CHECKLABEL: test_vpadd_f16: 1015; CHECK-LABEL: test_vpadd_f16: 1016; CHECK: @ %bb.0: @ %entry 1017; CHECK-NEXT: vpadd.f16 d0, d0, d1 1018; CHECK-NEXT: bx lr 1019entry: 1020 %vpadd_v2.i = tail call <4 x half> @llvm.arm.neon.vpadd.v4f16(<4 x half> %a, <4 x half> %b) 1021 ret <4 x half> %vpadd_v2.i 1022} 1023 1024define dso_local <4 x half> @test_vpmax_f16(<4 x half> %a, <4 x half> %b) { 1025; CHECKLABEL: test_vpmax_f16: 1026; CHECK-LABEL: test_vpmax_f16: 1027; CHECK: @ %bb.0: @ %entry 1028; CHECK-NEXT: vpmax.f16 d0, d0, d1 1029; CHECK-NEXT: bx lr 1030entry: 1031 %vpmax_v2.i = tail call <4 x half> @llvm.arm.neon.vpmaxs.v4f16(<4 x half> %a, <4 x half> %b) 1032 ret <4 x half> %vpmax_v2.i 1033} 1034 1035define dso_local <4 x half> @test_vpmin_f16(<4 x half> %a, <4 x half> %b) { 1036; CHECKLABEL: test_vpmin_f16: 1037; CHECK-LABEL: test_vpmin_f16: 1038; CHECK: @ %bb.0: @ %entry 1039; CHECK-NEXT: vpmin.f16 d0, d0, 
d1 1040; CHECK-NEXT: bx lr 1041entry: 1042 %vpmin_v2.i = tail call <4 x half> @llvm.arm.neon.vpmins.v4f16(<4 x half> %a, <4 x half> %b) 1043 ret <4 x half> %vpmin_v2.i 1044} 1045 1046define dso_local <4 x half> @test_vrecps_f16(<4 x half> %a, <4 x half> %b) { 1047; CHECKLABEL: test_vrecps_f16: 1048; CHECK-LABEL: test_vrecps_f16: 1049; CHECK: @ %bb.0: @ %entry 1050; CHECK-NEXT: vrecps.f16 d0, d0, d1 1051; CHECK-NEXT: bx lr 1052entry: 1053 %vrecps_v2.i = tail call <4 x half> @llvm.arm.neon.vrecps.v4f16(<4 x half> %a, <4 x half> %b) 1054 ret <4 x half> %vrecps_v2.i 1055} 1056 1057define dso_local <8 x half> @test_vrecpsq_f16(<8 x half> %a, <8 x half> %b) { 1058; CHECKLABEL: test_vrecpsq_f16: 1059; CHECK-LABEL: test_vrecpsq_f16: 1060; CHECK: @ %bb.0: @ %entry 1061; CHECK-NEXT: vrecps.f16 q0, q0, q1 1062; CHECK-NEXT: bx lr 1063entry: 1064 %vrecpsq_v2.i = tail call <8 x half> @llvm.arm.neon.vrecps.v8f16(<8 x half> %a, <8 x half> %b) 1065 ret <8 x half> %vrecpsq_v2.i 1066} 1067 1068define dso_local <4 x half> @test_vrsqrts_f16(<4 x half> %a, <4 x half> %b) { 1069; CHECKLABEL: test_vrsqrts_f16: 1070; CHECK-LABEL: test_vrsqrts_f16: 1071; CHECK: @ %bb.0: @ %entry 1072; CHECK-NEXT: vrsqrts.f16 d0, d0, d1 1073; CHECK-NEXT: bx lr 1074entry: 1075 %vrsqrts_v2.i = tail call <4 x half> @llvm.arm.neon.vrsqrts.v4f16(<4 x half> %a, <4 x half> %b) 1076 ret <4 x half> %vrsqrts_v2.i 1077} 1078 1079define dso_local <8 x half> @test_vrsqrtsq_f16(<8 x half> %a, <8 x half> %b) { 1080; CHECKLABEL: test_vrsqrtsq_f16: 1081; CHECK-LABEL: test_vrsqrtsq_f16: 1082; CHECK: @ %bb.0: @ %entry 1083; CHECK-NEXT: vrsqrts.f16 q0, q0, q1 1084; CHECK-NEXT: bx lr 1085entry: 1086 %vrsqrtsq_v2.i = tail call <8 x half> @llvm.arm.neon.vrsqrts.v8f16(<8 x half> %a, <8 x half> %b) 1087 ret <8 x half> %vrsqrtsq_v2.i 1088} 1089 1090define dso_local <4 x half> @test_vsub_f16(<4 x half> %a, <4 x half> %b) { 1091; CHECKLABEL: test_vsub_f16: 1092; CHECK-LABEL: test_vsub_f16: 1093; CHECK: @ %bb.0: @ %entry 1094; 
CHECK-NEXT: vsub.f16 d0, d0, d1 1095; CHECK-NEXT: bx lr 1096entry: 1097 %sub.i = fsub <4 x half> %a, %b 1098 ret <4 x half> %sub.i 1099} 1100 1101define dso_local <8 x half> @test_vsubq_f16(<8 x half> %a, <8 x half> %b) { 1102; CHECKLABEL: test_vsubq_f16: 1103; CHECK-LABEL: test_vsubq_f16: 1104; CHECK: @ %bb.0: @ %entry 1105; CHECK-NEXT: vsub.f16 q0, q0, q1 1106; CHECK-NEXT: bx lr 1107entry: 1108 %sub.i = fsub <8 x half> %a, %b 1109 ret <8 x half> %sub.i 1110} 1111 1112define dso_local <4 x half> @test_vfma_f16(<4 x half> %a, <4 x half> %b, <4 x half> %c) { 1113; CHECK-LABEL: test_vfma_f16: 1114; CHECK: @ %bb.0: @ %entry 1115; CHECK-NEXT: vfma.f16 d0, d1, d2 1116; CHECK-NEXT: bx lr 1117entry: 1118 %0 = tail call <4 x half> @llvm.fma.v4f16(<4 x half> %b, <4 x half> %c, <4 x half> %a) 1119 ret <4 x half> %0 1120} 1121 1122define dso_local <8 x half> @test_vfmaq_f16(<8 x half> %a, <8 x half> %b, <8 x half> %c) { 1123; CHECK-LABEL: test_vfmaq_f16: 1124; CHECK: @ %bb.0: @ %entry 1125; CHECK-NEXT: vfma.f16 q0, q1, q2 1126; CHECK-NEXT: bx lr 1127entry: 1128 %0 = tail call <8 x half> @llvm.fma.v8f16(<8 x half> %b, <8 x half> %c, <8 x half> %a) 1129 ret <8 x half> %0 1130} 1131 1132define dso_local <4 x half> @test_vfms_f16(<4 x half> %a, <4 x half> %b, <4 x half> %c) { 1133; CHECK-LABEL: test_vfms_f16: 1134; CHECK: @ %bb.0: @ %entry 1135; CHECK-NEXT: vneg.f16 d16, d1 1136; CHECK-NEXT: vfma.f16 d0, d16, d2 1137; CHECK-NEXT: bx lr 1138entry: 1139 %sub.i = fsub <4 x half> <half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000>, %b 1140 %0 = tail call <4 x half> @llvm.fma.v4f16(<4 x half> %sub.i, <4 x half> %c, <4 x half> %a) 1141 ret <4 x half> %0 1142} 1143 1144define dso_local <8 x half> @test_vfmsq_f16(<8 x half> %a, <8 x half> %b, <8 x half> %c) { 1145; CHECK-LABEL: test_vfmsq_f16: 1146; CHECK: @ %bb.0: @ %entry 1147; CHECK-NEXT: vneg.f16 q8, q1 1148; CHECK-NEXT: vfma.f16 q0, q8, q2 1149; CHECK-NEXT: bx lr 1150entry: 1151 %sub.i = fsub <8 x half> <half 0xH8000, half 
0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000>, %b 1152 %0 = tail call <8 x half> @llvm.fma.v8f16(<8 x half> %sub.i, <8 x half> %c, <8 x half> %a) 1153 ret <8 x half> %0 1154} 1155 1156define dso_local <4 x half> @test_vmul_lane_f16(<4 x half> %a, <4 x half> %b) { 1157; CHECK-LABEL: test_vmul_lane_f16: 1158; CHECK: @ %bb.0: @ %entry 1159; CHECK-NEXT: vmul.f16 d0, d0, d1[3] 1160; CHECK-NEXT: bx lr 1161entry: 1162 %shuffle = shufflevector <4 x half> %b, <4 x half> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3> 1163 %mul = fmul <4 x half> %shuffle, %a 1164 ret <4 x half> %mul 1165} 1166 1167define dso_local <8 x half> @test_vmulq_lane_f16(<8 x half> %a, <4 x half> %b) { 1168; CHECK-LABEL: test_vmulq_lane_f16: 1169; CHECK: @ %bb.0: @ %entry 1170; CHECK-NEXT: @ kill: def $d2 killed $d2 def $q1 1171; CHECK-NEXT: vmul.f16 q0, q0, d2[3] 1172; CHECK-NEXT: bx lr 1173entry: 1174 %shuffle = shufflevector <4 x half> %b, <4 x half> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> 1175 %mul = fmul <8 x half> %shuffle, %a 1176 ret <8 x half> %mul 1177} 1178 1179define dso_local <4 x half> @test_vmul_n_f16(<4 x half> %a, float %b.coerce) { 1180; CHECK-LABEL: test_vmul_n_f16: 1181; CHECK: @ %bb.0: @ %entry 1182; CHECK-NEXT: @ kill: def $s2 killed $s2 def $d1 1183; CHECK-NEXT: vmul.f16 d0, d0, d1[0] 1184; CHECK-NEXT: bx lr 1185entry: 1186 %0 = bitcast float %b.coerce to i32 1187 %tmp.0.extract.trunc = trunc i32 %0 to i16 1188 %1 = bitcast i16 %tmp.0.extract.trunc to half 1189 %vecinit = insertelement <4 x half> undef, half %1, i32 0 1190 %vecinit4 = shufflevector <4 x half> %vecinit, <4 x half> undef, <4 x i32> zeroinitializer 1191 %mul = fmul <4 x half> %vecinit4, %a 1192 ret <4 x half> %mul 1193} 1194 1195define dso_local <8 x half> @test_vmulq_n_f16(<8 x half> %a, float %b.coerce) { 1196; CHECK-LABEL: test_vmulq_n_f16: 1197; CHECK: @ %bb.0: @ %entry 1198; CHECK-NEXT: @ kill: def $s4 killed $s4 def $d2 1199; 
CHECK-NEXT: vmul.f16 q0, q0, d2[0] 1200; CHECK-NEXT: bx lr 1201entry: 1202 %0 = bitcast float %b.coerce to i32 1203 %tmp.0.extract.trunc = trunc i32 %0 to i16 1204 %1 = bitcast i16 %tmp.0.extract.trunc to half 1205 %vecinit = insertelement <8 x half> undef, half %1, i32 0 1206 %vecinit8 = shufflevector <8 x half> %vecinit, <8 x half> undef, <8 x i32> zeroinitializer 1207 %mul = fmul <8 x half> %vecinit8, %a 1208 ret <8 x half> %mul 1209} 1210 1211define dso_local <4 x half> @test_vbsl_f16(<4 x i16> %a, <4 x half> %b, <4 x half> %c) { 1212; CHECKLABEL: test_vbsl_f16: 1213; CHECK-LABEL: test_vbsl_f16: 1214; CHECK: @ %bb.0: @ %entry 1215; CHECK-NEXT: vbsl d0, d1, d2 1216; CHECK-NEXT: bx lr 1217entry: 1218 %0 = bitcast <4 x i16> %a to <8 x i8> 1219 %1 = bitcast <4 x half> %b to <8 x i8> 1220 %2 = bitcast <4 x half> %c to <8 x i8> 1221 %vbsl_v.i = tail call <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8> %0, <8 x i8> %1, <8 x i8> %2) 1222 %3 = bitcast <8 x i8> %vbsl_v.i to <4 x half> 1223 ret <4 x half> %3 1224} 1225 1226define dso_local <8 x half> @test_vbslq_f16(<8 x i16> %a, <8 x half> %b, <8 x half> %c) { 1227; CHECKLABEL: test_vbslq_f16: 1228; CHECK-LABEL: test_vbslq_f16: 1229; CHECK: @ %bb.0: @ %entry 1230; CHECK-NEXT: vbsl q0, q1, q2 1231; CHECK-NEXT: bx lr 1232entry: 1233 %0 = bitcast <8 x i16> %a to <16 x i8> 1234 %1 = bitcast <8 x half> %b to <16 x i8> 1235 %2 = bitcast <8 x half> %c to <16 x i8> 1236 %vbslq_v.i = tail call <16 x i8> @llvm.arm.neon.vbsl.v16i8(<16 x i8> %0, <16 x i8> %1, <16 x i8> %2) 1237 %3 = bitcast <16 x i8> %vbslq_v.i to <8 x half> 1238 ret <8 x half> %3 1239} 1240 1241define dso_local %struct.float16x4x2_t @test_vzip_f16(<4 x half> %a, <4 x half> %b) { 1242; CHECK-LABEL: test_vzip_f16: 1243; CHECK: @ %bb.0: @ %entry 1244; CHECK-NEXT: vzip.16 d0, d1 1245; CHECK-NEXT: bx lr 1246entry: 1247 %vzip.i = shufflevector <4 x half> %a, <4 x half> %b, <4 x i32> <i32 0, i32 4, i32 1, i32 5> 1248 %vzip1.i = shufflevector <4 x half> %a, <4 x half> %b, <4 x 
i32> <i32 2, i32 6, i32 3, i32 7> 1249 %.fca.0.0.insert = insertvalue %struct.float16x4x2_t undef, <4 x half> %vzip.i, 0, 0 1250 %.fca.0.1.insert = insertvalue %struct.float16x4x2_t %.fca.0.0.insert, <4 x half> %vzip1.i, 0, 1 1251 ret %struct.float16x4x2_t %.fca.0.1.insert 1252} 1253 1254define dso_local %struct.float16x8x2_t @test_vzipq_f16(<8 x half> %a, <8 x half> %b) { 1255; CHECK-LABEL: test_vzipq_f16: 1256; CHECK: @ %bb.0: @ %entry 1257; CHECK-NEXT: vzip.16 q0, q1 1258; CHECK-NEXT: bx lr 1259entry: 1260 %vzip.i = shufflevector <8 x half> %a, <8 x half> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11> 1261 %vzip1.i = shufflevector <8 x half> %a, <8 x half> %b, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15> 1262 %.fca.0.0.insert = insertvalue %struct.float16x8x2_t undef, <8 x half> %vzip.i, 0, 0 1263 %.fca.0.1.insert = insertvalue %struct.float16x8x2_t %.fca.0.0.insert, <8 x half> %vzip1.i, 0, 1 1264 ret %struct.float16x8x2_t %.fca.0.1.insert 1265} 1266 1267define dso_local %struct.float16x4x2_t @test_vuzp_f16(<4 x half> %a, <4 x half> %b) { 1268; CHECK-LABEL: test_vuzp_f16: 1269; CHECK: @ %bb.0: @ %entry 1270; CHECK-NEXT: vuzp.16 d0, d1 1271; CHECK-NEXT: bx lr 1272entry: 1273 %vuzp.i = shufflevector <4 x half> %a, <4 x half> %b, <4 x i32> <i32 0, i32 2, i32 4, i32 6> 1274 %vuzp1.i = shufflevector <4 x half> %a, <4 x half> %b, <4 x i32> <i32 1, i32 3, i32 5, i32 7> 1275 %.fca.0.0.insert = insertvalue %struct.float16x4x2_t undef, <4 x half> %vuzp.i, 0, 0 1276 %.fca.0.1.insert = insertvalue %struct.float16x4x2_t %.fca.0.0.insert, <4 x half> %vuzp1.i, 0, 1 1277 ret %struct.float16x4x2_t %.fca.0.1.insert 1278} 1279 1280define dso_local %struct.float16x8x2_t @test_vuzpq_f16(<8 x half> %a, <8 x half> %b) { 1281; CHECK-LABEL: test_vuzpq_f16: 1282; CHECK: @ %bb.0: @ %entry 1283; CHECK-NEXT: vuzp.16 q0, q1 1284; CHECK-NEXT: bx lr 1285entry: 1286 %vuzp.i = shufflevector <8 x half> %a, <8 x half> %b, <8 x i32> <i32 0, i32 2, 
i32 4, i32 6, i32 8, i32 10, i32 12, i32 14> 1287 %vuzp1.i = shufflevector <8 x half> %a, <8 x half> %b, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15> 1288 %.fca.0.0.insert = insertvalue %struct.float16x8x2_t undef, <8 x half> %vuzp.i, 0, 0 1289 %.fca.0.1.insert = insertvalue %struct.float16x8x2_t %.fca.0.0.insert, <8 x half> %vuzp1.i, 0, 1 1290 ret %struct.float16x8x2_t %.fca.0.1.insert 1291} 1292 1293define dso_local %struct.float16x4x2_t @test_vtrn_f16(<4 x half> %a, <4 x half> %b) { 1294; CHECK-LABEL: test_vtrn_f16: 1295; CHECK: @ %bb.0: @ %entry 1296; CHECK-NEXT: vtrn.16 d0, d1 1297; CHECK-NEXT: bx lr 1298entry: 1299 %vtrn.i = shufflevector <4 x half> %a, <4 x half> %b, <4 x i32> <i32 0, i32 4, i32 2, i32 6> 1300 %vtrn1.i = shufflevector <4 x half> %a, <4 x half> %b, <4 x i32> <i32 1, i32 5, i32 3, i32 7> 1301 %.fca.0.0.insert = insertvalue %struct.float16x4x2_t undef, <4 x half> %vtrn.i, 0, 0 1302 %.fca.0.1.insert = insertvalue %struct.float16x4x2_t %.fca.0.0.insert, <4 x half> %vtrn1.i, 0, 1 1303 ret %struct.float16x4x2_t %.fca.0.1.insert 1304} 1305 1306define dso_local %struct.float16x8x2_t @test_vtrnq_f16(<8 x half> %a, <8 x half> %b) { 1307; CHECK-LABEL: test_vtrnq_f16: 1308; CHECK: @ %bb.0: @ %entry 1309; CHECK-NEXT: vtrn.16 q0, q1 1310; CHECK-NEXT: bx lr 1311entry: 1312 %vtrn.i = shufflevector <8 x half> %a, <8 x half> %b, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14> 1313 %vtrn1.i = shufflevector <8 x half> %a, <8 x half> %b, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15> 1314 %.fca.0.0.insert = insertvalue %struct.float16x8x2_t undef, <8 x half> %vtrn.i, 0, 0 1315 %.fca.0.1.insert = insertvalue %struct.float16x8x2_t %.fca.0.0.insert, <8 x half> %vtrn1.i, 0, 1 1316 ret %struct.float16x8x2_t %.fca.0.1.insert 1317} 1318 1319define dso_local <4 x half> @test_vmov_n_f16(float %a.coerce) { 1320; CHECK-LABEL: test_vmov_n_f16: 1321; CHECK: @ %bb.0: @ %entry 1322; CHECK-NEXT: @ kill: def 
$s0 killed $s0 def $d0 1323; CHECK-NEXT: vdup.16 d0, d0[0] 1324; CHECK-NEXT: bx lr 1325entry: 1326 %0 = bitcast float %a.coerce to i32 1327 %tmp.0.extract.trunc = trunc i32 %0 to i16 1328 %1 = bitcast i16 %tmp.0.extract.trunc to half 1329 %vecinit = insertelement <4 x half> undef, half %1, i32 0 1330 %vecinit4 = shufflevector <4 x half> %vecinit, <4 x half> undef, <4 x i32> zeroinitializer 1331 ret <4 x half> %vecinit4 1332} 1333 1334define dso_local <8 x half> @test_vmovq_n_f16(float %a.coerce) { 1335; CHECK-LABEL: test_vmovq_n_f16: 1336; CHECK: @ %bb.0: @ %entry 1337; CHECK-NEXT: @ kill: def $s0 killed $s0 def $d0 1338; CHECK-NEXT: vdup.16 q0, d0[0] 1339; CHECK-NEXT: bx lr 1340entry: 1341 %0 = bitcast float %a.coerce to i32 1342 %tmp.0.extract.trunc = trunc i32 %0 to i16 1343 %1 = bitcast i16 %tmp.0.extract.trunc to half 1344 %vecinit = insertelement <8 x half> undef, half %1, i32 0 1345 %vecinit8 = shufflevector <8 x half> %vecinit, <8 x half> undef, <8 x i32> zeroinitializer 1346 ret <8 x half> %vecinit8 1347} 1348 1349define dso_local <4 x half> @test_vdup_n_f16(float %a.coerce) { 1350; CHECK-LABEL: test_vdup_n_f16: 1351; CHECK: @ %bb.0: @ %entry 1352; CHECK-NEXT: @ kill: def $s0 killed $s0 def $d0 1353; CHECK-NEXT: vdup.16 d0, d0[0] 1354; CHECK-NEXT: bx lr 1355entry: 1356 %0 = bitcast float %a.coerce to i32 1357 %tmp.0.extract.trunc = trunc i32 %0 to i16 1358 %1 = bitcast i16 %tmp.0.extract.trunc to half 1359 %vecinit = insertelement <4 x half> undef, half %1, i32 0 1360 %vecinit4 = shufflevector <4 x half> %vecinit, <4 x half> undef, <4 x i32> zeroinitializer 1361 ret <4 x half> %vecinit4 1362} 1363 1364define dso_local <8 x half> @test_vdupq_n_f16(float %a.coerce) { 1365; CHECK-LABEL: test_vdupq_n_f16: 1366; CHECK: @ %bb.0: @ %entry 1367; CHECK-NEXT: @ kill: def $s0 killed $s0 def $d0 1368; CHECK-NEXT: vdup.16 q0, d0[0] 1369; CHECK-NEXT: bx lr 1370entry: 1371 %0 = bitcast float %a.coerce to i32 1372 %tmp.0.extract.trunc = trunc i32 %0 to i16 1373 %1 = 
bitcast i16 %tmp.0.extract.trunc to half 1374 %vecinit = insertelement <8 x half> undef, half %1, i32 0 1375 %vecinit8 = shufflevector <8 x half> %vecinit, <8 x half> undef, <8 x i32> zeroinitializer 1376 ret <8 x half> %vecinit8 1377} 1378 1379define dso_local <4 x half> @test_vdup_lane_f16(<4 x half> %a) { 1380; CHECK-LABEL: test_vdup_lane_f16: 1381; CHECK: @ %bb.0: @ %entry 1382; CHECK-NEXT: vdup.16 d0, d0[3] 1383; CHECK-NEXT: bx lr 1384entry: 1385 %shuffle = shufflevector <4 x half> %a, <4 x half> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3> 1386 ret <4 x half> %shuffle 1387} 1388 1389define dso_local <8 x half> @test_vdupq_lane_f16(<4 x half> %a) { 1390; CHECK-LABEL: test_vdupq_lane_f16: 1391; CHECK: @ %bb.0: @ %entry 1392; CHECK-NEXT: @ kill: def $d0 killed $d0 def $q0 1393; CHECK-NEXT: vdup.16 q0, d0[3] 1394; CHECK-NEXT: bx lr 1395entry: 1396 %shuffle = shufflevector <4 x half> %a, <4 x half> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> 1397 ret <8 x half> %shuffle 1398} 1399 1400define dso_local <4 x half> @test_vext_f16(<4 x half> %a, <4 x half> %b) { 1401; CHECK-LABEL: test_vext_f16: 1402; CHECK: @ %bb.0: @ %entry 1403; CHECK-NEXT: vext.16 d0, d0, d1, #2 1404; CHECK-NEXT: bx lr 1405entry: 1406 %vext = shufflevector <4 x half> %a, <4 x half> %b, <4 x i32> <i32 2, i32 3, i32 4, i32 5> 1407 ret <4 x half> %vext 1408} 1409 1410define dso_local <8 x half> @test_vextq_f16(<8 x half> %a, <8 x half> %b) { 1411; CHECK-LABEL: test_vextq_f16: 1412; CHECK: @ %bb.0: @ %entry 1413; CHECK-NEXT: vext.16 q0, q0, q1, #5 1414; CHECK-NEXT: bx lr 1415entry: 1416 %vext = shufflevector <8 x half> %a, <8 x half> %b, <8 x i32> <i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12> 1417 ret <8 x half> %vext 1418} 1419 1420define dso_local <4 x half> @test_vrev64_f16(<4 x half> %a) { 1421; CHECK-LABEL: test_vrev64_f16: 1422; CHECK: @ %bb.0: @ %entry 1423; CHECK-NEXT: vrev64.16 d0, d0 1424; CHECK-NEXT: bx lr 1425entry: 1426 %shuffle.i = 
shufflevector <4 x half> %a, <4 x half> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0> 1427 ret <4 x half> %shuffle.i 1428} 1429 1430define dso_local <8 x half> @test_vrev64q_f16(<8 x half> %a) { 1431; CHECK-LABEL: test_vrev64q_f16: 1432; CHECK: @ %bb.0: @ %entry 1433; CHECK-NEXT: vrev64.16 q0, q0 1434; CHECK-NEXT: bx lr 1435entry: 1436 %shuffle.i = shufflevector <8 x half> %a, <8 x half> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4> 1437 ret <8 x half> %shuffle.i 1438} 1439 1440define <4 x half> @test_vld_dup1_4xhalf(ptr %b) { 1441; CHECK-LABEL: test_vld_dup1_4xhalf: 1442; CHECK: @ %bb.0: @ %entry 1443; CHECK-NEXT: vld1.16 {d0[]}, [r0:16] 1444; CHECK-NEXT: bx lr 1445 1446entry: 1447 %b1 = load half, ptr %b, align 2 1448 %vecinit = insertelement <4 x half> undef, half %b1, i32 0 1449 %vecinit2 = insertelement <4 x half> %vecinit, half %b1, i32 1 1450 %vecinit3 = insertelement <4 x half> %vecinit2, half %b1, i32 2 1451 %vecinit4 = insertelement <4 x half> %vecinit3, half %b1, i32 3 1452 ret <4 x half> %vecinit4 1453} 1454 1455define <8 x half> @test_vld_dup1_8xhalf(ptr %b) local_unnamed_addr { 1456; CHECK-LABEL: test_vld_dup1_8xhalf: 1457; CHECK: @ %bb.0: @ %entry 1458; CHECK-NEXT: vld1.16 {d0[], d1[]}, [r0:16] 1459; CHECK-NEXT: bx lr 1460 1461entry: 1462 %b1 = load half, ptr %b, align 2 1463 %vecinit = insertelement <8 x half> undef, half %b1, i32 0 1464 %vecinit8 = shufflevector <8 x half> %vecinit, <8 x half> undef, <8 x i32> zeroinitializer 1465 ret <8 x half> %vecinit8 1466} 1467 1468define <8 x half> @test_shufflevector8xhalf(<4 x half> %a) { 1469; CHECK-LABEL: test_shufflevector8xhalf: 1470; CHECK: @ %bb.0: @ %entry 1471; CHECK-NEXT: @ kill: def $d0 killed $d0 def $q0 1472; CHECK-NEXT: vmov.f64 d1, d0 1473; CHECK-NEXT: bx lr 1474 1475entry: 1476 %r = shufflevector <4 x half> %a, <4 x half> %a, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 1477 ret <8 x half> %r 1478} 1479 1480declare <4 x half> 
@llvm.fabs.v4f16(<4 x half>) 1481declare <8 x half> @llvm.fabs.v8f16(<8 x half>) 1482declare <4 x i16> @llvm.arm.neon.vcvtas.v4i16.v4f16(<4 x half>) 1483declare <4 x i16> @llvm.arm.neon.vcvtau.v4i16.v4f16(<4 x half>) 1484declare <8 x i16> @llvm.arm.neon.vcvtas.v8i16.v8f16(<8 x half>) 1485declare <4 x i16> @llvm.arm.neon.vcvtms.v4i16.v4f16(<4 x half>) 1486declare <8 x i16> @llvm.arm.neon.vcvtms.v8i16.v8f16(<8 x half>) 1487declare <4 x i16> @llvm.arm.neon.vcvtmu.v4i16.v4f16(<4 x half>) 1488declare <8 x i16> @llvm.arm.neon.vcvtmu.v8i16.v8f16(<8 x half>) 1489declare <4 x i16> @llvm.arm.neon.vcvtns.v4i16.v4f16(<4 x half>) 1490declare <8 x i16> @llvm.arm.neon.vcvtns.v8i16.v8f16(<8 x half>) 1491declare <4 x i16> @llvm.arm.neon.vcvtnu.v4i16.v4f16(<4 x half>) 1492declare <8 x i16> @llvm.arm.neon.vcvtnu.v8i16.v8f16(<8 x half>) 1493declare <4 x i16> @llvm.arm.neon.vcvtps.v4i16.v4f16(<4 x half>) 1494declare <8 x i16> @llvm.arm.neon.vcvtps.v8i16.v8f16(<8 x half>) 1495declare <4 x i16> @llvm.arm.neon.vcvtpu.v4i16.v4f16(<4 x half>) 1496declare <8 x i16> @llvm.arm.neon.vcvtpu.v8i16.v8f16(<8 x half>) 1497declare <4 x half> @llvm.arm.neon.vrecpe.v4f16(<4 x half>) 1498declare <8 x half> @llvm.arm.neon.vrecpe.v8f16(<8 x half>) 1499declare <4 x half> @llvm.arm.neon.vrintz.v4f16(<4 x half>) 1500declare <8 x half> @llvm.arm.neon.vrintz.v8f16(<8 x half>) 1501declare <4 x half> @llvm.arm.neon.vrinta.v4f16(<4 x half>) 1502declare <8 x half> @llvm.arm.neon.vrinta.v8f16(<8 x half>) 1503declare <4 x half> @llvm.arm.neon.vrintm.v4f16(<4 x half>) 1504declare <8 x half> @llvm.arm.neon.vrintm.v8f16(<8 x half>) 1505declare <4 x half> @llvm.arm.neon.vrintn.v4f16(<4 x half>) 1506declare <8 x half> @llvm.arm.neon.vrintn.v8f16(<8 x half>) 1507declare <4 x half> @llvm.arm.neon.vrintp.v4f16(<4 x half>) 1508declare <8 x half> @llvm.arm.neon.vrintp.v8f16(<8 x half>) 1509declare <4 x half> @llvm.arm.neon.vrintx.v4f16(<4 x half>) 1510declare <8 x half> @llvm.arm.neon.vrintx.v8f16(<8 x half>) 1511declare <4 x 
half> @llvm.arm.neon.vrsqrte.v4f16(<4 x half>) 1512declare <8 x half> @llvm.arm.neon.vrsqrte.v8f16(<8 x half>) 1513declare <4 x half> @llvm.arm.neon.vabds.v4f16(<4 x half>, <4 x half>) 1514declare <8 x half> @llvm.arm.neon.vabds.v8f16(<8 x half>, <8 x half>) 1515declare <4 x i16> @llvm.arm.neon.vacge.v4i16.v4f16(<4 x half>, <4 x half>) 1516declare <8 x i16> @llvm.arm.neon.vacge.v8i16.v8f16(<8 x half>, <8 x half>) 1517declare <4 x i16> @llvm.arm.neon.vacgt.v4i16.v4f16(<4 x half>, <4 x half>) 1518declare <8 x i16> @llvm.arm.neon.vacgt.v8i16.v8f16(<8 x half>, <8 x half>) 1519declare <4 x half> @llvm.arm.neon.vmaxs.v4f16(<4 x half>, <4 x half>) 1520declare <8 x half> @llvm.arm.neon.vmaxs.v8f16(<8 x half>, <8 x half>) 1521declare <4 x half> @llvm.arm.neon.vmaxnm.v4f16(<4 x half>, <4 x half>) 1522declare <8 x half> @llvm.arm.neon.vmaxnm.v8f16(<8 x half>, <8 x half>) 1523declare <4 x half> @llvm.arm.neon.vmins.v4f16(<4 x half>, <4 x half>) 1524declare <8 x half> @llvm.arm.neon.vmins.v8f16(<8 x half>, <8 x half>) 1525declare <4 x half> @llvm.arm.neon.vminnm.v4f16(<4 x half>, <4 x half>) 1526declare <8 x half> @llvm.arm.neon.vminnm.v8f16(<8 x half>, <8 x half>) 1527declare <4 x half> @llvm.arm.neon.vpadd.v4f16(<4 x half>, <4 x half>) 1528declare <4 x half> @llvm.arm.neon.vpmaxs.v4f16(<4 x half>, <4 x half>) 1529declare <4 x half> @llvm.arm.neon.vpmins.v4f16(<4 x half>, <4 x half>) 1530declare <4 x half> @llvm.arm.neon.vrecps.v4f16(<4 x half>, <4 x half>) 1531declare <8 x half> @llvm.arm.neon.vrecps.v8f16(<8 x half>, <8 x half>) 1532declare <4 x half> @llvm.arm.neon.vrsqrts.v4f16(<4 x half>, <4 x half>) 1533declare <8 x half> @llvm.arm.neon.vrsqrts.v8f16(<8 x half>, <8 x half>) 1534declare <4 x half> @llvm.fma.v4f16(<4 x half>, <4 x half>, <4 x half>) 1535declare <8 x half> @llvm.fma.v8f16(<8 x half>, <8 x half>, <8 x half>) 1536declare <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8>, <8 x i8>, <8 x i8>) 1537declare <16 x i8> @llvm.arm.neon.vbsl.v16i8(<16 x i8>, <16 x i8>, <16 x 
i8>) 1538declare { <8 x half>, <8 x half> } @llvm.arm.neon.vld2lane.v8f16.p0(ptr, <8 x half>, <8 x half>, i32, i32) 1539declare { <4 x half>, <4 x half> } @llvm.arm.neon.vld2lane.v4f16.p0(ptr, <4 x half>, <4 x half>, i32, i32) 1540declare { <8 x half>, <8 x half>, <8 x half> } @llvm.arm.neon.vld3lane.v8f16.p0(ptr, <8 x half>, <8 x half>, <8 x half>, i32, i32) 1541declare { <4 x half>, <4 x half>, <4 x half> } @llvm.arm.neon.vld3lane.v4f16.p0(ptr, <4 x half>, <4 x half>, <4 x half>, i32, i32) 1542declare { <8 x half>, <8 x half>, <8 x half>, <8 x half> } @llvm.arm.neon.vld4lane.v8f16.p0(ptr, <8 x half>, <8 x half>, <8 x half>, <8 x half>, i32, i32) 1543declare { <4 x half>, <4 x half>, <4 x half>, <4 x half> } @llvm.arm.neon.vld4lane.v4f16.p0(ptr, <4 x half>, <4 x half>, <4 x half>, <4 x half>, i32, i32) 1544declare void @llvm.arm.neon.vst2lane.p0.v8f16(ptr, <8 x half>, <8 x half>, i32, i32) 1545declare void @llvm.arm.neon.vst2lane.p0.v4f16(ptr, <4 x half>, <4 x half>, i32, i32) 1546declare void @llvm.arm.neon.vst3lane.p0.v8f16(ptr, <8 x half>, <8 x half>, <8 x half>, i32, i32) 1547declare void @llvm.arm.neon.vst3lane.p0.v4f16(ptr, <4 x half>, <4 x half>, <4 x half>, i32, i32) 1548declare void @llvm.arm.neon.vst4lane.p0.v8f16(ptr, <8 x half>, <8 x half>, <8 x half>, <8 x half>, i32, i32) 1549declare void @llvm.arm.neon.vst4lane.p0.v4f16(ptr, <4 x half>, <4 x half>, <4 x half>, <4 x half>, i32, i32) 1550 1551define { <8 x half>, <8 x half> } @test_vld2q_lane_f16(ptr, <8 x half>, <8 x half>) { 1552; CHECK-LABEL: test_vld2q_lane_f16: 1553; CHECK: @ %bb.0: @ %entry 1554; CHECK-NEXT: @ kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 1555; CHECK-NEXT: @ kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 1556; CHECK-NEXT: vld2.16 {d1[3], d3[3]}, [r0] 1557; CHECK-NEXT: bx lr 1558entry: 1559 %3 = tail call { <8 x half>, <8 x half> } @llvm.arm.neon.vld2lane.v8f16.p0(ptr %0, <8 x half> %1, <8 x half> %2, i32 7, i32 2) 1560 ret { <8 x half>, <8 x half> } %3 1561} 1562 
; Lane load of a two-vector <4 x half> structure: calls
; llvm.arm.neon.vld2lane.v4f16.p0 on the pointer and both vector arguments
; with trailing i32 operands 3 and 2 and returns the resulting pair.
; The first trailing operand is the lane (the expected vld2.16 writes
; element [3] of d0 and d1); the second is presumably the alignment
; operand - confirm against the intrinsic definition.
define { <4 x half>, <4 x half> } @test_vld2_lane_f16(ptr, <4 x half>, <4 x half>) {
; CHECK-LABEL: test_vld2_lane_f16:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: @ kill: def $d1 killed $d1 killed $q0 def $q0
; CHECK-NEXT: @ kill: def $d0 killed $d0 killed $q0 def $q0
; CHECK-NEXT: vld2.16 {d0[3], d1[3]}, [r0]
; CHECK-NEXT: bx lr
entry:
  ; The unnamed arguments are %0-%2, so the first unnamed result is %3.
  %3 = tail call { <4 x half>, <4 x half> } @llvm.arm.neon.vld2lane.v4f16.p0(ptr %0, <4 x half> %1, <4 x half> %2, i32 3, i32 2)
  ret { <4 x half>, <4 x half> } %3
}

; Lane load of a three-vector <8 x half> structure: calls
; llvm.arm.neon.vld3lane.v8f16.p0 with trailing i32 operands 7 and 2.
; The expected vld3.16 writes element [3] of d1, d3 and d5, i.e. lane 7 of
; each Q-sized argument as seen through its D-register halves.
define { <8 x half>, <8 x half>, <8 x half> } @test_vld3q_lane_f16(ptr, <8 x half>, <8 x half>, <8 x half>) {
; CHECK-LABEL: test_vld3q_lane_f16:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: @ kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-NEXT: @ kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-NEXT: @ kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-NEXT: vld3.16 {d1[3], d3[3], d5[3]}, [r0]
; CHECK-NEXT: bx lr
entry:
  ; The unnamed arguments are %0-%3, so the first unnamed result is %4.
  %4 = tail call { <8 x half>, <8 x half>, <8 x half> } @llvm.arm.neon.vld3lane.v8f16.p0(ptr %0, <8 x half> %1, <8 x half> %2, <8 x half> %3, i32 7, i32 2)
  ret { <8 x half>, <8 x half>, <8 x half> } %4
}

; Lane load of a three-vector <4 x half> structure: calls
; llvm.arm.neon.vld3lane.v4f16.p0 with trailing i32 operands 3 and 2.
; The expected vld3.16 writes element [3] of d0, d1 and d2.
define { <4 x half>, <4 x half>, <4 x half> } @test_vld3_lane_f16(ptr, <4 x half>, <4 x half>, <4 x half>) {
; CHECK-LABEL: test_vld3_lane_f16:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: @ kill: def $d2 killed $d2 killed $q0_q1 def $q0_q1
; CHECK-NEXT: @ kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1
; CHECK-NEXT: @ kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
; CHECK-NEXT: vld3.16 {d0[3], d1[3], d2[3]}, [r0]
; CHECK-NEXT: bx lr
entry:
  ; The unnamed arguments are %0-%3, so the first unnamed result is %4.
  %4 = tail call { <4 x half>, <4 x half>, <4 x half> } @llvm.arm.neon.vld3lane.v4f16.p0(ptr %0, <4 x half> %1, <4 x half> %2, <4 x half> %3, i32 3, i32 2)
  ret { <4 x half>, <4 x half>, <4 x half> } %4
}

; Lane load of a four-vector <8 x half> structure: calls
; llvm.arm.neon.vld4lane.v8f16.p0 with trailing i32 operands 7 and 2.
; The expected vld4.16 writes element [3] of d1, d3, d5 and d7, i.e. lane 7
; of each Q-sized argument as seen through its D-register halves.
define { <8 x half>, <8 x half>, <8 x half>, <8 x half> } @test_vld4lane_v8f16_p0i8(ptr, <8 x half>, <8 x half>, <8 x half>, <8 x half>) {
; CHECK-LABEL: test_vld4lane_v8f16_p0i8:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: @ kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-NEXT: @ kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-NEXT: @ kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-NEXT: @ kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-NEXT: vld4.16 {d1[3], d3[3], d5[3], d7[3]}, [r0]
; CHECK-NEXT: bx lr
entry:
  ; The unnamed arguments are %0-%4, so the first unnamed result is %5.
  %5 = tail call { <8 x half>, <8 x half>, <8 x half>, <8 x half> } @llvm.arm.neon.vld4lane.v8f16.p0(ptr %0, <8 x half> %1, <8 x half> %2, <8 x half> %3, <8 x half> %4, i32 7, i32 2)
  ret { <8 x half>, <8 x half>, <8 x half>, <8 x half> } %5
}

; Lane load of a four-vector <4 x half> structure: calls
; llvm.arm.neon.vld4lane.v4f16.p0 with trailing i32 operands 3 and 2.
; The expected vld4.16 writes element [3] of d0 through d3.
define { <4 x half>, <4 x half>, <4 x half>, <4 x half> } @test_vld4lane_v4f16_p0i8(ptr, <4 x half>, <4 x half>, <4 x half>, <4 x half>) {
; CHECK-LABEL: test_vld4lane_v4f16_p0i8:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: @ kill: def $d3 killed $d3 killed $q0_q1 def $q0_q1
; CHECK-NEXT: @ kill: def $d2 killed $d2 killed $q0_q1 def $q0_q1
; CHECK-NEXT: @ kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1
; CHECK-NEXT: @ kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
; CHECK-NEXT: vld4.16 {d0[3], d1[3], d2[3], d3[3]}, [r0]
; CHECK-NEXT: bx lr
entry:
  ; The unnamed arguments are %0-%4, so the first unnamed result is %5.
  %5 = tail call { <4 x half>, <4 x half>, <4 x half>, <4 x half> } @llvm.arm.neon.vld4lane.v4f16.p0(ptr %0, <4 x half> %1, <4 x half> %2, <4 x half> %3, <4 x half> %4, i32 3, i32 2)
  ret { <4 x half>, <4 x half>, <4 x half>, <4 x half> } %5
}

; Lane store of two <8 x half> vectors: calls llvm.arm.neon.vst2lane.p0.v8f16
; with trailing i32 operands 0 and 1. The expected vst2.16 stores element [0]
; of d0 and d2 with no alignment qualifier on [r0].
define void @test_vst2lane_p0i8_v8f16(ptr, <8 x half>, <8 x half>) {
; CHECK-LABEL: test_vst2lane_p0i8_v8f16:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: @ kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
; CHECK-NEXT: @ kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
; CHECK-NEXT: vst2.16 {d0[0], d2[0]}, [r0]
; CHECK-NEXT: bx lr
entry:
  tail call void @llvm.arm.neon.vst2lane.p0.v8f16(ptr %0, <8 x half> %1, <8 x half> %2, i32 0, i32 1)
  ret void
}

; Lane store of two <4 x half> vectors: calls llvm.arm.neon.vst2lane.p0.v4f16
; with trailing i32 operands 0 and 0. The expected vst2.16 stores element [0]
; of d0 and d1 and carries a :32 qualifier on the address.
define void @test_vst2lane_p0i8_v4f16(ptr, <4 x half>, <4 x half>) {
; CHECK-LABEL: test_vst2lane_p0i8_v4f16:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: @ kill: def $d1 killed $d1 killed $q0 def $q0
; CHECK-NEXT: @ kill: def $d0 killed $d0 killed $q0 def $q0
; CHECK-NEXT: vst2.16 {d0[0], d1[0]}, [r0:32]
; CHECK-NEXT: bx lr
entry:
  tail call void @llvm.arm.neon.vst2lane.p0.v4f16(ptr %0, <4 x half> %1, <4 x half> %2, i32 0, i32 0)
  ret void
}

; Lane store of three <8 x half> vectors: calls
; llvm.arm.neon.vst3lane.p0.v8f16 with trailing i32 operands 0 and 0.
; The expected vst3.16 stores element [0] of d0, d2 and d4.
define void @test_vst3lane_p0i8_v8f16(ptr, <8 x half>, <8 x half>, <8 x half>) {
; CHECK-LABEL: test_vst3lane_p0i8_v8f16:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: @ kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-NEXT: @ kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-NEXT: @ kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-NEXT: vst3.16 {d0[0], d2[0], d4[0]}, [r0]
; CHECK-NEXT: bx lr
entry:
  tail call void @llvm.arm.neon.vst3lane.p0.v8f16(ptr %0, <8 x half> %1, <8 x half> %2, <8 x half> %3, i32 0, i32 0)
  ret void
}

; Lane store of three <4 x half> vectors: calls
; llvm.arm.neon.vst3lane.p0.v4f16 with trailing i32 operands 0 and 0.
; The expected vst3.16 stores element [0] of d0, d1 and d2.
define void @test_vst3lane_p0i8_v4f16(ptr, <4 x half>, <4 x half>, <4 x half>) {
; CHECK-LABEL: test_vst3lane_p0i8_v4f16:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: @ kill: def $d2 killed $d2 killed $q0_q1 def $q0_q1
; CHECK-NEXT: @ kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1
; CHECK-NEXT: @ kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
; CHECK-NEXT: vst3.16 {d0[0], d1[0], d2[0]}, [r0]
; CHECK-NEXT: bx lr
entry:
  tail call void @llvm.arm.neon.vst3lane.p0.v4f16(ptr %0, <4 x half> %1, <4 x half> %2, <4 x half> %3, i32 0, i32 0)
  ret void
}

; Lane store of four <8 x half> vectors: calls
; llvm.arm.neon.vst4lane.p0.v8f16 with trailing i32 operands 0 and 0.
; The expected vst4.16 stores element [0] of d0, d2, d4 and d6 and carries a
; :64 qualifier on the address.
define void @test_vst4lane_p0i8_v8f16(ptr, <8 x half>, <8 x half>, <8 x half>, <8 x half>) {
; CHECK-LABEL: test_vst4lane_p0i8_v8f16:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: @ kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-NEXT: @ kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-NEXT: @ kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-NEXT: @ kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
; CHECK-NEXT: vst4.16 {d0[0], d2[0], d4[0], d6[0]}, [r0:64]
; CHECK-NEXT: bx lr
entry:
  tail call void @llvm.arm.neon.vst4lane.p0.v8f16(ptr %0, <8 x half> %1, <8 x half> %2, <8 x half> %3, <8 x half> %4, i32 0, i32 0)
  ret void
}

; Lane store of four <4 x half> vectors: calls
; llvm.arm.neon.vst4lane.p0.v4f16 with trailing i32 operands 0 and 0.
; The expected vst4.16 stores element [0] of d0 through d3 and carries a
; :64 qualifier on the address.
define void @test_vst4lane_p0i8_v4f16(ptr, <4 x half>, <4 x half>, <4 x half>, <4 x half>) {
; CHECK-LABEL: test_vst4lane_p0i8_v4f16:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: @ kill: def $d3 killed $d3 killed $q0_q1 def $q0_q1
; CHECK-NEXT: @ kill: def $d2 killed $d2 killed $q0_q1 def $q0_q1
; CHECK-NEXT: @ kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1
; CHECK-NEXT: @ kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
; CHECK-NEXT: vst4.16 {d0[0], d1[0], d2[0], d3[0]}, [r0:64]
; CHECK-NEXT: bx lr
entry:
  tail call void @llvm.arm.neon.vst4lane.p0.v4f16(ptr %0, <4 x half> %1, <4 x half> %2, <4 x half> %3, <4 x half> %4, i32 0, i32 0)
  ret void
}