1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py 2; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve.fp -verify-machineinstrs -o - %s | FileCheck %s 3 4define arm_aapcs_vfpcc <8 x half> @test_vld1q_f16(ptr %base) { 5; CHECK-LABEL: test_vld1q_f16: 6; CHECK: @ %bb.0: @ %entry 7; CHECK-NEXT: vldrh.u16 q0, [r0] 8; CHECK-NEXT: bx lr 9entry: 10 %0 = load <8 x half>, ptr %base, align 2 11 ret <8 x half> %0 12} 13 14define arm_aapcs_vfpcc <4 x float> @test_vld1q_f32(ptr %base) { 15; CHECK-LABEL: test_vld1q_f32: 16; CHECK: @ %bb.0: @ %entry 17; CHECK-NEXT: vldrw.u32 q0, [r0] 18; CHECK-NEXT: bx lr 19entry: 20 %0 = load <4 x float>, ptr %base, align 4 21 ret <4 x float> %0 22} 23 24define arm_aapcs_vfpcc <16 x i8> @test_vld1q_s8(ptr %base) { 25; CHECK-LABEL: test_vld1q_s8: 26; CHECK: @ %bb.0: @ %entry 27; CHECK-NEXT: vldrb.u8 q0, [r0] 28; CHECK-NEXT: bx lr 29entry: 30 %0 = load <16 x i8>, ptr %base, align 1 31 ret <16 x i8> %0 32} 33 34define arm_aapcs_vfpcc <8 x i16> @test_vld1q_s16(ptr %base) { 35; CHECK-LABEL: test_vld1q_s16: 36; CHECK: @ %bb.0: @ %entry 37; CHECK-NEXT: vldrh.u16 q0, [r0] 38; CHECK-NEXT: bx lr 39entry: 40 %0 = load <8 x i16>, ptr %base, align 2 41 ret <8 x i16> %0 42} 43 44define arm_aapcs_vfpcc <4 x i32> @test_vld1q_s32(ptr %base) { 45; CHECK-LABEL: test_vld1q_s32: 46; CHECK: @ %bb.0: @ %entry 47; CHECK-NEXT: vldrw.u32 q0, [r0] 48; CHECK-NEXT: bx lr 49entry: 50 %0 = load <4 x i32>, ptr %base, align 4 51 ret <4 x i32> %0 52} 53 54define arm_aapcs_vfpcc <16 x i8> @test_vld1q_u8(ptr %base) { 55; CHECK-LABEL: test_vld1q_u8: 56; CHECK: @ %bb.0: @ %entry 57; CHECK-NEXT: vldrb.u8 q0, [r0] 58; CHECK-NEXT: bx lr 59entry: 60 %0 = load <16 x i8>, ptr %base, align 1 61 ret <16 x i8> %0 62} 63 64define arm_aapcs_vfpcc <8 x i16> @test_vld1q_u16(ptr %base) { 65; CHECK-LABEL: test_vld1q_u16: 66; CHECK: @ %bb.0: @ %entry 67; CHECK-NEXT: vldrh.u16 q0, [r0] 68; CHECK-NEXT: bx lr 69entry: 70 %0 = load <8 x i16>, ptr %base, align 2 71 ret <8 x i16> 
%0 72} 73 74define arm_aapcs_vfpcc <4 x i32> @test_vld1q_u32(ptr %base) { 75; CHECK-LABEL: test_vld1q_u32: 76; CHECK: @ %bb.0: @ %entry 77; CHECK-NEXT: vldrw.u32 q0, [r0] 78; CHECK-NEXT: bx lr 79entry: 80 %0 = load <4 x i32>, ptr %base, align 4 81 ret <4 x i32> %0 82} 83 84define arm_aapcs_vfpcc <8 x half> @test_vld1q_z_f16(ptr %base, i16 zeroext %p) { 85; CHECK-LABEL: test_vld1q_z_f16: 86; CHECK: @ %bb.0: @ %entry 87; CHECK-NEXT: vmsr p0, r1 88; CHECK-NEXT: vpst 89; CHECK-NEXT: vldrht.u16 q0, [r0] 90; CHECK-NEXT: bx lr 91entry: 92 %0 = zext i16 %p to i32 93 %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0) 94 %2 = call <8 x half> @llvm.masked.load.v8f16.p0(ptr %base, i32 2, <8 x i1> %1, <8 x half> zeroinitializer) 95 ret <8 x half> %2 96} 97 98declare <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32) 99 100declare <8 x half> @llvm.masked.load.v8f16.p0(ptr, i32 immarg, <8 x i1>, <8 x half>) 101 102define arm_aapcs_vfpcc <4 x float> @test_vld1q_z_f32(ptr %base, i16 zeroext %p) { 103; CHECK-LABEL: test_vld1q_z_f32: 104; CHECK: @ %bb.0: @ %entry 105; CHECK-NEXT: vmsr p0, r1 106; CHECK-NEXT: vpst 107; CHECK-NEXT: vldrwt.u32 q0, [r0] 108; CHECK-NEXT: bx lr 109entry: 110 %0 = zext i16 %p to i32 111 %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0) 112 %2 = call <4 x float> @llvm.masked.load.v4f32.p0(ptr %base, i32 4, <4 x i1> %1, <4 x float> zeroinitializer) 113 ret <4 x float> %2 114} 115 116declare <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32) 117 118declare <4 x float> @llvm.masked.load.v4f32.p0(ptr, i32 immarg, <4 x i1>, <4 x float>) 119 120define arm_aapcs_vfpcc <16 x i8> @test_vld1q_z_s8(ptr %base, i16 zeroext %p) { 121; CHECK-LABEL: test_vld1q_z_s8: 122; CHECK: @ %bb.0: @ %entry 123; CHECK-NEXT: vmsr p0, r1 124; CHECK-NEXT: vpst 125; CHECK-NEXT: vldrbt.u8 q0, [r0] 126; CHECK-NEXT: bx lr 127entry: 128 %0 = zext i16 %p to i32 129 %1 = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0) 130 %2 = call <16 x i8> @llvm.masked.load.v16i8.p0(ptr %base, i32 1, <16 x i1> %1, 
<16 x i8> zeroinitializer) 131 ret <16 x i8> %2 132} 133 134declare <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32) 135 136declare <16 x i8> @llvm.masked.load.v16i8.p0(ptr, i32 immarg, <16 x i1>, <16 x i8>) 137 138define arm_aapcs_vfpcc <8 x i16> @test_vld1q_z_s16(ptr %base, i16 zeroext %p) { 139; CHECK-LABEL: test_vld1q_z_s16: 140; CHECK: @ %bb.0: @ %entry 141; CHECK-NEXT: vmsr p0, r1 142; CHECK-NEXT: vpst 143; CHECK-NEXT: vldrht.u16 q0, [r0] 144; CHECK-NEXT: bx lr 145entry: 146 %0 = zext i16 %p to i32 147 %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0) 148 %2 = call <8 x i16> @llvm.masked.load.v8i16.p0(ptr %base, i32 2, <8 x i1> %1, <8 x i16> zeroinitializer) 149 ret <8 x i16> %2 150} 151 152declare <8 x i16> @llvm.masked.load.v8i16.p0(ptr, i32 immarg, <8 x i1>, <8 x i16>) 153 154define arm_aapcs_vfpcc <4 x i32> @test_vld1q_z_s32(ptr %base, i16 zeroext %p) { 155; CHECK-LABEL: test_vld1q_z_s32: 156; CHECK: @ %bb.0: @ %entry 157; CHECK-NEXT: vmsr p0, r1 158; CHECK-NEXT: vpst 159; CHECK-NEXT: vldrwt.u32 q0, [r0] 160; CHECK-NEXT: bx lr 161entry: 162 %0 = zext i16 %p to i32 163 %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0) 164 %2 = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %base, i32 4, <4 x i1> %1, <4 x i32> zeroinitializer) 165 ret <4 x i32> %2 166} 167 168declare <4 x i32> @llvm.masked.load.v4i32.p0(ptr, i32 immarg, <4 x i1>, <4 x i32>) 169 170define arm_aapcs_vfpcc <16 x i8> @test_vld1q_z_u8(ptr %base, i16 zeroext %p) { 171; CHECK-LABEL: test_vld1q_z_u8: 172; CHECK: @ %bb.0: @ %entry 173; CHECK-NEXT: vmsr p0, r1 174; CHECK-NEXT: vpst 175; CHECK-NEXT: vldrbt.u8 q0, [r0] 176; CHECK-NEXT: bx lr 177entry: 178 %0 = zext i16 %p to i32 179 %1 = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0) 180 %2 = call <16 x i8> @llvm.masked.load.v16i8.p0(ptr %base, i32 1, <16 x i1> %1, <16 x i8> zeroinitializer) 181 ret <16 x i8> %2 182} 183 184define arm_aapcs_vfpcc <8 x i16> @test_vld1q_z_u16(ptr %base, i16 zeroext %p) { 185; CHECK-LABEL: test_vld1q_z_u16: 186; 
CHECK: @ %bb.0: @ %entry 187; CHECK-NEXT: vmsr p0, r1 188; CHECK-NEXT: vpst 189; CHECK-NEXT: vldrht.u16 q0, [r0] 190; CHECK-NEXT: bx lr 191entry: 192 %0 = zext i16 %p to i32 193 %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0) 194 %2 = call <8 x i16> @llvm.masked.load.v8i16.p0(ptr %base, i32 2, <8 x i1> %1, <8 x i16> zeroinitializer) 195 ret <8 x i16> %2 196} 197 198define arm_aapcs_vfpcc <4 x i32> @test_vld1q_z_u32(ptr %base, i16 zeroext %p) { 199; CHECK-LABEL: test_vld1q_z_u32: 200; CHECK: @ %bb.0: @ %entry 201; CHECK-NEXT: vmsr p0, r1 202; CHECK-NEXT: vpst 203; CHECK-NEXT: vldrwt.u32 q0, [r0] 204; CHECK-NEXT: bx lr 205entry: 206 %0 = zext i16 %p to i32 207 %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0) 208 %2 = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %base, i32 4, <4 x i1> %1, <4 x i32> zeroinitializer) 209 ret <4 x i32> %2 210} 211 212define arm_aapcs_vfpcc <16 x i8> @test_vldrbq_s8(ptr %base) { 213; CHECK-LABEL: test_vldrbq_s8: 214; CHECK: @ %bb.0: @ %entry 215; CHECK-NEXT: vldrb.u8 q0, [r0] 216; CHECK-NEXT: bx lr 217entry: 218 %0 = load <16 x i8>, ptr %base, align 1 219 ret <16 x i8> %0 220} 221 222define arm_aapcs_vfpcc <8 x i16> @test_vldrbq_s16(ptr %base) { 223; CHECK-LABEL: test_vldrbq_s16: 224; CHECK: @ %bb.0: @ %entry 225; CHECK-NEXT: vldrb.s16 q0, [r0] 226; CHECK-NEXT: bx lr 227entry: 228 %0 = load <8 x i8>, ptr %base, align 1 229 %1 = sext <8 x i8> %0 to <8 x i16> 230 ret <8 x i16> %1 231} 232 233define arm_aapcs_vfpcc <4 x i32> @test_vldrbq_s32(ptr %base) { 234; CHECK-LABEL: test_vldrbq_s32: 235; CHECK: @ %bb.0: @ %entry 236; CHECK-NEXT: vldrb.s32 q0, [r0] 237; CHECK-NEXT: bx lr 238entry: 239 %0 = load <4 x i8>, ptr %base, align 1 240 %1 = sext <4 x i8> %0 to <4 x i32> 241 ret <4 x i32> %1 242} 243 244define arm_aapcs_vfpcc <16 x i8> @test_vldrbq_u8(ptr %base) { 245; CHECK-LABEL: test_vldrbq_u8: 246; CHECK: @ %bb.0: @ %entry 247; CHECK-NEXT: vldrb.u8 q0, [r0] 248; CHECK-NEXT: bx lr 249entry: 250 %0 = load <16 x i8>, ptr %base, 
align 1 251 ret <16 x i8> %0 252} 253 254define arm_aapcs_vfpcc <8 x i16> @test_vldrbq_u16(ptr %base) { 255; CHECK-LABEL: test_vldrbq_u16: 256; CHECK: @ %bb.0: @ %entry 257; CHECK-NEXT: vldrb.u16 q0, [r0] 258; CHECK-NEXT: bx lr 259entry: 260 %0 = load <8 x i8>, ptr %base, align 1 261 %1 = zext <8 x i8> %0 to <8 x i16> 262 ret <8 x i16> %1 263} 264 265define arm_aapcs_vfpcc <4 x i32> @test_vldrbq_u32(ptr %base) { 266; CHECK-LABEL: test_vldrbq_u32: 267; CHECK: @ %bb.0: @ %entry 268; CHECK-NEXT: vldrb.u32 q0, [r0] 269; CHECK-NEXT: bx lr 270entry: 271 %0 = load <4 x i8>, ptr %base, align 1 272 %1 = zext <4 x i8> %0 to <4 x i32> 273 ret <4 x i32> %1 274} 275 276define arm_aapcs_vfpcc <16 x i8> @test_vldrbq_z_s8(ptr %base, i16 zeroext %p) { 277; CHECK-LABEL: test_vldrbq_z_s8: 278; CHECK: @ %bb.0: @ %entry 279; CHECK-NEXT: vmsr p0, r1 280; CHECK-NEXT: vpst 281; CHECK-NEXT: vldrbt.u8 q0, [r0] 282; CHECK-NEXT: bx lr 283entry: 284 %0 = zext i16 %p to i32 285 %1 = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0) 286 %2 = call <16 x i8> @llvm.masked.load.v16i8.p0(ptr %base, i32 1, <16 x i1> %1, <16 x i8> zeroinitializer) 287 ret <16 x i8> %2 288} 289 290define arm_aapcs_vfpcc <8 x i16> @test_vldrbq_z_s16(ptr %base, i16 zeroext %p) { 291; CHECK-LABEL: test_vldrbq_z_s16: 292; CHECK: @ %bb.0: @ %entry 293; CHECK-NEXT: vmsr p0, r1 294; CHECK-NEXT: vpst 295; CHECK-NEXT: vldrbt.s16 q0, [r0] 296; CHECK-NEXT: bx lr 297entry: 298 %0 = zext i16 %p to i32 299 %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0) 300 %2 = call <8 x i8> @llvm.masked.load.v8i8.p0(ptr %base, i32 1, <8 x i1> %1, <8 x i8> zeroinitializer) 301 %3 = sext <8 x i8> %2 to <8 x i16> 302 ret <8 x i16> %3 303} 304 305declare <8 x i8> @llvm.masked.load.v8i8.p0(ptr, i32 immarg, <8 x i1>, <8 x i8>) 306 307define arm_aapcs_vfpcc <4 x i32> @test_vldrbq_z_s32(ptr %base, i16 zeroext %p) { 308; CHECK-LABEL: test_vldrbq_z_s32: 309; CHECK: @ %bb.0: @ %entry 310; CHECK-NEXT: vmsr p0, r1 311; CHECK-NEXT: vpst 312; CHECK-NEXT: 
vldrbt.s32 q0, [r0] 313; CHECK-NEXT: bx lr 314entry: 315 %0 = zext i16 %p to i32 316 %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0) 317 %2 = call <4 x i8> @llvm.masked.load.v4i8.p0(ptr %base, i32 1, <4 x i1> %1, <4 x i8> zeroinitializer) 318 %3 = sext <4 x i8> %2 to <4 x i32> 319 ret <4 x i32> %3 320} 321 322declare <4 x i8> @llvm.masked.load.v4i8.p0(ptr, i32 immarg, <4 x i1>, <4 x i8>) 323 324define arm_aapcs_vfpcc <16 x i8> @test_vldrbq_z_u8(ptr %base, i16 zeroext %p) { 325; CHECK-LABEL: test_vldrbq_z_u8: 326; CHECK: @ %bb.0: @ %entry 327; CHECK-NEXT: vmsr p0, r1 328; CHECK-NEXT: vpst 329; CHECK-NEXT: vldrbt.u8 q0, [r0] 330; CHECK-NEXT: bx lr 331entry: 332 %0 = zext i16 %p to i32 333 %1 = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0) 334 %2 = call <16 x i8> @llvm.masked.load.v16i8.p0(ptr %base, i32 1, <16 x i1> %1, <16 x i8> zeroinitializer) 335 ret <16 x i8> %2 336} 337 338define arm_aapcs_vfpcc <8 x i16> @test_vldrbq_z_u16(ptr %base, i16 zeroext %p) { 339; CHECK-LABEL: test_vldrbq_z_u16: 340; CHECK: @ %bb.0: @ %entry 341; CHECK-NEXT: vmsr p0, r1 342; CHECK-NEXT: vpst 343; CHECK-NEXT: vldrbt.u16 q0, [r0] 344; CHECK-NEXT: bx lr 345entry: 346 %0 = zext i16 %p to i32 347 %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0) 348 %2 = call <8 x i8> @llvm.masked.load.v8i8.p0(ptr %base, i32 1, <8 x i1> %1, <8 x i8> zeroinitializer) 349 %3 = zext <8 x i8> %2 to <8 x i16> 350 ret <8 x i16> %3 351} 352 353define arm_aapcs_vfpcc <4 x i32> @test_vldrbq_z_u32(ptr %base, i16 zeroext %p) { 354; CHECK-LABEL: test_vldrbq_z_u32: 355; CHECK: @ %bb.0: @ %entry 356; CHECK-NEXT: vmsr p0, r1 357; CHECK-NEXT: vpst 358; CHECK-NEXT: vldrbt.u32 q0, [r0] 359; CHECK-NEXT: bx lr 360entry: 361 %0 = zext i16 %p to i32 362 %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0) 363 %2 = call <4 x i8> @llvm.masked.load.v4i8.p0(ptr %base, i32 1, <4 x i1> %1, <4 x i8> zeroinitializer) 364 %3 = zext <4 x i8> %2 to <4 x i32> 365 ret <4 x i32> %3 366} 367 368define arm_aapcs_vfpcc <8 x 
half> @test_vldrhq_f16(ptr %base) { 369; CHECK-LABEL: test_vldrhq_f16: 370; CHECK: @ %bb.0: @ %entry 371; CHECK-NEXT: vldrh.u16 q0, [r0] 372; CHECK-NEXT: bx lr 373entry: 374 %0 = load <8 x half>, ptr %base, align 2 375 ret <8 x half> %0 376} 377 378define arm_aapcs_vfpcc <8 x i16> @test_vldrhq_s16(ptr %base) { 379; CHECK-LABEL: test_vldrhq_s16: 380; CHECK: @ %bb.0: @ %entry 381; CHECK-NEXT: vldrh.u16 q0, [r0] 382; CHECK-NEXT: bx lr 383entry: 384 %0 = load <8 x i16>, ptr %base, align 2 385 ret <8 x i16> %0 386} 387 388define arm_aapcs_vfpcc <4 x i32> @test_vldrhq_s32(ptr %base) { 389; CHECK-LABEL: test_vldrhq_s32: 390; CHECK: @ %bb.0: @ %entry 391; CHECK-NEXT: vldrh.s32 q0, [r0] 392; CHECK-NEXT: bx lr 393entry: 394 %0 = load <4 x i16>, ptr %base, align 2 395 %1 = sext <4 x i16> %0 to <4 x i32> 396 ret <4 x i32> %1 397} 398 399define arm_aapcs_vfpcc <8 x i16> @test_vldrhq_u16(ptr %base) { 400; CHECK-LABEL: test_vldrhq_u16: 401; CHECK: @ %bb.0: @ %entry 402; CHECK-NEXT: vldrh.u16 q0, [r0] 403; CHECK-NEXT: bx lr 404entry: 405 %0 = load <8 x i16>, ptr %base, align 2 406 ret <8 x i16> %0 407} 408 409define arm_aapcs_vfpcc <4 x i32> @test_vldrhq_u32(ptr %base) { 410; CHECK-LABEL: test_vldrhq_u32: 411; CHECK: @ %bb.0: @ %entry 412; CHECK-NEXT: vldrh.u32 q0, [r0] 413; CHECK-NEXT: bx lr 414entry: 415 %0 = load <4 x i16>, ptr %base, align 2 416 %1 = zext <4 x i16> %0 to <4 x i32> 417 ret <4 x i32> %1 418} 419 420define arm_aapcs_vfpcc <8 x half> @test_vldrhq_z_f16(ptr %base, i16 zeroext %p) { 421; CHECK-LABEL: test_vldrhq_z_f16: 422; CHECK: @ %bb.0: @ %entry 423; CHECK-NEXT: vmsr p0, r1 424; CHECK-NEXT: vpst 425; CHECK-NEXT: vldrht.u16 q0, [r0] 426; CHECK-NEXT: bx lr 427entry: 428 %0 = zext i16 %p to i32 429 %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0) 430 %2 = call <8 x half> @llvm.masked.load.v8f16.p0(ptr %base, i32 2, <8 x i1> %1, <8 x half> zeroinitializer) 431 ret <8 x half> %2 432} 433 434define arm_aapcs_vfpcc <8 x i16> @test_vldrhq_z_s16(ptr %base, i16 
zeroext %p) { 435; CHECK-LABEL: test_vldrhq_z_s16: 436; CHECK: @ %bb.0: @ %entry 437; CHECK-NEXT: vmsr p0, r1 438; CHECK-NEXT: vpst 439; CHECK-NEXT: vldrht.u16 q0, [r0] 440; CHECK-NEXT: bx lr 441entry: 442 %0 = zext i16 %p to i32 443 %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0) 444 %2 = call <8 x i16> @llvm.masked.load.v8i16.p0(ptr %base, i32 2, <8 x i1> %1, <8 x i16> zeroinitializer) 445 ret <8 x i16> %2 446} 447 448define arm_aapcs_vfpcc <4 x i32> @test_vldrhq_z_s32(ptr %base, i16 zeroext %p) { 449; CHECK-LABEL: test_vldrhq_z_s32: 450; CHECK: @ %bb.0: @ %entry 451; CHECK-NEXT: vmsr p0, r1 452; CHECK-NEXT: vpst 453; CHECK-NEXT: vldrht.s32 q0, [r0] 454; CHECK-NEXT: bx lr 455entry: 456 %0 = zext i16 %p to i32 457 %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0) 458 %2 = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr %base, i32 2, <4 x i1> %1, <4 x i16> zeroinitializer) 459 %3 = sext <4 x i16> %2 to <4 x i32> 460 ret <4 x i32> %3 461} 462 463declare <4 x i16> @llvm.masked.load.v4i16.p0(ptr, i32 immarg, <4 x i1>, <4 x i16>) 464 465define arm_aapcs_vfpcc <8 x i16> @test_vldrhq_z_u16(ptr %base, i16 zeroext %p) { 466; CHECK-LABEL: test_vldrhq_z_u16: 467; CHECK: @ %bb.0: @ %entry 468; CHECK-NEXT: vmsr p0, r1 469; CHECK-NEXT: vpst 470; CHECK-NEXT: vldrht.u16 q0, [r0] 471; CHECK-NEXT: bx lr 472entry: 473 %0 = zext i16 %p to i32 474 %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0) 475 %2 = call <8 x i16> @llvm.masked.load.v8i16.p0(ptr %base, i32 2, <8 x i1> %1, <8 x i16> zeroinitializer) 476 ret <8 x i16> %2 477} 478 479define arm_aapcs_vfpcc <4 x i32> @test_vldrhq_z_u32(ptr %base, i16 zeroext %p) { 480; CHECK-LABEL: test_vldrhq_z_u32: 481; CHECK: @ %bb.0: @ %entry 482; CHECK-NEXT: vmsr p0, r1 483; CHECK-NEXT: vpst 484; CHECK-NEXT: vldrht.u32 q0, [r0] 485; CHECK-NEXT: bx lr 486entry: 487 %0 = zext i16 %p to i32 488 %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0) 489 %2 = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr %base, i32 2, <4 x i1> %1, <4 x 
i16> zeroinitializer) 490 %3 = zext <4 x i16> %2 to <4 x i32> 491 ret <4 x i32> %3 492} 493 494define arm_aapcs_vfpcc <4 x float> @test_vldrwq_f32(ptr %base) { 495; CHECK-LABEL: test_vldrwq_f32: 496; CHECK: @ %bb.0: @ %entry 497; CHECK-NEXT: vldrw.u32 q0, [r0] 498; CHECK-NEXT: bx lr 499entry: 500 %0 = load <4 x float>, ptr %base, align 4 501 ret <4 x float> %0 502} 503 504define arm_aapcs_vfpcc <4 x i32> @test_vldrwq_s32(ptr %base) { 505; CHECK-LABEL: test_vldrwq_s32: 506; CHECK: @ %bb.0: @ %entry 507; CHECK-NEXT: vldrw.u32 q0, [r0] 508; CHECK-NEXT: bx lr 509entry: 510 %0 = load <4 x i32>, ptr %base, align 4 511 ret <4 x i32> %0 512} 513 514define arm_aapcs_vfpcc <4 x i32> @test_vldrwq_u32(ptr %base) { 515; CHECK-LABEL: test_vldrwq_u32: 516; CHECK: @ %bb.0: @ %entry 517; CHECK-NEXT: vldrw.u32 q0, [r0] 518; CHECK-NEXT: bx lr 519entry: 520 %0 = load <4 x i32>, ptr %base, align 4 521 ret <4 x i32> %0 522} 523 524define arm_aapcs_vfpcc <4 x float> @test_vldrwq_z_f32(ptr %base, i16 zeroext %p) { 525; CHECK-LABEL: test_vldrwq_z_f32: 526; CHECK: @ %bb.0: @ %entry 527; CHECK-NEXT: vmsr p0, r1 528; CHECK-NEXT: vpst 529; CHECK-NEXT: vldrwt.u32 q0, [r0] 530; CHECK-NEXT: bx lr 531entry: 532 %0 = zext i16 %p to i32 533 %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0) 534 %2 = call <4 x float> @llvm.masked.load.v4f32.p0(ptr %base, i32 4, <4 x i1> %1, <4 x float> zeroinitializer) 535 ret <4 x float> %2 536} 537 538define arm_aapcs_vfpcc <4 x i32> @test_vldrwq_z_s32(ptr %base, i16 zeroext %p) { 539; CHECK-LABEL: test_vldrwq_z_s32: 540; CHECK: @ %bb.0: @ %entry 541; CHECK-NEXT: vmsr p0, r1 542; CHECK-NEXT: vpst 543; CHECK-NEXT: vldrwt.u32 q0, [r0] 544; CHECK-NEXT: bx lr 545entry: 546 %0 = zext i16 %p to i32 547 %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0) 548 %2 = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %base, i32 4, <4 x i1> %1, <4 x i32> zeroinitializer) 549 ret <4 x i32> %2 550} 551 552define arm_aapcs_vfpcc <4 x i32> @test_vldrwq_z_u32(ptr %base, i16 
zeroext %p) { 553; CHECK-LABEL: test_vldrwq_z_u32: 554; CHECK: @ %bb.0: @ %entry 555; CHECK-NEXT: vmsr p0, r1 556; CHECK-NEXT: vpst 557; CHECK-NEXT: vldrwt.u32 q0, [r0] 558; CHECK-NEXT: bx lr 559entry: 560 %0 = zext i16 %p to i32 561 %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0) 562 %2 = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %base, i32 4, <4 x i1> %1, <4 x i32> zeroinitializer) 563 ret <4 x i32> %2 564} 565 566define arm_aapcs_vfpcc void @test_vst1q_f16(ptr %base, <8 x half> %value) { 567; CHECK-LABEL: test_vst1q_f16: 568; CHECK: @ %bb.0: @ %entry 569; CHECK-NEXT: vstrh.16 q0, [r0] 570; CHECK-NEXT: bx lr 571entry: 572 store <8 x half> %value, ptr %base, align 2 573 ret void 574} 575 576define arm_aapcs_vfpcc void @test_vst1q_f32(ptr %base, <4 x float> %value) { 577; CHECK-LABEL: test_vst1q_f32: 578; CHECK: @ %bb.0: @ %entry 579; CHECK-NEXT: vstrw.32 q0, [r0] 580; CHECK-NEXT: bx lr 581entry: 582 store <4 x float> %value, ptr %base, align 4 583 ret void 584} 585 586define arm_aapcs_vfpcc void @test_vst1q_s8(ptr %base, <16 x i8> %value) { 587; CHECK-LABEL: test_vst1q_s8: 588; CHECK: @ %bb.0: @ %entry 589; CHECK-NEXT: vstrb.8 q0, [r0] 590; CHECK-NEXT: bx lr 591entry: 592 store <16 x i8> %value, ptr %base, align 1 593 ret void 594} 595 596define arm_aapcs_vfpcc void @test_vst1q_s16(ptr %base, <8 x i16> %value) { 597; CHECK-LABEL: test_vst1q_s16: 598; CHECK: @ %bb.0: @ %entry 599; CHECK-NEXT: vstrh.16 q0, [r0] 600; CHECK-NEXT: bx lr 601entry: 602 store <8 x i16> %value, ptr %base, align 2 603 ret void 604} 605 606define arm_aapcs_vfpcc void @test_vst1q_s32(ptr %base, <4 x i32> %value) { 607; CHECK-LABEL: test_vst1q_s32: 608; CHECK: @ %bb.0: @ %entry 609; CHECK-NEXT: vstrw.32 q0, [r0] 610; CHECK-NEXT: bx lr 611entry: 612 store <4 x i32> %value, ptr %base, align 4 613 ret void 614} 615 616define arm_aapcs_vfpcc void @test_vst1q_u8(ptr %base, <16 x i8> %value) { 617; CHECK-LABEL: test_vst1q_u8: 618; CHECK: @ %bb.0: @ %entry 619; CHECK-NEXT: vstrb.8 q0, [r0] 
620; CHECK-NEXT: bx lr 621entry: 622 store <16 x i8> %value, ptr %base, align 1 623 ret void 624} 625 626define arm_aapcs_vfpcc void @test_vst1q_u16(ptr %base, <8 x i16> %value) { 627; CHECK-LABEL: test_vst1q_u16: 628; CHECK: @ %bb.0: @ %entry 629; CHECK-NEXT: vstrh.16 q0, [r0] 630; CHECK-NEXT: bx lr 631entry: 632 store <8 x i16> %value, ptr %base, align 2 633 ret void 634} 635 636define arm_aapcs_vfpcc void @test_vst1q_u32(ptr %base, <4 x i32> %value) { 637; CHECK-LABEL: test_vst1q_u32: 638; CHECK: @ %bb.0: @ %entry 639; CHECK-NEXT: vstrw.32 q0, [r0] 640; CHECK-NEXT: bx lr 641entry: 642 store <4 x i32> %value, ptr %base, align 4 643 ret void 644} 645 646define arm_aapcs_vfpcc void @test_vst1q_p_f16(ptr %base, <8 x half> %value, i16 zeroext %p) { 647; CHECK-LABEL: test_vst1q_p_f16: 648; CHECK: @ %bb.0: @ %entry 649; CHECK-NEXT: vmsr p0, r1 650; CHECK-NEXT: vpst 651; CHECK-NEXT: vstrht.16 q0, [r0] 652; CHECK-NEXT: bx lr 653entry: 654 %0 = zext i16 %p to i32 655 %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0) 656 call void @llvm.masked.store.v8f16.p0(<8 x half> %value, ptr %base, i32 2, <8 x i1> %1) 657 ret void 658} 659 660declare void @llvm.masked.store.v8f16.p0(<8 x half>, ptr, i32 immarg, <8 x i1>) 661 662define arm_aapcs_vfpcc void @test_vst1q_p_f32(ptr %base, <4 x float> %value, i16 zeroext %p) { 663; CHECK-LABEL: test_vst1q_p_f32: 664; CHECK: @ %bb.0: @ %entry 665; CHECK-NEXT: vmsr p0, r1 666; CHECK-NEXT: vpst 667; CHECK-NEXT: vstrwt.32 q0, [r0] 668; CHECK-NEXT: bx lr 669entry: 670 %0 = zext i16 %p to i32 671 %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0) 672 call void @llvm.masked.store.v4f32.p0(<4 x float> %value, ptr %base, i32 4, <4 x i1> %1) 673 ret void 674} 675 676declare void @llvm.masked.store.v4f32.p0(<4 x float>, ptr, i32 immarg, <4 x i1>) 677 678define arm_aapcs_vfpcc void @test_vst1q_p_s8(ptr %base, <16 x i8> %value, i16 zeroext %p) { 679; CHECK-LABEL: test_vst1q_p_s8: 680; CHECK: @ %bb.0: @ %entry 681; CHECK-NEXT: vmsr p0, r1 682; 
CHECK-NEXT: vpst 683; CHECK-NEXT: vstrbt.8 q0, [r0] 684; CHECK-NEXT: bx lr 685entry: 686 %0 = zext i16 %p to i32 687 %1 = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0) 688 call void @llvm.masked.store.v16i8.p0(<16 x i8> %value, ptr %base, i32 1, <16 x i1> %1) 689 ret void 690} 691 692declare void @llvm.masked.store.v16i8.p0(<16 x i8>, ptr, i32 immarg, <16 x i1>) 693 694define arm_aapcs_vfpcc void @test_vst1q_p_s16(ptr %base, <8 x i16> %value, i16 zeroext %p) { 695; CHECK-LABEL: test_vst1q_p_s16: 696; CHECK: @ %bb.0: @ %entry 697; CHECK-NEXT: vmsr p0, r1 698; CHECK-NEXT: vpst 699; CHECK-NEXT: vstrht.16 q0, [r0] 700; CHECK-NEXT: bx lr 701entry: 702 %0 = zext i16 %p to i32 703 %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0) 704 call void @llvm.masked.store.v8i16.p0(<8 x i16> %value, ptr %base, i32 2, <8 x i1> %1) 705 ret void 706} 707 708declare void @llvm.masked.store.v8i16.p0(<8 x i16>, ptr, i32 immarg, <8 x i1>) 709 710define arm_aapcs_vfpcc void @test_vst1q_p_s32(ptr %base, <4 x i32> %value, i16 zeroext %p) { 711; CHECK-LABEL: test_vst1q_p_s32: 712; CHECK: @ %bb.0: @ %entry 713; CHECK-NEXT: vmsr p0, r1 714; CHECK-NEXT: vpst 715; CHECK-NEXT: vstrwt.32 q0, [r0] 716; CHECK-NEXT: bx lr 717entry: 718 %0 = zext i16 %p to i32 719 %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0) 720 call void @llvm.masked.store.v4i32.p0(<4 x i32> %value, ptr %base, i32 4, <4 x i1> %1) 721 ret void 722} 723 724declare void @llvm.masked.store.v4i32.p0(<4 x i32>, ptr, i32 immarg, <4 x i1>) 725 726define arm_aapcs_vfpcc void @test_vst1q_p_u8(ptr %base, <16 x i8> %value, i16 zeroext %p) { 727; CHECK-LABEL: test_vst1q_p_u8: 728; CHECK: @ %bb.0: @ %entry 729; CHECK-NEXT: vmsr p0, r1 730; CHECK-NEXT: vpst 731; CHECK-NEXT: vstrbt.8 q0, [r0] 732; CHECK-NEXT: bx lr 733entry: 734 %0 = zext i16 %p to i32 735 %1 = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0) 736 call void @llvm.masked.store.v16i8.p0(<16 x i8> %value, ptr %base, i32 1, <16 x i1> %1) 737 ret void 738} 739 
740define arm_aapcs_vfpcc void @test_vst1q_p_u16(ptr %base, <8 x i16> %value, i16 zeroext %p) { 741; CHECK-LABEL: test_vst1q_p_u16: 742; CHECK: @ %bb.0: @ %entry 743; CHECK-NEXT: vmsr p0, r1 744; CHECK-NEXT: vpst 745; CHECK-NEXT: vstrht.16 q0, [r0] 746; CHECK-NEXT: bx lr 747entry: 748 %0 = zext i16 %p to i32 749 %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0) 750 call void @llvm.masked.store.v8i16.p0(<8 x i16> %value, ptr %base, i32 2, <8 x i1> %1) 751 ret void 752} 753 754define arm_aapcs_vfpcc void @test_vst1q_p_u32(ptr %base, <4 x i32> %value, i16 zeroext %p) { 755; CHECK-LABEL: test_vst1q_p_u32: 756; CHECK: @ %bb.0: @ %entry 757; CHECK-NEXT: vmsr p0, r1 758; CHECK-NEXT: vpst 759; CHECK-NEXT: vstrwt.32 q0, [r0] 760; CHECK-NEXT: bx lr 761entry: 762 %0 = zext i16 %p to i32 763 %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0) 764 call void @llvm.masked.store.v4i32.p0(<4 x i32> %value, ptr %base, i32 4, <4 x i1> %1) 765 ret void 766} 767 768define arm_aapcs_vfpcc void @test_vstrbq_s8(ptr %base, <16 x i8> %value) { 769; CHECK-LABEL: test_vstrbq_s8: 770; CHECK: @ %bb.0: @ %entry 771; CHECK-NEXT: vstrb.8 q0, [r0] 772; CHECK-NEXT: bx lr 773entry: 774 store <16 x i8> %value, ptr %base, align 1 775 ret void 776} 777 778define arm_aapcs_vfpcc void @test_vstrbq_s16(ptr %base, <8 x i16> %value) { 779; CHECK-LABEL: test_vstrbq_s16: 780; CHECK: @ %bb.0: @ %entry 781; CHECK-NEXT: vstrb.16 q0, [r0] 782; CHECK-NEXT: bx lr 783entry: 784 %0 = trunc <8 x i16> %value to <8 x i8> 785 store <8 x i8> %0, ptr %base, align 1 786 ret void 787} 788 789define arm_aapcs_vfpcc void @test_vstrbq_s32(ptr %base, <4 x i32> %value) { 790; CHECK-LABEL: test_vstrbq_s32: 791; CHECK: @ %bb.0: @ %entry 792; CHECK-NEXT: vstrb.32 q0, [r0] 793; CHECK-NEXT: bx lr 794entry: 795 %0 = trunc <4 x i32> %value to <4 x i8> 796 store <4 x i8> %0, ptr %base, align 1 797 ret void 798} 799 800define arm_aapcs_vfpcc void @test_vstrbq_u8(ptr %base, <16 x i8> %value) { 801; CHECK-LABEL: test_vstrbq_u8: 802; 
CHECK: @ %bb.0: @ %entry 803; CHECK-NEXT: vstrb.8 q0, [r0] 804; CHECK-NEXT: bx lr 805entry: 806 store <16 x i8> %value, ptr %base, align 1 807 ret void 808} 809 810define arm_aapcs_vfpcc void @test_vstrbq_u16(ptr %base, <8 x i16> %value) { 811; CHECK-LABEL: test_vstrbq_u16: 812; CHECK: @ %bb.0: @ %entry 813; CHECK-NEXT: vstrb.16 q0, [r0] 814; CHECK-NEXT: bx lr 815entry: 816 %0 = trunc <8 x i16> %value to <8 x i8> 817 store <8 x i8> %0, ptr %base, align 1 818 ret void 819} 820 821define arm_aapcs_vfpcc void @test_vstrbq_u32(ptr %base, <4 x i32> %value) { 822; CHECK-LABEL: test_vstrbq_u32: 823; CHECK: @ %bb.0: @ %entry 824; CHECK-NEXT: vstrb.32 q0, [r0] 825; CHECK-NEXT: bx lr 826entry: 827 %0 = trunc <4 x i32> %value to <4 x i8> 828 store <4 x i8> %0, ptr %base, align 1 829 ret void 830} 831 832define arm_aapcs_vfpcc void @test_vstrbq_p_s8(ptr %base, <16 x i8> %value, i16 zeroext %p) { 833; CHECK-LABEL: test_vstrbq_p_s8: 834; CHECK: @ %bb.0: @ %entry 835; CHECK-NEXT: vmsr p0, r1 836; CHECK-NEXT: vpst 837; CHECK-NEXT: vstrbt.8 q0, [r0] 838; CHECK-NEXT: bx lr 839entry: 840 %0 = zext i16 %p to i32 841 %1 = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0) 842 call void @llvm.masked.store.v16i8.p0(<16 x i8> %value, ptr %base, i32 1, <16 x i1> %1) 843 ret void 844} 845 846define arm_aapcs_vfpcc void @test_vstrbq_p_s16(ptr %base, <8 x i16> %value, i16 zeroext %p) { 847; CHECK-LABEL: test_vstrbq_p_s16: 848; CHECK: @ %bb.0: @ %entry 849; CHECK-NEXT: vmsr p0, r1 850; CHECK-NEXT: vpst 851; CHECK-NEXT: vstrbt.16 q0, [r0] 852; CHECK-NEXT: bx lr 853entry: 854 %0 = trunc <8 x i16> %value to <8 x i8> 855 %1 = zext i16 %p to i32 856 %2 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %1) 857 call void @llvm.masked.store.v8i8.p0(<8 x i8> %0, ptr %base, i32 1, <8 x i1> %2) 858 ret void 859} 860 861declare void @llvm.masked.store.v8i8.p0(<8 x i8>, ptr, i32 immarg, <8 x i1>) 862 863define arm_aapcs_vfpcc void @test_vstrbq_p_s32(ptr %base, <4 x i32> %value, i16 zeroext %p) { 864; 
CHECK-LABEL: test_vstrbq_p_s32: 865; CHECK: @ %bb.0: @ %entry 866; CHECK-NEXT: vmsr p0, r1 867; CHECK-NEXT: vpst 868; CHECK-NEXT: vstrbt.32 q0, [r0] 869; CHECK-NEXT: bx lr 870entry: 871 %0 = trunc <4 x i32> %value to <4 x i8> 872 %1 = zext i16 %p to i32 873 %2 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %1) 874 call void @llvm.masked.store.v4i8.p0(<4 x i8> %0, ptr %base, i32 1, <4 x i1> %2) 875 ret void 876} 877 878declare void @llvm.masked.store.v4i8.p0(<4 x i8>, ptr, i32 immarg, <4 x i1>) 879 880define arm_aapcs_vfpcc void @test_vstrbq_p_u8(ptr %base, <16 x i8> %value, i16 zeroext %p) { 881; CHECK-LABEL: test_vstrbq_p_u8: 882; CHECK: @ %bb.0: @ %entry 883; CHECK-NEXT: vmsr p0, r1 884; CHECK-NEXT: vpst 885; CHECK-NEXT: vstrbt.8 q0, [r0] 886; CHECK-NEXT: bx lr 887entry: 888 %0 = zext i16 %p to i32 889 %1 = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0) 890 call void @llvm.masked.store.v16i8.p0(<16 x i8> %value, ptr %base, i32 1, <16 x i1> %1) 891 ret void 892} 893 894define arm_aapcs_vfpcc void @test_vstrbq_p_u16(ptr %base, <8 x i16> %value, i16 zeroext %p) { 895; CHECK-LABEL: test_vstrbq_p_u16: 896; CHECK: @ %bb.0: @ %entry 897; CHECK-NEXT: vmsr p0, r1 898; CHECK-NEXT: vpst 899; CHECK-NEXT: vstrbt.16 q0, [r0] 900; CHECK-NEXT: bx lr 901entry: 902 %0 = trunc <8 x i16> %value to <8 x i8> 903 %1 = zext i16 %p to i32 904 %2 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %1) 905 call void @llvm.masked.store.v8i8.p0(<8 x i8> %0, ptr %base, i32 1, <8 x i1> %2) 906 ret void 907} 908 909define arm_aapcs_vfpcc void @test_vstrbq_p_u32(ptr %base, <4 x i32> %value, i16 zeroext %p) { 910; CHECK-LABEL: test_vstrbq_p_u32: 911; CHECK: @ %bb.0: @ %entry 912; CHECK-NEXT: vmsr p0, r1 913; CHECK-NEXT: vpst 914; CHECK-NEXT: vstrbt.32 q0, [r0] 915; CHECK-NEXT: bx lr 916entry: 917 %0 = trunc <4 x i32> %value to <4 x i8> 918 %1 = zext i16 %p to i32 919 %2 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %1) 920 call void @llvm.masked.store.v4i8.p0(<4 x i8> %0, ptr %base, i32 1, <4 
x i1> %2) 921 ret void 922} 923 924define arm_aapcs_vfpcc void @test_vstrhq_f16(ptr %base, <8 x half> %value) { 925; CHECK-LABEL: test_vstrhq_f16: 926; CHECK: @ %bb.0: @ %entry 927; CHECK-NEXT: vstrh.16 q0, [r0] 928; CHECK-NEXT: bx lr 929entry: 930 store <8 x half> %value, ptr %base, align 2 931 ret void 932} 933 934define arm_aapcs_vfpcc void @test_vstrhq_s16(ptr %base, <8 x i16> %value) { 935; CHECK-LABEL: test_vstrhq_s16: 936; CHECK: @ %bb.0: @ %entry 937; CHECK-NEXT: vstrh.16 q0, [r0] 938; CHECK-NEXT: bx lr 939entry: 940 store <8 x i16> %value, ptr %base, align 2 941 ret void 942} 943 944define arm_aapcs_vfpcc void @test_vstrhq_s32(ptr %base, <4 x i32> %value) { 945; CHECK-LABEL: test_vstrhq_s32: 946; CHECK: @ %bb.0: @ %entry 947; CHECK-NEXT: vstrh.32 q0, [r0] 948; CHECK-NEXT: bx lr 949entry: 950 %0 = trunc <4 x i32> %value to <4 x i16> 951 store <4 x i16> %0, ptr %base, align 2 952 ret void 953} 954 955define arm_aapcs_vfpcc void @test_vstrhq_u16(ptr %base, <8 x i16> %value) { 956; CHECK-LABEL: test_vstrhq_u16: 957; CHECK: @ %bb.0: @ %entry 958; CHECK-NEXT: vstrh.16 q0, [r0] 959; CHECK-NEXT: bx lr 960entry: 961 store <8 x i16> %value, ptr %base, align 2 962 ret void 963} 964 965define arm_aapcs_vfpcc void @test_vstrhq_u32(ptr %base, <4 x i32> %value) { 966; CHECK-LABEL: test_vstrhq_u32: 967; CHECK: @ %bb.0: @ %entry 968; CHECK-NEXT: vstrh.32 q0, [r0] 969; CHECK-NEXT: bx lr 970entry: 971 %0 = trunc <4 x i32> %value to <4 x i16> 972 store <4 x i16> %0, ptr %base, align 2 973 ret void 974} 975 976define arm_aapcs_vfpcc void @test_vstrhq_p_f16(ptr %base, <8 x half> %value, i16 zeroext %p) { 977; CHECK-LABEL: test_vstrhq_p_f16: 978; CHECK: @ %bb.0: @ %entry 979; CHECK-NEXT: vmsr p0, r1 980; CHECK-NEXT: vpst 981; CHECK-NEXT: vstrht.16 q0, [r0] 982; CHECK-NEXT: bx lr 983entry: 984 %0 = zext i16 %p to i32 985 %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0) 986 call void @llvm.masked.store.v8f16.p0(<8 x half> %value, ptr %base, i32 2, <8 x i1> %1) 987 ret void 
}

; Predicated halfword store, <8 x i16> lanes.
; The i16 predicate %p is moved into VPR (vmsr p0), pred.i2v converts it to an
; <8 x i1> mask, and the masked store is selected as VSTRHT.16 inside a VPT block.
define arm_aapcs_vfpcc void @test_vstrhq_p_s16(ptr %base, <8 x i16> %value, i16 zeroext %p) {
; CHECK-LABEL: test_vstrhq_p_s16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vstrht.16 q0, [r0]
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
  call void @llvm.masked.store.v8i16.p0(<8 x i16> %value, ptr %base, i32 2, <8 x i1> %1)
  ret void
}

; Predicated narrowing store: <4 x i32> is truncated to <4 x i16> and stored
; under a <4 x i1> mask; the trunc+masked.store pair selects VSTRHT.32
; (store the low half of each 32-bit lane).
define arm_aapcs_vfpcc void @test_vstrhq_p_s32(ptr %base, <4 x i32> %value, i16 zeroext %p) {
; CHECK-LABEL: test_vstrhq_p_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vstrht.32 q0, [r0]
; CHECK-NEXT:    bx lr
entry:
  %0 = trunc <4 x i32> %value to <4 x i16>
  %1 = zext i16 %p to i32
  %2 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %1)
  call void @llvm.masked.store.v4i16.p0(<4 x i16> %0, ptr %base, i32 2, <4 x i1> %2)
  ret void
}

declare void @llvm.masked.store.v4i16.p0(<4 x i16>, ptr, i32 immarg, <4 x i1>)

; Unsigned variant of test_vstrhq_p_s16 — identical IR and identical codegen,
; since stores do not distinguish signedness.
define arm_aapcs_vfpcc void @test_vstrhq_p_u16(ptr %base, <8 x i16> %value, i16 zeroext %p) {
; CHECK-LABEL: test_vstrhq_p_u16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vstrht.16 q0, [r0]
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
  call void @llvm.masked.store.v8i16.p0(<8 x i16> %value, ptr %base, i32 2, <8 x i1> %1)
  ret void
}

; Unsigned variant of test_vstrhq_p_s32 — same trunc+masked.store pattern,
; same VSTRHT.32 selection.
define arm_aapcs_vfpcc void @test_vstrhq_p_u32(ptr %base, <4 x i32> %value, i16 zeroext %p) {
; CHECK-LABEL: test_vstrhq_p_u32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vstrht.32 q0, [r0]
; CHECK-NEXT:    bx lr
entry:
  %0 = trunc <4 x i32> %value to <4 x i16>
  %1 = zext i16 %p to i32
  %2 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %1)
  call void @llvm.masked.store.v4i16.p0(<4 x i16> %0, ptr %base, i32 2, <4 x i1> %2)
  ret void
}

; Unpredicated word store of a float vector: a plain aligned store selects VSTRW.32.
define arm_aapcs_vfpcc void @test_vstrwq_f32(ptr %base, <4 x float> %value) {
; CHECK-LABEL: test_vstrwq_f32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vstrw.32 q0, [r0]
; CHECK-NEXT:    bx lr
entry:
  store <4 x float> %value, ptr %base, align 4
  ret void
}

; Unpredicated word store, signed-integer vector — same VSTRW.32 selection.
define arm_aapcs_vfpcc void @test_vstrwq_s32(ptr %base, <4 x i32> %value) {
; CHECK-LABEL: test_vstrwq_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vstrw.32 q0, [r0]
; CHECK-NEXT:    bx lr
entry:
  store <4 x i32> %value, ptr %base, align 4
  ret void
}

; Unsigned variant of test_vstrwq_s32 — identical IR and codegen.
define arm_aapcs_vfpcc void @test_vstrwq_u32(ptr %base, <4 x i32> %value) {
; CHECK-LABEL: test_vstrwq_u32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vstrw.32 q0, [r0]
; CHECK-NEXT:    bx lr
entry:
  store <4 x i32> %value, ptr %base, align 4
  ret void
}

; Predicated word store of a float vector: <4 x i1> mask from pred.i2v drives
; a masked store selected as VSTRWT.32 inside a VPT block.
define arm_aapcs_vfpcc void @test_vstrwq_p_f32(ptr %base, <4 x float> %value, i16 zeroext %p) {
; CHECK-LABEL: test_vstrwq_p_f32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vstrwt.32 q0, [r0]
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  call void @llvm.masked.store.v4f32.p0(<4 x float> %value, ptr %base, i32 4, <4 x i1> %1)
  ret void
}

; Predicated word store, signed-integer vector — same VSTRWT.32 selection.
define arm_aapcs_vfpcc void @test_vstrwq_p_s32(ptr %base, <4 x i32> %value, i16 zeroext %p) {
; CHECK-LABEL: test_vstrwq_p_s32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vstrwt.32 q0, [r0]
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  call void @llvm.masked.store.v4i32.p0(<4 x i32> %value, ptr %base, i32 4, <4 x i1> %1)
  ret void
}

; Unsigned variant of test_vstrwq_p_s32 — identical IR and codegen.
define arm_aapcs_vfpcc void @test_vstrwq_p_u32(ptr %base, <4 x i32> %value, i16 zeroext %p) {
; CHECK-LABEL: test_vstrwq_p_u32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmsr p0, r1
; CHECK-NEXT:    vpst
; CHECK-NEXT:    vstrwt.32 q0, [r0]
; CHECK-NEXT:    bx lr
entry:
  %0 = zext i16 %p to i32
  %1 = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
  call void @llvm.masked.store.v4i32.p0(<4 x i32> %value, ptr %base, i32 4, <4 x i1> %1)
  ret void
}