; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple -verify-machineinstrs | FileCheck %s
; RUN: llc < %s -global-isel -global-isel-abort=1 -mtriple=arm64-eabi -aarch64-neon-syntax=apple -verify-machineinstrs | FileCheck %s
; The instruction latencies of Exynos-M3 trigger the transform we see under the Exynos check.
; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple -verify-machineinstrs -mcpu=exynos-m3 | FileCheck --check-prefix=EXYNOS %s

; Single-element stores out of 128-bit vectors.  Each element type is tested
; in several addressing variants:
;   st1lane      - lane 1, address is base + 1 element
;   st1lane0     - lane 0, address is base + 1 element
;   st1lane0u    - lane 0, address is base - 1 element
;   st1lane_ro   - lane 1, address is base + register offset
;   st1lane0_ro  - lane 0, address is base + register offset
; Function labels are anchored with a trailing colon so that they only match
; the symbol definition in the assembly output.

; Byte elements: every variant, including lane 0, is expected as st1.b.
define void @st1lane_16b(<16 x i8> %A, ptr %D) {
; CHECK-LABEL: st1lane_16b:
; CHECK: st1.b { v0 }[1], [x{{[0-9]+}}]
  %ptr = getelementptr i8, ptr %D, i64 1
  %tmp = extractelement <16 x i8> %A, i32 1
  store i8 %tmp, ptr %ptr
  ret void
}

define void @st1lane0_16b(<16 x i8> %A, ptr %D) {
; CHECK-LABEL: st1lane0_16b:
; CHECK: st1.b { v0 }[0], [x{{[0-9]+}}]
  %ptr = getelementptr i8, ptr %D, i64 1
  %tmp = extractelement <16 x i8> %A, i32 0
  store i8 %tmp, ptr %ptr
  ret void
}

define void @st1lane0u_16b(<16 x i8> %A, ptr %D) {
; CHECK-LABEL: st1lane0u_16b:
; CHECK: st1.b { v0 }[0], [x{{[0-9]+}}]
  %ptr = getelementptr i8, ptr %D, i64 -1
  %tmp = extractelement <16 x i8> %A, i32 0
  store i8 %tmp, ptr %ptr
  ret void
}

define void @st1lane_ro_16b(<16 x i8> %A, ptr %D, i64 %offset) {
; CHECK-LABEL: st1lane_ro_16b:
; CHECK: add x[[XREG:[0-9]+]], x0, x1
; CHECK: st1.b { v0 }[1], [x[[XREG]]]
  %ptr = getelementptr i8, ptr %D, i64 %offset
  %tmp = extractelement <16 x i8> %A, i32 1
  store i8 %tmp, ptr %ptr
  ret void
}

define void @st1lane0_ro_16b(<16 x i8> %A, ptr %D, i64 %offset) {
; CHECK-LABEL: st1lane0_ro_16b:
; CHECK: add x[[XREG:[0-9]+]], x0, x1
; CHECK: st1.b { v0 }[0], [x[[XREG]]]
  %ptr = getelementptr i8, ptr %D, i64 %offset
  %tmp = extractelement <16 x i8> %A, i32 0
  store i8 %tmp, ptr %ptr
  ret void
}

; Halfword elements: lane 1 is expected as st1.h, lane 0 as a scalar
; str/stur of h0 with the offset folded into the addressing mode.
define void @st1lane_8h(<8 x i16> %A, ptr %D) {
; CHECK-LABEL: st1lane_8h:
; CHECK: st1.h { v0 }[1], [x{{[0-9]+}}]
  %ptr = getelementptr i16, ptr %D, i64 1
  %tmp = extractelement <8 x i16> %A, i32 1
  store i16 %tmp, ptr %ptr
  ret void
}

define void @st1lane0_8h(<8 x i16> %A, ptr %D) {
; CHECK-LABEL: st1lane0_8h:
; CHECK: str h0, [x0, #2]
  %ptr = getelementptr i16, ptr %D, i64 1
  %tmp = extractelement <8 x i16> %A, i32 0
  store i16 %tmp, ptr %ptr
  ret void
}

define void @st1lane0u_8h(<8 x i16> %A, ptr %D) {
; CHECK-LABEL: st1lane0u_8h:
; CHECK: stur h0, [x0, #-2]
  %ptr = getelementptr i16, ptr %D, i64 -1
  %tmp = extractelement <8 x i16> %A, i32 0
  store i16 %tmp, ptr %ptr
  ret void
}

define void @st1lane_ro_8h(<8 x i16> %A, ptr %D, i64 %offset) {
; CHECK-LABEL: st1lane_ro_8h:
; CHECK: add x[[XREG:[0-9]+]], x0, x1
; CHECK: st1.h { v0 }[1], [x[[XREG]]]
  %ptr = getelementptr i16, ptr %D, i64 %offset
  %tmp = extractelement <8 x i16> %A, i32 1
  store i16 %tmp, ptr %ptr
  ret void
}

define void @st1lane0_ro_8h(<8 x i16> %A, ptr %D, i64 %offset) {
; CHECK-LABEL: st1lane0_ro_8h:
; CHECK: str h0, [x0, x1, lsl #1]
  %ptr = getelementptr i16, ptr %D, i64 %offset
  %tmp = extractelement <8 x i16> %A, i32 0
  store i16 %tmp, ptr %ptr
  ret void
}

; Word elements: lane 1 is expected as st1.s, lane 0 as a scalar str/stur
; of s0.
define void @st1lane_4s(<4 x i32> %A, ptr %D) {
; CHECK-LABEL: st1lane_4s:
; CHECK: st1.s { v0 }[1], [x{{[0-9]+}}]
  %ptr = getelementptr i32, ptr %D, i64 1
  %tmp = extractelement <4 x i32> %A, i32 1
  store i32 %tmp, ptr %ptr
  ret void
}

define void @st1lane0_4s(<4 x i32> %A, ptr %D) {
; CHECK-LABEL: st1lane0_4s:
; CHECK: str s0, [x0, #4]
  %ptr = getelementptr i32, ptr %D, i64 1
  %tmp = extractelement <4 x i32> %A, i32 0
  store i32 %tmp, ptr %ptr
  ret void
}

define void @st1lane0u_4s(<4 x i32> %A, ptr %D) {
; CHECK-LABEL: st1lane0u_4s:
; CHECK: stur s0, [x0, #-4]
  %ptr = getelementptr i32, ptr %D, i64 -1
  %tmp = extractelement <4 x i32> %A, i32 0
  store i32 %tmp, ptr %ptr
  ret void
}

; Word elements with a register offset: lane 1 needs an explicit add before
; st1.s, lane 0 is expected as a scaled register-offset str of s0.
define void @st1lane_ro_4s(<4 x i32> %A, ptr %D, i64 %offset) {
; CHECK-LABEL: st1lane_ro_4s:
; CHECK: add x[[XREG:[0-9]+]], x0, x1
; CHECK: st1.s { v0 }[1], [x[[XREG]]]
  %ptr = getelementptr i32, ptr %D, i64 %offset
  %tmp = extractelement <4 x i32> %A, i32 1
  store i32 %tmp, ptr %ptr
  ret void
}

define void @st1lane0_ro_4s(<4 x i32> %A, ptr %D, i64 %offset) {
; CHECK-LABEL: st1lane0_ro_4s:
; CHECK: str s0, [x0, x1, lsl #2]
  %ptr = getelementptr i32, ptr %D, i64 %offset
  %tmp = extractelement <4 x i32> %A, i32 0
  store i32 %tmp, ptr %ptr
  ret void
}

; Float elements of a 128-bit vector: same expectations as the i32 variants.
define void @st1lane_4s_float(<4 x float> %A, ptr %D) {
; CHECK-LABEL: st1lane_4s_float:
; CHECK: st1.s { v0 }[1], [x{{[0-9]+}}]
  %ptr = getelementptr float, ptr %D, i64 1
  %tmp = extractelement <4 x float> %A, i32 1
  store float %tmp, ptr %ptr
  ret void
}

define void @st1lane0_4s_float(<4 x float> %A, ptr %D) {
; CHECK-LABEL: st1lane0_4s_float:
; CHECK: str s0, [x0, #4]
  %ptr = getelementptr float, ptr %D, i64 1
  %tmp = extractelement <4 x float> %A, i32 0
  store float %tmp, ptr %ptr
  ret void
}

define void @st1lane0u_4s_float(<4 x float> %A, ptr %D) {
; CHECK-LABEL: st1lane0u_4s_float:
; CHECK: stur s0, [x0, #-4]
  %ptr = getelementptr float, ptr %D, i64 -1
  %tmp = extractelement <4 x float> %A, i32 0
  store float %tmp, ptr %ptr
  ret void
}

define void @st1lane_ro_4s_float(<4 x float> %A, ptr %D, i64 %offset) {
; CHECK-LABEL: st1lane_ro_4s_float:
; CHECK: add x[[XREG:[0-9]+]], x0, x1
; CHECK: st1.s { v0 }[1], [x[[XREG]]]
  %ptr = getelementptr float, ptr %D, i64 %offset
  %tmp = extractelement <4 x float> %A, i32 1
  store float %tmp, ptr %ptr
  ret void
}

define void @st1lane0_ro_4s_float(<4 x float> %A, ptr %D, i64 %offset) {
; CHECK-LABEL: st1lane0_ro_4s_float:
; CHECK: str s0, [x0, x1, lsl #2]
  %ptr = getelementptr float, ptr %D, i64 %offset
  %tmp = extractelement <4 x float> %A, i32 0
  store float %tmp, ptr %ptr
  ret void
}

; Doubleword elements: lane 1 is expected as st1.d, lane 0 as a scalar
; str/stur of d0.
define void @st1lane_2d(<2 x i64> %A, ptr %D) {
; CHECK-LABEL: st1lane_2d:
; CHECK: st1.d { v0 }[1], [x{{[0-9]+}}]
  %ptr = getelementptr i64, ptr %D, i64 1
  %tmp = extractelement <2 x i64> %A, i32 1
  store i64 %tmp, ptr %ptr
  ret void
}

define void @st1lane0_2d(<2 x i64> %A, ptr %D) {
; CHECK-LABEL: st1lane0_2d:
; CHECK: str d0, [x0, #8]
  %ptr = getelementptr i64, ptr %D, i64 1
  %tmp = extractelement <2 x i64> %A, i32 0
  store i64 %tmp, ptr %ptr
  ret void
}

define void @st1lane0u_2d(<2 x i64> %A, ptr %D) {
; CHECK-LABEL: st1lane0u_2d:
; CHECK: stur d0, [x0, #-8]
  %ptr = getelementptr i64, ptr %D, i64 -1
  %tmp = extractelement <2 x i64> %A, i32 0
  store i64 %tmp, ptr %ptr
  ret void
}

define void @st1lane_ro_2d(<2 x i64> %A, ptr %D, i64 %offset) {
; CHECK-LABEL: st1lane_ro_2d:
; CHECK: add x[[XREG:[0-9]+]], x0, x1
; CHECK: st1.d { v0 }[1], [x[[XREG]]]
  %ptr = getelementptr i64, ptr %D, i64 %offset
  %tmp = extractelement <2 x i64> %A, i32 1
  store i64 %tmp, ptr %ptr
  ret void
}

define void @st1lane0_ro_2d(<2 x i64> %A, ptr %D, i64 %offset) {
; CHECK-LABEL: st1lane0_ro_2d:
; CHECK: str d0, [x0, x1, lsl #3]
  %ptr = getelementptr i64, ptr %D, i64 %offset
  %tmp = extractelement <2 x i64> %A, i32 0
  store i64 %tmp, ptr %ptr
  ret void
}

; Double elements of a 128-bit vector: same expectations as the i64 variants.
define void @st1lane_2d_double(<2 x double> %A, ptr %D) {
; CHECK-LABEL: st1lane_2d_double:
; CHECK: st1.d { v0 }[1], [x{{[0-9]+}}]
  %ptr = getelementptr double, ptr %D, i64 1
  %tmp = extractelement <2 x double> %A, i32 1
  store double %tmp, ptr %ptr
  ret void
}

define void @st1lane0_2d_double(<2 x double> %A, ptr %D) {
; CHECK-LABEL: st1lane0_2d_double:
; CHECK: str d0, [x0, #8]
  %ptr = getelementptr double, ptr %D, i64 1
  %tmp = extractelement <2 x double> %A, i32 0
  store double %tmp, ptr %ptr
  ret void
}

define void @st1lane0u_2d_double(<2 x double> %A, ptr %D) {
; CHECK-LABEL: st1lane0u_2d_double:
; CHECK: stur d0, [x0, #-8]
  %ptr = getelementptr double, ptr %D, i64 -1
  %tmp = extractelement <2 x double> %A, i32 0
  store double %tmp, ptr %ptr
  ret void
}

define void @st1lane_ro_2d_double(<2 x double> %A, ptr %D, i64 %offset) {
; CHECK-LABEL: st1lane_ro_2d_double:
; CHECK: add x[[XREG:[0-9]+]], x0, x1
; CHECK: st1.d { v0 }[1], [x[[XREG]]]
  %ptr = getelementptr double, ptr %D, i64 %offset
  %tmp = extractelement <2 x double> %A, i32 1
  store double %tmp, ptr %ptr
  ret void
}

define void @st1lane0_ro_2d_double(<2 x double> %A, ptr %D, i64 %offset) {
; CHECK-LABEL: st1lane0_ro_2d_double:
; CHECK: str d0, [x0, x1, lsl #3]
  %ptr = getelementptr double, ptr %D, i64 %offset
  %tmp = extractelement <2 x double> %A, i32 0
  store double %tmp, ptr %ptr
  ret void
}

; Single-element stores out of 64-bit vectors.  Byte elements are expected as
; st1.b for both lane 1 and lane 0.
define void @st1lane_8b(<8 x i8> %A, ptr %D) {
; CHECK-LABEL: st1lane_8b:
; CHECK: st1.b { v0 }[1], [x{{[0-9]+}}]
  %ptr = getelementptr i8, ptr %D, i64 1
  %tmp = extractelement <8 x i8> %A, i32 1
  store i8 %tmp, ptr %ptr
  ret void
}

define void @st1lane_ro_8b(<8 x i8> %A, ptr %D, i64 %offset) {
; CHECK-LABEL: st1lane_ro_8b:
; CHECK: add x[[XREG:[0-9]+]], x0, x1
; CHECK: st1.b { v0 }[1], [x[[XREG]]]
  %ptr = getelementptr i8, ptr %D, i64 %offset
  %tmp = extractelement <8 x i8> %A, i32 1
  store i8 %tmp, ptr %ptr
  ret void
}

define void @st1lane0_ro_8b(<8 x i8> %A, ptr %D, i64 %offset) {
; CHECK-LABEL: st1lane0_ro_8b:
; CHECK: add x[[XREG:[0-9]+]], x0, x1
; CHECK: st1.b { v0 }[0], [x[[XREG]]]
  %ptr = getelementptr i8, ptr %D, i64 %offset
  %tmp = extractelement <8 x i8> %A, i32 0
  store i8 %tmp, ptr %ptr
  ret void
}

; Halfword elements of a 64-bit vector.
define void @st1lane_4h(<4 x i16> %A, ptr %D) {
; CHECK-LABEL: st1lane_4h:
; CHECK: st1.h { v0 }[1], [x{{[0-9]+}}]
  %ptr = getelementptr i16, ptr %D, i64 1
  %tmp = extractelement <4 x i16> %A, i32 1
  store i16 %tmp, ptr %ptr
  ret void
}

define void @st1lane0_4h(<4 x i16> %A, ptr %D) {
; CHECK-LABEL: st1lane0_4h:
; CHECK: str h0, [x0, #2]
  %ptr = getelementptr i16, ptr %D, i64 1
  %tmp = extractelement <4 x i16> %A, i32 0
  store i16 %tmp, ptr %ptr
  ret void
}

define void @st1lane0u_4h(<4 x i16> %A, ptr %D) {
; CHECK-LABEL: st1lane0u_4h:
; CHECK: stur h0, [x0, #-2]
  %ptr = getelementptr i16, ptr %D, i64 -1
  %tmp = extractelement <4 x i16> %A, i32 0
  store i16 %tmp, ptr %ptr
  ret void
}

define void @st1lane_ro_4h(<4 x i16> %A, ptr %D, i64 %offset) {
; CHECK-LABEL: st1lane_ro_4h:
; CHECK: add x[[XREG:[0-9]+]], x0, x1
; CHECK: st1.h { v0 }[1], [x[[XREG]]]
  %ptr = getelementptr i16, ptr %D, i64 %offset
  %tmp = extractelement <4 x i16> %A, i32 1
  store i16 %tmp, ptr %ptr
  ret void
}

define void @st1lane0_ro_4h(<4 x i16> %A, ptr %D, i64 %offset) {
; CHECK-LABEL: st1lane0_ro_4h:
; CHECK: str h0, [x0, x1, lsl #1]
  %ptr = getelementptr i16, ptr %D, i64 %offset
  %tmp = extractelement <4 x i16> %A, i32 0
  store i16 %tmp, ptr %ptr
  ret void
}

; Word elements of a 64-bit vector.
define void @st1lane_2s(<2 x i32> %A, ptr %D) {
; CHECK-LABEL: st1lane_2s:
; CHECK: st1.s { v0 }[1], [x{{[0-9]+}}]
  %ptr = getelementptr i32, ptr %D, i64 1
  %tmp = extractelement <2 x i32> %A, i32 1
  store i32 %tmp, ptr %ptr
  ret void
}

define void @st1lane0_2s(<2 x i32> %A, ptr %D) {
; CHECK-LABEL: st1lane0_2s:
; CHECK: str s0, [x0, #4]
  %ptr = getelementptr i32, ptr %D, i64 1
  %tmp = extractelement <2 x i32> %A, i32 0
  store i32 %tmp, ptr %ptr
  ret void
}

define void @st1lane0u_2s(<2 x i32> %A, ptr %D) {
; CHECK-LABEL: st1lane0u_2s:
; CHECK: stur s0, [x0, #-4]
  %ptr = getelementptr i32, ptr %D, i64 -1
  %tmp = extractelement <2 x i32> %A, i32 0
  store i32 %tmp, ptr %ptr
  ret void
}

define void @st1lane_ro_2s(<2 x i32> %A, ptr %D, i64 %offset) {
; CHECK-LABEL: st1lane_ro_2s:
; CHECK: add x[[XREG:[0-9]+]], x0, x1
; CHECK: st1.s { v0 }[1], [x[[XREG]]]
  %ptr = getelementptr i32, ptr %D, i64 %offset
  %tmp = extractelement <2 x i32> %A, i32 1
  store i32 %tmp, ptr %ptr
  ret void
}

define void @st1lane0_ro_2s(<2 x i32> %A, ptr %D, i64 %offset) {
; CHECK-LABEL: st1lane0_ro_2s:
; CHECK: str s0, [x0, x1, lsl #2]
  %ptr = getelementptr i32, ptr %D, i64 %offset
  %tmp = extractelement <2 x i32> %A, i32 0
  store i32 %tmp, ptr %ptr
  ret void
}

; Float elements of a 64-bit vector.
define void @st1lane_2s_float(<2 x float> %A, ptr %D) {
; CHECK-LABEL: st1lane_2s_float:
; CHECK: st1.s { v0 }[1], [x{{[0-9]+}}]
  %ptr = getelementptr float, ptr %D, i64 1
  %tmp = extractelement <2 x float> %A, i32 1
  store float %tmp, ptr %ptr
  ret void
}

define void @st1lane0_2s_float(<2 x float> %A, ptr %D) {
; CHECK-LABEL: st1lane0_2s_float:
; CHECK: str s0, [x0, #4]
  %ptr = getelementptr float, ptr %D, i64 1
  %tmp = extractelement <2 x float> %A, i32 0
  store float %tmp, ptr %ptr
  ret void
}

define void @st1lane0u_2s_float(<2 x float> %A, ptr %D) {
; CHECK-LABEL: st1lane0u_2s_float:
; CHECK: stur s0, [x0, #-4]
  %ptr = getelementptr float, ptr %D, i64 -1
  %tmp = extractelement <2 x float> %A, i32 0
  store float %tmp, ptr %ptr
  ret void
}

define void @st1lane_ro_2s_float(<2 x float> %A, ptr %D, i64 %offset) {
; CHECK-LABEL: st1lane_ro_2s_float:
; CHECK: add x[[XREG:[0-9]+]], x0, x1
; CHECK: st1.s { v0 }[1], [x[[XREG]]]
  %ptr = getelementptr float, ptr %D, i64 %offset
  %tmp = extractelement <2 x float> %A, i32 1
  store float %tmp, ptr %ptr
  ret void
}

define void @st1lane0_ro_2s_float(<2 x float> %A, ptr %D, i64 %offset) {
; CHECK-LABEL: st1lane0_ro_2s_float:
; CHECK: str s0, [x0, x1, lsl #2]
  %ptr = getelementptr float, ptr %D, i64 %offset
  %tmp = extractelement <2 x float> %A, i32 0
  store float %tmp, ptr %ptr
  ret void
}

; One-element 64-bit vectors only have lane 0, which is expected as a scalar
; str/stur of d0.
define void @st1lane0_1d(<1 x i64> %A, ptr %D) {
; CHECK-LABEL: st1lane0_1d:
; CHECK: str d0, [x0, #8]
  %ptr = getelementptr i64, ptr %D, i64 1
  %tmp = extractelement <1 x i64> %A, i32 0
  store i64 %tmp, ptr %ptr
  ret void
}

define void @st1lane0u_1d(<1 x i64> %A, ptr %D) {
; CHECK-LABEL: st1lane0u_1d:
; CHECK: stur d0, [x0, #-8]
  %ptr = getelementptr i64, ptr %D, i64 -1
  %tmp = extractelement <1 x i64> %A, i32 0
  store i64 %tmp, ptr %ptr
  ret void
}

define void @st1lane0_ro_1d(<1 x i64> %A, ptr %D, i64 %offset) {
; CHECK-LABEL: st1lane0_ro_1d:
; CHECK: str d0, [x0, x1, lsl #3]
  %ptr = getelementptr i64, ptr %D, i64 %offset
  %tmp = extractelement <1 x i64> %A, i32 0
  store i64 %tmp, ptr %ptr
  ret void
}

define void @st1lane0_1d_double(<1 x double> %A, ptr %D) {
; CHECK-LABEL: st1lane0_1d_double:
; CHECK: str d0, [x0, #8]
  %ptr = getelementptr double, ptr %D, i64 1
  %tmp = extractelement <1 x double> %A, i32 0
  store double %tmp, ptr %ptr
  ret void
}

define void @st1lane0u_1d_double(<1 x double> %A, ptr %D) {
; CHECK-LABEL: st1lane0u_1d_double:
; CHECK: stur d0, [x0, #-8]
  %ptr = getelementptr double, ptr %D, i64 -1
  %tmp = extractelement <1 x double> %A, i32 0
  store double %tmp, ptr %ptr
  ret void
}

define void @st1lane0_ro_1d_double(<1 x double> %A, ptr %D, i64 %offset) {
; CHECK-LABEL: st1lane0_ro_1d_double:
; CHECK: str d0, [x0, x1, lsl #3]
  %ptr = getelementptr double, ptr %D, i64 %offset
  %tmp = extractelement <1 x double> %A, i32 0
  store double %tmp, ptr %ptr
  ret void
}

; Two-register lane stores through the st2lane intrinsic, always lane 1.
define void @st2lane_16b(<16 x i8> %A, <16 x i8> %B, ptr %D) {
; CHECK-LABEL: st2lane_16b:
; CHECK: st2.b
  call void @llvm.aarch64.neon.st2lane.v16i8.p0(<16 x i8> %A, <16 x i8> %B, i64 1, ptr %D)
  ret void
}

define void @st2lane_8h(<8 x i16> %A, <8 x i16> %B, ptr %D) {
; CHECK-LABEL: st2lane_8h:
; CHECK: st2.h
  call void @llvm.aarch64.neon.st2lane.v8i16.p0(<8 x i16> %A, <8 x i16> %B, i64 1, ptr %D)
  ret void
}

define void @st2lane_4s(<4 x i32> %A, <4 x i32> %B, ptr %D) {
; CHECK-LABEL: st2lane_4s:
; CHECK: st2.s
  call void @llvm.aarch64.neon.st2lane.v4i32.p0(<4 x i32> %A, <4 x i32> %B, i64 1, ptr %D)
  ret void
}

define void @st2lane_2d(<2 x i64> %A, <2 x i64> %B, ptr %D) {
; CHECK-LABEL: st2lane_2d:
; CHECK: st2.d
  call void @llvm.aarch64.neon.st2lane.v2i64.p0(<2 x i64> %A, <2 x i64> %B, i64 1, ptr %D)
  ret void
}

; The store intrinsics write memory, so their declarations carry no
; read-only / no-access memory attribute.
declare void @llvm.aarch64.neon.st2lane.v16i8.p0(<16 x i8>, <16 x i8>, i64, ptr) nounwind
declare void @llvm.aarch64.neon.st2lane.v8i16.p0(<8 x i16>, <8 x i16>, i64, ptr) nounwind
declare void @llvm.aarch64.neon.st2lane.v4i32.p0(<4 x i32>, <4 x i32>, i64, ptr) nounwind
declare void @llvm.aarch64.neon.st2lane.v2i64.p0(<2 x i64>, <2 x i64>, i64, ptr) nounwind

; Three-register lane stores through the st3lane intrinsic, always lane 1.
define void @st3lane_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, ptr %D) {
; CHECK-LABEL: st3lane_16b:
; CHECK: st3.b
  call void @llvm.aarch64.neon.st3lane.v16i8.p0(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, i64 1, ptr %D)
  ret void
}

define void @st3lane_8h(<8 x i16> %A, <8 x i16> %B, <8 x i16> %C, ptr %D) {
; CHECK-LABEL: st3lane_8h:
; CHECK: st3.h
  call void @llvm.aarch64.neon.st3lane.v8i16.p0(<8 x i16> %A, <8 x i16> %B, <8 x i16> %C, i64 1, ptr %D)
  ret void
}

define void @st3lane_4s(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, ptr %D) {
; CHECK-LABEL: st3lane_4s:
; CHECK: st3.s
  call void @llvm.aarch64.neon.st3lane.v4i32.p0(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, i64 1, ptr %D)
  ret void
}

define void @st3lane_2d(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, ptr %D) {
; CHECK-LABEL: st3lane_2d:
; CHECK: st3.d
  call void @llvm.aarch64.neon.st3lane.v2i64.p0(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, i64 1, ptr %D)
  ret void
}

declare void @llvm.aarch64.neon.st3lane.v16i8.p0(<16 x i8>, <16 x i8>, <16 x i8>, i64, ptr) nounwind
declare void @llvm.aarch64.neon.st3lane.v8i16.p0(<8 x i16>, <8 x i16>, <8 x i16>, i64, ptr) nounwind
declare void @llvm.aarch64.neon.st3lane.v4i32.p0(<4 x i32>, <4 x i32>, <4 x i32>, i64, ptr) nounwind
declare void @llvm.aarch64.neon.st3lane.v2i64.p0(<2 x i64>, <2 x i64>, <2 x i64>, i64, ptr) nounwind

; Four-register lane stores through the st4lane intrinsic, always lane 1.
define void @st4lane_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, ptr %E) {
; CHECK-LABEL: st4lane_16b:
; CHECK: st4.b
  call void @llvm.aarch64.neon.st4lane.v16i8.p0(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, i64 1, ptr %E)
  ret void
}

define void @st4lane_8h(<8 x i16> %A, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, ptr %E) {
; CHECK-LABEL: st4lane_8h:
; CHECK: st4.h
  call void @llvm.aarch64.neon.st4lane.v8i16.p0(<8 x i16> %A, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i64 1, ptr %E)
  ret void
}

define void @st4lane_4s(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, ptr %E) {
; CHECK-LABEL: st4lane_4s:
; CHECK: st4.s
  call void @llvm.aarch64.neon.st4lane.v4i32.p0(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i64 1, ptr %E)
  ret void
}

define void @st4lane_2d(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, ptr %E) {
; CHECK-LABEL: st4lane_2d:
; CHECK: st4.d
  call void @llvm.aarch64.neon.st4lane.v2i64.p0(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64 1, ptr %E)
  ret void
}

declare void @llvm.aarch64.neon.st4lane.v16i8.p0(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, i64, ptr) nounwind
declare void @llvm.aarch64.neon.st4lane.v8i16.p0(<8 x i16>, <8 x i16>, <8 x i16>, <8 x i16>, i64, ptr) nounwind
declare void @llvm.aarch64.neon.st4lane.v4i32.p0(<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, i64, ptr) nounwind
declare void @llvm.aarch64.neon.st4lane.v2i64.p0(<2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, i64, ptr) nounwind


; Whole-vector interleaving stores through the st2/st3/st4 intrinsics.  The
; default run expects the matching stN instruction.  The Exynos-M3 run has
; expectations for st2 and st4 only, where it expects zip1/zip2 followed by
; stp; the st3 tests carry no Exynos-specific expectations.
define void @st2_8b(<8 x i8> %A, <8 x i8> %B, ptr %P) nounwind {
; CHECK-LABEL: st2_8b:
; CHECK: st2.8b
; EXYNOS-LABEL: st2_8b:
; EXYNOS: zip1.8b
; EXYNOS: zip2.8b
; EXYNOS: stp
  call void @llvm.aarch64.neon.st2.v8i8.p0(<8 x i8> %A, <8 x i8> %B, ptr %P)
  ret void
}

define void @st3_8b(<8 x i8> %A, <8 x i8> %B, <8 x i8> %C, ptr %P) nounwind {
; CHECK-LABEL: st3_8b:
; CHECK: st3.8b
  call void @llvm.aarch64.neon.st3.v8i8.p0(<8 x i8> %A, <8 x i8> %B, <8 x i8> %C, ptr %P)
  ret void
}

define void @st4_8b(<8 x i8> %A, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, ptr %P) nounwind {
; CHECK-LABEL: st4_8b:
; CHECK: st4.8b
; EXYNOS-LABEL: st4_8b:
; EXYNOS: zip1.8b
; EXYNOS: zip2.8b
; EXYNOS: zip1.8b
; EXYNOS: zip2.8b
; EXYNOS: zip1.8b
; EXYNOS: zip2.8b
; EXYNOS: stp
; EXYNOS: zip1.8b
; EXYNOS: zip2.8b
; EXYNOS: stp
  call void @llvm.aarch64.neon.st4.v8i8.p0(<8 x i8> %A, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, ptr %P)
  ret void
}

declare void @llvm.aarch64.neon.st2.v8i8.p0(<8 x i8>, <8 x i8>, ptr) nounwind
declare void @llvm.aarch64.neon.st3.v8i8.p0(<8 x i8>, <8 x i8>, <8 x i8>, ptr) nounwind
declare void @llvm.aarch64.neon.st4.v8i8.p0(<8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, ptr) nounwind

define void @st2_16b(<16 x i8> %A, <16 x i8> %B, ptr %P) nounwind {
; CHECK-LABEL: st2_16b:
; CHECK: st2.16b
; EXYNOS-LABEL: st2_16b:
; EXYNOS: zip1.16b
; EXYNOS: zip2.16b
; EXYNOS: stp
  call void @llvm.aarch64.neon.st2.v16i8.p0(<16 x i8> %A, <16 x i8> %B, ptr %P)
  ret void
}

define void @st3_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, ptr %P) nounwind {
; CHECK-LABEL: st3_16b:
; CHECK: st3.16b
  call void @llvm.aarch64.neon.st3.v16i8.p0(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, ptr %P)
  ret void
}

define void @st4_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, ptr %P) nounwind {
; CHECK-LABEL: st4_16b:
; CHECK: st4.16b
; EXYNOS-LABEL: st4_16b:
; EXYNOS: zip1.16b
; EXYNOS: zip2.16b
; EXYNOS: zip1.16b
; EXYNOS: zip2.16b
; EXYNOS: zip1.16b
; EXYNOS: zip2.16b
; EXYNOS: stp
; EXYNOS: zip1.16b
; EXYNOS: zip2.16b
; EXYNOS: stp
  call void @llvm.aarch64.neon.st4.v16i8.p0(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, ptr %P)
  ret void
}

declare void @llvm.aarch64.neon.st2.v16i8.p0(<16 x i8>, <16 x i8>, ptr) nounwind
declare void @llvm.aarch64.neon.st3.v16i8.p0(<16 x i8>, <16 x i8>, <16 x i8>, ptr) nounwind
declare void @llvm.aarch64.neon.st4.v16i8.p0(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, ptr) nounwind

define void @st2_4h(<4 x i16> %A, <4 x i16> %B, ptr %P) nounwind {
; CHECK-LABEL: st2_4h:
; CHECK: st2.4h
; EXYNOS-LABEL: st2_4h:
; EXYNOS: zip1.4h
; EXYNOS: zip2.4h
; EXYNOS: stp
  call void @llvm.aarch64.neon.st2.v4i16.p0(<4 x i16> %A, <4 x i16> %B, ptr %P)
  ret void
}

define void @st3_4h(<4 x i16> %A, <4 x i16> %B, <4 x i16> %C, ptr %P) nounwind {
; CHECK-LABEL: st3_4h:
; CHECK: st3.4h
  call void @llvm.aarch64.neon.st3.v4i16.p0(<4 x i16> %A, <4 x i16> %B, <4 x i16> %C, ptr %P)
  ret void
}

define void @st4_4h(<4 x i16> %A, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D, ptr %P) nounwind {
; CHECK-LABEL: st4_4h:
; CHECK: st4.4h
; EXYNOS-LABEL: st4_4h:
; EXYNOS: zip1.4h
; EXYNOS: zip2.4h
; EXYNOS: zip1.4h
; EXYNOS: zip2.4h
; EXYNOS: zip1.4h
; EXYNOS: zip2.4h
; EXYNOS: stp
; EXYNOS: zip1.4h
; EXYNOS: zip2.4h
; EXYNOS: stp
  call void @llvm.aarch64.neon.st4.v4i16.p0(<4 x i16> %A, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D, ptr %P)
  ret void
}

declare void @llvm.aarch64.neon.st2.v4i16.p0(<4 x i16>, <4 x i16>, ptr) nounwind
declare void @llvm.aarch64.neon.st3.v4i16.p0(<4 x i16>, <4 x i16>, <4 x i16>, ptr) nounwind
declare void @llvm.aarch64.neon.st4.v4i16.p0(<4 x i16>, <4 x i16>, <4 x i16>, <4 x i16>, ptr) nounwind

define void @st2_8h(<8 x i16> %A, <8 x i16> %B, ptr %P) nounwind {
; CHECK-LABEL: st2_8h:
; CHECK: st2.8h
; EXYNOS-LABEL: st2_8h:
; EXYNOS: zip1.8h
; EXYNOS: zip2.8h
; EXYNOS: stp
  call void @llvm.aarch64.neon.st2.v8i16.p0(<8 x i16> %A, <8 x i16> %B, ptr %P)
  ret void
}

define void @st3_8h(<8 x i16> %A, <8 x i16> %B, <8 x i16> %C, ptr %P) nounwind {
; CHECK-LABEL: st3_8h:
; CHECK: st3.8h
  call void @llvm.aarch64.neon.st3.v8i16.p0(<8 x i16> %A, <8 x i16> %B, <8 x i16> %C, ptr %P)
  ret void
}

define void @st4_8h(<8 x i16> %A, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, ptr %P) nounwind {
; CHECK-LABEL: st4_8h:
; CHECK: st4.8h
; EXYNOS-LABEL: st4_8h:
; EXYNOS: zip1.8h
; EXYNOS: zip2.8h
; EXYNOS: zip1.8h
; EXYNOS: zip2.8h
; EXYNOS: zip1.8h
; EXYNOS: zip2.8h
; EXYNOS: stp
; EXYNOS: zip1.8h
; EXYNOS: zip2.8h
; EXYNOS: stp
  call void @llvm.aarch64.neon.st4.v8i16.p0(<8 x i16> %A, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, ptr %P)
  ret void
}

declare void @llvm.aarch64.neon.st2.v8i16.p0(<8 x i16>, <8 x i16>, ptr) nounwind
declare void @llvm.aarch64.neon.st3.v8i16.p0(<8 x i16>, <8 x i16>, <8 x i16>, ptr) nounwind
declare void @llvm.aarch64.neon.st4.v8i16.p0(<8 x i16>, <8 x i16>, <8 x i16>, <8 x i16>, ptr) nounwind

define void @st2_2s(<2 x i32> %A, <2 x i32> %B, ptr %P) nounwind {
; CHECK-LABEL: st2_2s:
; CHECK: st2.2s
; EXYNOS-LABEL: st2_2s:
; EXYNOS: zip1.2s
; EXYNOS: zip2.2s
; EXYNOS: stp
  call void @llvm.aarch64.neon.st2.v2i32.p0(<2 x i32> %A, <2 x i32> %B, ptr %P)
  ret void
}

define void @st3_2s(<2 x i32> %A, <2 x i32> %B, <2 x i32> %C, ptr %P) nounwind {
; CHECK-LABEL: st3_2s:
; CHECK: st3.2s
  call void @llvm.aarch64.neon.st3.v2i32.p0(<2 x i32> %A, <2 x i32> %B, <2 x i32> %C, ptr %P)
  ret void
}

define void @st4_2s(<2 x i32> %A, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, ptr %P) nounwind {
; CHECK-LABEL: st4_2s:
; CHECK: st4.2s
; EXYNOS-LABEL: st4_2s:
; EXYNOS: zip1.2s
; EXYNOS: zip2.2s
; EXYNOS: zip1.2s
; EXYNOS: zip2.2s
; EXYNOS: zip1.2s
; EXYNOS: zip2.2s
; EXYNOS: stp
; EXYNOS: zip1.2s
; EXYNOS: zip2.2s
; EXYNOS: stp
  call void @llvm.aarch64.neon.st4.v2i32.p0(<2 x i32> %A, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, ptr %P)
  ret void
}

declare void @llvm.aarch64.neon.st2.v2i32.p0(<2 x i32>, <2 x i32>, ptr) nounwind
declare void @llvm.aarch64.neon.st3.v2i32.p0(<2 x i32>, <2 x i32>, <2 x i32>, ptr) nounwind
declare void @llvm.aarch64.neon.st4.v2i32.p0(<2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, ptr) nounwind

define void @st2_4s(<4 x i32> %A, <4 x i32> %B, ptr %P) nounwind {
; CHECK-LABEL: st2_4s:
; CHECK: st2.4s
; EXYNOS-LABEL: st2_4s:
; EXYNOS: zip1.4s
; EXYNOS: zip2.4s
; EXYNOS: stp
  call void @llvm.aarch64.neon.st2.v4i32.p0(<4 x i32> %A, <4 x i32> %B, ptr %P)
  ret void
}

define void @st3_4s(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, ptr %P) nounwind {
; CHECK-LABEL: st3_4s:
; CHECK: st3.4s
  call void @llvm.aarch64.neon.st3.v4i32.p0(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, ptr %P)
  ret void
}

define void @st4_4s(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, ptr %P) nounwind {
; CHECK-LABEL: st4_4s:
; CHECK: st4.4s
; EXYNOS-LABEL: st4_4s:
; EXYNOS: zip1.4s
; EXYNOS: zip2.4s
; EXYNOS: zip1.4s
; EXYNOS: zip2.4s
; EXYNOS: zip1.4s
; EXYNOS: zip2.4s
; EXYNOS: stp
; EXYNOS: zip1.4s
; EXYNOS: zip2.4s
; EXYNOS: stp
  call void @llvm.aarch64.neon.st4.v4i32.p0(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, ptr %P)
  ret void
}

declare void @llvm.aarch64.neon.st2.v4i32.p0(<4 x i32>, <4 x i32>, ptr) nounwind
declare void @llvm.aarch64.neon.st3.v4i32.p0(<4 x i32>, <4 x i32>, <4 x i32>, ptr) nounwind
declare void @llvm.aarch64.neon.st4.v4i32.p0(<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, ptr) nounwind

; If there's only one element, st2/3/4 don't make much sense, stick to st1.
define void @st2_1d(<1 x i64> %A, <1 x i64> %B, ptr %P) nounwind {
; CHECK-LABEL: st2_1d:
; CHECK: st1.1d
  call void @llvm.aarch64.neon.st2.v1i64.p0(<1 x i64> %A, <1 x i64> %B, ptr %P)
  ret void
}

define void @st3_1d(<1 x i64> %A, <1 x i64> %B, <1 x i64> %C, ptr %P) nounwind {
; CHECK-LABEL: st3_1d:
; CHECK: st1.1d
  call void @llvm.aarch64.neon.st3.v1i64.p0(<1 x i64> %A, <1 x i64> %B, <1 x i64> %C, ptr %P)
  ret void
}

define void @st4_1d(<1 x i64> %A, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D, ptr %P) nounwind {
; CHECK-LABEL: st4_1d:
; CHECK: st1.1d
  call void @llvm.aarch64.neon.st4.v1i64.p0(<1 x i64> %A, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D, ptr %P)
  ret void
}

declare void @llvm.aarch64.neon.st2.v1i64.p0(<1 x i64>, <1 x i64>, ptr) nounwind
declare void @llvm.aarch64.neon.st3.v1i64.p0(<1 x i64>, <1 x i64>, <1 x i64>, ptr) nounwind
declare void @llvm.aarch64.neon.st4.v1i64.p0(<1 x i64>, <1 x i64>, <1 x i64>, <1 x i64>, ptr) nounwind

define void @st2_2d(<2 x i64> %A, <2 x i64> %B, ptr %P) nounwind {
; CHECK-LABEL: st2_2d:
; CHECK: st2.2d
; EXYNOS-LABEL: st2_2d:
; EXYNOS: zip1.2d
; EXYNOS: zip2.2d
; EXYNOS: stp
  call void @llvm.aarch64.neon.st2.v2i64.p0(<2 x i64> %A, <2 x i64> %B, ptr %P)
  ret void
}

define void @st3_2d(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, ptr %P) nounwind {
; CHECK-LABEL: st3_2d:
; CHECK: st3.2d
  call void @llvm.aarch64.neon.st3.v2i64.p0(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, ptr %P)
  ret void
}

define void @st4_2d(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, ptr %P) nounwind {
; CHECK-LABEL: st4_2d:
; CHECK: st4.2d
; EXYNOS-LABEL: st4_2d:
; EXYNOS: zip1.2d
; EXYNOS: zip2.2d
; EXYNOS: zip1.2d
; EXYNOS: zip2.2d
; EXYNOS: zip1.2d
; EXYNOS: zip2.2d
; EXYNOS: stp
; EXYNOS: zip1.2d
; EXYNOS: zip2.2d
; EXYNOS: stp
  call void @llvm.aarch64.neon.st4.v2i64.p0(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, ptr %P)
  ret void
}

declare void @llvm.aarch64.neon.st2.v2i64.p0(<2 x i64>, <2 x i64>, ptr) nounwind
declare void @llvm.aarch64.neon.st3.v2i64.p0(<2 x i64>, <2 x i64>, <2 x i64>, ptr) nounwind
declare void @llvm.aarch64.neon.st4.v2i64.p0(<2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, ptr) nounwind

; Two-register st1 stores of 64-bit vectors through the st1x2 intrinsic; the
; base address is expected to stay in x0.
declare void @llvm.aarch64.neon.st1x2.v8i8.p0(<8 x i8>, <8 x i8>, ptr) nounwind
declare void @llvm.aarch64.neon.st1x2.v4i16.p0(<4 x i16>, <4 x i16>, ptr) nounwind
declare void @llvm.aarch64.neon.st1x2.v2i32.p0(<2 x i32>, <2 x i32>, ptr) nounwind
declare void @llvm.aarch64.neon.st1x2.v2f32.p0(<2 x float>, <2 x float>, ptr) nounwind
declare void @llvm.aarch64.neon.st1x2.v1i64.p0(<1 x i64>, <1 x i64>, ptr) nounwind
declare void @llvm.aarch64.neon.st1x2.v1f64.p0(<1 x double>, <1 x double>, ptr) nounwind

define void @st1_x2_v8i8(<8 x i8> %A, <8 x i8> %B, ptr %addr) {
; CHECK-LABEL: st1_x2_v8i8:
; CHECK: st1.8b { {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
  call void @llvm.aarch64.neon.st1x2.v8i8.p0(<8 x i8> %A, <8 x i8> %B, ptr %addr)
  ret void
}

define void @st1_x2_v4i16(<4 x i16> %A, <4 x i16> %B, ptr %addr) {
; CHECK-LABEL: st1_x2_v4i16:
; CHECK: st1.4h { {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
  call void @llvm.aarch64.neon.st1x2.v4i16.p0(<4 x i16> %A, <4 x i16> %B, ptr %addr)
  ret void
}

define void @st1_x2_v2i32(<2 x i32> %A, <2 x i32> %B, ptr %addr) {
; CHECK-LABEL: st1_x2_v2i32:
; CHECK: st1.2s { {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
  call void @llvm.aarch64.neon.st1x2.v2i32.p0(<2 x i32> %A, <2 x i32> %B, ptr %addr)
  ret void
}

define void @st1_x2_v2f32(<2 x float> %A, <2 x float> %B, ptr %addr) {
; CHECK-LABEL: st1_x2_v2f32:
; CHECK: st1.2s { {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
  call void @llvm.aarch64.neon.st1x2.v2f32.p0(<2 x float> %A, <2 x float> %B, ptr %addr)
  ret void
}

define void @st1_x2_v1i64(<1 x i64> %A, <1 x i64> %B, ptr %addr) {
; CHECK-LABEL: st1_x2_v1i64:
; CHECK: st1.1d { {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
  call void @llvm.aarch64.neon.st1x2.v1i64.p0(<1 x i64> %A, <1 x i64> %B, ptr %addr)
  ret void
}

define void @st1_x2_v1f64(<1 x double> %A, <1 x double> %B, ptr %addr) {
; CHECK-LABEL: st1_x2_v1f64:
; CHECK: st1.1d { {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
  call void @llvm.aarch64.neon.st1x2.v1f64.p0(<1 x double> %A, <1 x double> %B, ptr %addr)
  ret void
}

; Two-register st1 stores of 128-bit vectors through the st1x2 intrinsic.
declare void @llvm.aarch64.neon.st1x2.v16i8.p0(<16 x i8>, <16 x i8>, ptr) nounwind
declare void @llvm.aarch64.neon.st1x2.v8i16.p0(<8 x i16>, <8 x i16>, ptr) nounwind
declare void @llvm.aarch64.neon.st1x2.v4i32.p0(<4 x i32>, <4 x i32>, ptr) nounwind
declare void @llvm.aarch64.neon.st1x2.v4f32.p0(<4 x float>, <4 x float>, ptr) nounwind
declare void @llvm.aarch64.neon.st1x2.v2i64.p0(<2 x i64>, <2 x i64>, ptr) nounwind
declare void @llvm.aarch64.neon.st1x2.v2f64.p0(<2 x double>, <2 x double>, ptr) nounwind

define void @st1_x2_v16i8(<16 x i8> %A, <16 x i8> %B, ptr %addr) {
; CHECK-LABEL: st1_x2_v16i8:
; CHECK: st1.16b { {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
  call void @llvm.aarch64.neon.st1x2.v16i8.p0(<16 x i8> %A, <16 x i8> %B, ptr %addr)
  ret void
}

define void @st1_x2_v8i16(<8 x i16> %A, <8 x i16> %B, ptr %addr) {
; CHECK-LABEL: st1_x2_v8i16:
; CHECK: st1.8h { {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
  call void @llvm.aarch64.neon.st1x2.v8i16.p0(<8 x i16> %A, <8 x i16> %B, ptr %addr)
  ret void
}

define void @st1_x2_v4i32(<4 x i32> %A, <4 x i32> %B, ptr %addr) {
; CHECK-LABEL: st1_x2_v4i32:
; CHECK: st1.4s { {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
  call void @llvm.aarch64.neon.st1x2.v4i32.p0(<4 x i32> %A, <4 x i32> %B, ptr %addr)
  ret void
}

define void @st1_x2_v4f32(<4 x float> %A, <4 x float> %B, ptr %addr) {
; CHECK-LABEL: st1_x2_v4f32:
; CHECK: st1.4s { {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
  call void @llvm.aarch64.neon.st1x2.v4f32.p0(<4 x float> %A, <4 x float> %B, ptr %addr)
  ret void
}

define void @st1_x2_v2i64(<2 x i64> %A, <2 x i64> %B, ptr %addr) {
; CHECK-LABEL: st1_x2_v2i64:
; CHECK: st1.2d { {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
  call void @llvm.aarch64.neon.st1x2.v2i64.p0(<2 x i64> %A, <2 x i64> %B, ptr %addr)
  ret void
}

define void @st1_x2_v2f64(<2 x double> %A, <2 x double> %B, ptr %addr) {
; CHECK-LABEL: st1_x2_v2f64:
; CHECK: st1.2d { {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
  call void @llvm.aarch64.neon.st1x2.v2f64.p0(<2 x double> %A, <2 x double> %B, ptr %addr)
  ret void
}

; Three-register st1 stores of 64-bit vectors through the st1x3 intrinsic.
declare void @llvm.aarch64.neon.st1x3.v8i8.p0(<8 x i8>, <8 x i8>, <8 x i8>, ptr) nounwind
declare void @llvm.aarch64.neon.st1x3.v4i16.p0(<4 x i16>, <4 x i16>, <4 x i16>, ptr) nounwind
declare void @llvm.aarch64.neon.st1x3.v2i32.p0(<2 x i32>, <2 x i32>, <2 x i32>, ptr) nounwind
declare void @llvm.aarch64.neon.st1x3.v2f32.p0(<2 x float>, <2 x float>, <2 x float>, ptr) nounwind
declare void @llvm.aarch64.neon.st1x3.v1i64.p0(<1 x i64>, <1 x i64>, <1 x i64>, ptr) nounwind
declare void @llvm.aarch64.neon.st1x3.v1f64.p0(<1 x double>, <1 x double>, <1 x double>, ptr) nounwind

define void @st1_x3_v8i8(<8 x i8> %A, <8 x i8> %B, <8 x i8> %C, ptr %addr) {
; CHECK-LABEL: st1_x3_v8i8:
; CHECK: st1.8b { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
  call void @llvm.aarch64.neon.st1x3.v8i8.p0(<8 x i8> %A, <8 x i8> %B, <8 x i8> %C, ptr %addr)
  ret void
}

define void @st1_x3_v4i16(<4 x i16> %A, <4 x i16> %B, <4 x i16> %C, ptr %addr) {
; CHECK-LABEL: st1_x3_v4i16:
; CHECK: st1.4h { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
  call void @llvm.aarch64.neon.st1x3.v4i16.p0(<4 x i16> %A, <4 x i16> %B, <4 x i16> %C, ptr %addr)
  ret void
}

define void @st1_x3_v2i32(<2 x i32> %A, <2 x i32> %B, <2 x i32> %C, ptr %addr) {
; CHECK-LABEL: st1_x3_v2i32:
; CHECK: st1.2s { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
  call void @llvm.aarch64.neon.st1x3.v2i32.p0(<2 x i32> %A, <2 x i32> %B, <2 x i32> %C, ptr %addr)
  ret void
}

define void @st1_x3_v2f32(<2 x float> %A, <2 x float> %B, <2 x float> %C, ptr %addr) {
; CHECK-LABEL: st1_x3_v2f32:
; CHECK: st1.2s { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
  call void @llvm.aarch64.neon.st1x3.v2f32.p0(<2 x float> %A, <2 x float> %B, <2 x float> %C, ptr %addr)
  ret void
}

define void @st1_x3_v1i64(<1 x i64> %A, <1 x i64> %B, <1 x i64> %C, ptr %addr) {
; CHECK-LABEL: st1_x3_v1i64:
; CHECK: st1.1d { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
  call void @llvm.aarch64.neon.st1x3.v1i64.p0(<1 x i64> %A, <1 x i64> %B, <1 x i64> %C, ptr %addr)
  ret void
}

define void @st1_x3_v1f64(<1 x double> %A, <1 x double> %B, <1 x double> %C, ptr %addr) {
; CHECK-LABEL: st1_x3_v1f64:
; CHECK: st1.1d { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
  call void @llvm.aarch64.neon.st1x3.v1f64.p0(<1 x double> %A, <1 x double> %B, <1 x double> %C, ptr %addr)
  ret void
1055} 1056 1057declare void @llvm.aarch64.neon.st1x3.v16i8.p0(<16 x i8>, <16 x i8>, <16 x i8>, ptr) nounwind readonly 1058declare void @llvm.aarch64.neon.st1x3.v8i16.p0(<8 x i16>, <8 x i16>, <8 x i16>, ptr) nounwind readonly 1059declare void @llvm.aarch64.neon.st1x3.v4i32.p0(<4 x i32>, <4 x i32>, <4 x i32>, ptr) nounwind readonly 1060declare void @llvm.aarch64.neon.st1x3.v4f32.p0(<4 x float>, <4 x float>, <4 x float>, ptr) nounwind readonly 1061declare void @llvm.aarch64.neon.st1x3.v2i64.p0(<2 x i64>, <2 x i64>, <2 x i64>, ptr) nounwind readonly 1062declare void @llvm.aarch64.neon.st1x3.v2f64.p0(<2 x double>, <2 x double>, <2 x double>, ptr) nounwind readonly 1063 1064define void @st1_x3_v16i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, ptr %addr) { 1065; CHECK-LABEL: st1_x3_v16i8: 1066; CHECK: st1.16b { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0] 1067 call void @llvm.aarch64.neon.st1x3.v16i8.p0(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, ptr %addr) 1068 ret void 1069} 1070 1071define void @st1_x3_v8i16(<8 x i16> %A, <8 x i16> %B, <8 x i16> %C, ptr %addr) { 1072; CHECK-LABEL: st1_x3_v8i16: 1073; CHECK: st1.8h { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0] 1074 call void @llvm.aarch64.neon.st1x3.v8i16.p0(<8 x i16> %A, <8 x i16> %B, <8 x i16> %C, ptr %addr) 1075 ret void 1076} 1077 1078define void @st1_x3_v4i32(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, ptr %addr) { 1079; CHECK-LABEL: st1_x3_v4i32: 1080; CHECK: st1.4s { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0] 1081 call void @llvm.aarch64.neon.st1x3.v4i32.p0(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, ptr %addr) 1082 ret void 1083} 1084 1085define void @st1_x3_v4f32(<4 x float> %A, <4 x float> %B, <4 x float> %C, ptr %addr) { 1086; CHECK-LABEL: st1_x3_v4f32: 1087; CHECK: st1.4s { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0] 1088 call void @llvm.aarch64.neon.st1x3.v4f32.p0(<4 x float> %A, <4 x float> %B, <4 x float> %C, ptr %addr) 1089 ret void 1090} 1091 1092define void @st1_x3_v2i64(<2 x i64> %A, <2 x i64> %B, <2 
x i64> %C, ptr %addr) { 1093; CHECK-LABEL: st1_x3_v2i64: 1094; CHECK: st1.2d { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0] 1095 call void @llvm.aarch64.neon.st1x3.v2i64.p0(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, ptr %addr) 1096 ret void 1097} 1098 1099define void @st1_x3_v2f64(<2 x double> %A, <2 x double> %B, <2 x double> %C, ptr %addr) { 1100; CHECK-LABEL: st1_x3_v2f64: 1101; CHECK: st1.2d { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0] 1102 call void @llvm.aarch64.neon.st1x3.v2f64.p0(<2 x double> %A, <2 x double> %B, <2 x double> %C, ptr %addr) 1103 ret void 1104} 1105 1106 1107declare void @llvm.aarch64.neon.st1x4.v8i8.p0(<8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, ptr) nounwind readonly 1108declare void @llvm.aarch64.neon.st1x4.v4i16.p0(<4 x i16>, <4 x i16>, <4 x i16>, <4 x i16>, ptr) nounwind readonly 1109declare void @llvm.aarch64.neon.st1x4.v2i32.p0(<2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, ptr) nounwind readonly 1110declare void @llvm.aarch64.neon.st1x4.v2f32.p0(<2 x float>, <2 x float>, <2 x float>, <2 x float>, ptr) nounwind readonly 1111declare void @llvm.aarch64.neon.st1x4.v1i64.p0(<1 x i64>, <1 x i64>, <1 x i64>, <1 x i64>, ptr) nounwind readonly 1112declare void @llvm.aarch64.neon.st1x4.v1f64.p0(<1 x double>, <1 x double>, <1 x double>, <1 x double>, ptr) nounwind readonly 1113 1114define void @st1_x4_v8i8(<8 x i8> %A, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, ptr %addr) { 1115; CHECK-LABEL: st1_x4_v8i8: 1116; CHECK: st1.8b { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0] 1117 call void @llvm.aarch64.neon.st1x4.v8i8.p0(<8 x i8> %A, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, ptr %addr) 1118 ret void 1119} 1120 1121define void @st1_x4_v4i16(<4 x i16> %A, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D, ptr %addr) { 1122; CHECK-LABEL: st1_x4_v4i16: 1123; CHECK: st1.4h { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0] 1124 call void @llvm.aarch64.neon.st1x4.v4i16.p0(<4 x i16> %A, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D, ptr %addr) 1125 ret 
void 1126} 1127 1128define void @st1_x4_v2i32(<2 x i32> %A, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, ptr %addr) { 1129; CHECK-LABEL: st1_x4_v2i32: 1130; CHECK: st1.2s { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0] 1131 call void @llvm.aarch64.neon.st1x4.v2i32.p0(<2 x i32> %A, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, ptr %addr) 1132 ret void 1133} 1134 1135define void @st1_x4_v2f32(<2 x float> %A, <2 x float> %B, <2 x float> %C, <2 x float> %D, ptr %addr) { 1136; CHECK-LABEL: st1_x4_v2f32: 1137; CHECK: st1.2s { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0] 1138 call void @llvm.aarch64.neon.st1x4.v2f32.p0(<2 x float> %A, <2 x float> %B, <2 x float> %C, <2 x float> %D, ptr %addr) 1139 ret void 1140} 1141 1142define void @st1_x4_v1i64(<1 x i64> %A, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D, ptr %addr) { 1143; CHECK-LABEL: st1_x4_v1i64: 1144; CHECK: st1.1d { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0] 1145 call void @llvm.aarch64.neon.st1x4.v1i64.p0(<1 x i64> %A, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D, ptr %addr) 1146 ret void 1147} 1148 1149define void @st1_x4_v1f64(<1 x double> %A, <1 x double> %B, <1 x double> %C, <1 x double> %D, ptr %addr) { 1150; CHECK-LABEL: st1_x4_v1f64: 1151; CHECK: st1.1d { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0] 1152 call void @llvm.aarch64.neon.st1x4.v1f64.p0(<1 x double> %A, <1 x double> %B, <1 x double> %C, <1 x double> %D, ptr %addr) 1153 ret void 1154} 1155 1156declare void @llvm.aarch64.neon.st1x4.v16i8.p0(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, ptr) nounwind readonly 1157declare void @llvm.aarch64.neon.st1x4.v8i16.p0(<8 x i16>, <8 x i16>, <8 x i16>, <8 x i16>, ptr) nounwind readonly 1158declare void @llvm.aarch64.neon.st1x4.v4i32.p0(<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, ptr) nounwind readonly 1159declare void @llvm.aarch64.neon.st1x4.v4f32.p0(<4 x float>, <4 x float>, <4 x float>, <4 x float>, ptr) nounwind readonly 1160declare void 
@llvm.aarch64.neon.st1x4.v2i64.p0(<2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, ptr) nounwind readonly 1161declare void @llvm.aarch64.neon.st1x4.v2f64.p0(<2 x double>, <2 x double>, <2 x double>, <2 x double>, ptr) nounwind readonly 1162 1163define void @st1_x4_v16i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, ptr %addr) { 1164; CHECK-LABEL: st1_x4_v16i8: 1165; CHECK: st1.16b { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0] 1166 call void @llvm.aarch64.neon.st1x4.v16i8.p0(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, ptr %addr) 1167 ret void 1168} 1169 1170define void @st1_x4_v8i16(<8 x i16> %A, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, ptr %addr) { 1171; CHECK-LABEL: st1_x4_v8i16: 1172; CHECK: st1.8h { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0] 1173 call void @llvm.aarch64.neon.st1x4.v8i16.p0(<8 x i16> %A, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, ptr %addr) 1174 ret void 1175} 1176 1177define void @st1_x4_v4i32(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, ptr %addr) { 1178; CHECK-LABEL: st1_x4_v4i32: 1179; CHECK: st1.4s { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0] 1180 call void @llvm.aarch64.neon.st1x4.v4i32.p0(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, ptr %addr) 1181 ret void 1182} 1183 1184define void @st1_x4_v4f32(<4 x float> %A, <4 x float> %B, <4 x float> %C, <4 x float> %D, ptr %addr) { 1185; CHECK-LABEL: st1_x4_v4f32: 1186; CHECK: st1.4s { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0] 1187 call void @llvm.aarch64.neon.st1x4.v4f32.p0(<4 x float> %A, <4 x float> %B, <4 x float> %C, <4 x float> %D, ptr %addr) 1188 ret void 1189} 1190 1191define void @st1_x4_v2i64(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, ptr %addr) { 1192; CHECK-LABEL: st1_x4_v2i64: 1193; CHECK: st1.2d { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0] 1194 call void @llvm.aarch64.neon.st1x4.v2i64.p0(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, ptr %addr) 
1195 ret void 1196} 1197 1198define void @st1_x4_v2f64(<2 x double> %A, <2 x double> %B, <2 x double> %C, <2 x double> %D, ptr %addr) { 1199; CHECK-LABEL: st1_x4_v2f64: 1200; CHECK: st1.2d { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0] 1201 call void @llvm.aarch64.neon.st1x4.v2f64.p0(<2 x double> %A, <2 x double> %B, <2 x double> %C, <2 x double> %D, ptr %addr) 1202 ret void 1203} 1204