; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
; RUN: llc < %s -mtriple=aarch64-unknown-unknown | FileCheck %s

; Tests for folding the pattern
;     insertelement (shufflevector (load <N x T>), poison, <shift-by-one mask>),
;                   (load T), first-or-last-lane
; into one vector load. In every positive test the scalar load reads the
; element directly before (insert into lane 0) or directly after (insert into
; the last lane) the memory covered by the vector load, and the expected
; assembly contains a single ldr/ldur/ldp covering both.
;
; The shuffles use poison for the unused second operand and for the lane that
; the insertelement subsequently overwrites.

; Scalar at p, vector at p+1, lanes shifted up by one, scalar inserted into
; lane 0: expected to become one 8-byte load from p.
define <8 x i8> @inserti8_first(ptr %p) {
; CHECK-LABEL: inserti8_first:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr d0, [x0]
; CHECK-NEXT:    ret
  %q = getelementptr inbounds i8, ptr %p, i32 1
  %l1 = load <8 x i8>, ptr %q
  %l2 = load i8, ptr %p
  %s = shufflevector <8 x i8> %l1, <8 x i8> poison, <8 x i32> <i32 poison, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6>
  %ins = insertelement <8 x i8> %s, i8 %l2, i32 0
  ret <8 x i8> %ins
}

; Vector at p, scalar at p+8, lanes shifted down by one, scalar inserted into
; lane 7: expected to become one 8-byte load from p+1.
define <8 x i8> @inserti8_last(ptr %p) {
; CHECK-LABEL: inserti8_last:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldur d0, [x0, #1]
; CHECK-NEXT:    ret
  %q = getelementptr inbounds i8, ptr %p, i32 8
  %l1 = load <8 x i8>, ptr %p
  %l2 = load i8, ptr %q
  %s = shufflevector <8 x i8> %l1, <8 x i8> poison, <8 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 poison>
  %ins = insertelement <8 x i8> %s, i8 %l2, i32 7
  ret <8 x i8> %ins
}

; Same as inserti8_first, but both the vector and the scalar are sign-extended
; from i8 to i16 before the shuffle/insert. The expected assembly is a single
; load followed by one vector sign-extend.
define <8 x i16> @inserti8_first_sext(ptr %p) {
; CHECK-LABEL: inserti8_first_sext:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr d0, [x0]
; CHECK-NEXT:    sshll v0.8h, v0.8b, #0
; CHECK-NEXT:    ret
  %q = getelementptr inbounds i8, ptr %p, i32 1
  %l1 = load <8 x i8>, ptr %q
  %s1 = sext <8 x i8> %l1 to <8 x i16>
  %l2 = load i8, ptr %p
  %s2 = sext i8 %l2 to i16
  %s = shufflevector <8 x i16> %s1, <8 x i16> poison, <8 x i32> <i32 poison, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6>
  %ins = insertelement <8 x i16> %s, i16 %s2, i32 0
  ret <8 x i16> %ins
}

; Same as inserti8_last, with matching sign-extensions on vector and scalar.
define <8 x i16> @inserti8_last_sext(ptr %p) {
; CHECK-LABEL: inserti8_last_sext:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldur d0, [x0, #1]
; CHECK-NEXT:    sshll v0.8h, v0.8b, #0
; CHECK-NEXT:    ret
  %q = getelementptr inbounds i8, ptr %p, i32 8
  %l1 = load <8 x i8>, ptr %p
  %s1 = sext <8 x i8> %l1 to <8 x i16>
  %l2 = load i8, ptr %q
  %s2 = sext i8 %l2 to i16
  %s = shufflevector <8 x i16> %s1, <8 x i16> poison, <8 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 poison>
  %ins = insertelement <8 x i16> %s, i16 %s2, i32 7
  ret <8 x i16> %ins
}

; Same as inserti8_first, with matching zero-extensions on vector and scalar.
define <8 x i16> @inserti8_first_zext(ptr %p) {
; CHECK-LABEL: inserti8_first_zext:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr d0, [x0]
; CHECK-NEXT:    ushll v0.8h, v0.8b, #0
; CHECK-NEXT:    ret
  %q = getelementptr inbounds i8, ptr %p, i32 1
  %l1 = load <8 x i8>, ptr %q
  %s1 = zext <8 x i8> %l1 to <8 x i16>
  %l2 = load i8, ptr %p
  %s2 = zext i8 %l2 to i16
  %s = shufflevector <8 x i16> %s1, <8 x i16> poison, <8 x i32> <i32 poison, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6>
  %ins = insertelement <8 x i16> %s, i16 %s2, i32 0
  ret <8 x i16> %ins
}

; Same as inserti8_last, with matching zero-extensions on vector and scalar.
define <8 x i16> @inserti8_last_zext(ptr %p) {
; CHECK-LABEL: inserti8_last_zext:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldur d0, [x0, #1]
; CHECK-NEXT:    ushll v0.8h, v0.8b, #0
; CHECK-NEXT:    ret
  %q = getelementptr inbounds i8, ptr %p, i32 8
  %l1 = load <8 x i8>, ptr %p
  %s1 = zext <8 x i8> %l1 to <8 x i16>
  %l2 = load i8, ptr %q
  %s2 = zext i8 %l2 to i16
  %s = shufflevector <8 x i16> %s1, <8 x i16> poison, <8 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 poison>
  %ins = insertelement <8 x i16> %s, i16 %s2, i32 7
  ret <8 x i16> %ins
}

; 256-bit variant (<8 x i32>): scalar at p, vector at p+4. The expected
; assembly loads the result as a register pair starting at p.
define <8 x i32> @inserti32_first(ptr %p) {
; CHECK-LABEL: inserti32_first:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldp q0, q1, [x0]
; CHECK-NEXT:    ret
  %q = getelementptr inbounds i8, ptr %p, i32 4
  %l1 = load <8 x i32>, ptr %q
  %l2 = load i32, ptr %p
  %s = shufflevector <8 x i32> %l1, <8 x i32> poison, <8 x i32> <i32 poison, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6>
  %ins = insertelement <8 x i32> %s, i32 %l2, i32 0
  ret <8 x i32> %ins
}

; 256-bit variant: vector at p, scalar at p+32. The expected assembly loads
; the two result halves from p+4 and p+20.
define <8 x i32> @inserti32_last(ptr %p) {
; CHECK-LABEL: inserti32_last:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldur q0, [x0, #4]
; CHECK-NEXT:    ldur q1, [x0, #20]
; CHECK-NEXT:    ret
  %q = getelementptr inbounds i8, ptr %p, i32 32
  %l1 = load <8 x i32>, ptr %p
  %l2 = load i32, ptr %q
  %s = shufflevector <8 x i32> %l1, <8 x i32> poison, <8 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 poison>
  %ins = insertelement <8 x i32> %s, i32 %l2, i32 7
  ret <8 x i32> %ins
}

; As inserti32_first, but the original vector load %l1 has a second use (the
; add). The expected assembly still builds %ins from a direct load at p, and
; loads %l1 separately from p+4 / p+20.
define <8 x i32> @inserti32_first_multiuse(ptr %p) {
; CHECK-LABEL: inserti32_first_multiuse:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldp q3, q2, [x0]
; CHECK-NEXT:    ldur q1, [x0, #20]
; CHECK-NEXT:    ldur q0, [x0, #4]
; CHECK-NEXT:    add v0.4s, v0.4s, v3.4s
; CHECK-NEXT:    add v1.4s, v1.4s, v2.4s
; CHECK-NEXT:    ret
  %q = getelementptr inbounds i8, ptr %p, i32 4
  %l1 = load <8 x i32>, ptr %q
  %l2 = load i32, ptr %p
  %s = shufflevector <8 x i32> %l1, <8 x i32> poison, <8 x i32> <i32 poison, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6>
  %ins = insertelement <8 x i32> %s, i32 %l2, i32 0
  %a = add <8 x i32> %l1, %ins
  ret <8 x i32> %a
}

; As inserti32_last, but %l1 has a second use (the add). The expected assembly
; loads %l1 from p and builds %ins from direct loads at p+4 / p+20.
define <8 x i32> @inserti32_last_multiuse(ptr %p) {
; CHECK-LABEL: inserti32_last_multiuse:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldp q0, q1, [x0]
; CHECK-NEXT:    ldur q2, [x0, #20]
; CHECK-NEXT:    ldur q3, [x0, #4]
; CHECK-NEXT:    add v0.4s, v0.4s, v3.4s
; CHECK-NEXT:    add v1.4s, v1.4s, v2.4s
; CHECK-NEXT:    ret
  %q = getelementptr inbounds i8, ptr %p, i32 32
  %l1 = load <8 x i32>, ptr %p
  %l2 = load i32, ptr %q
  %s = shufflevector <8 x i32> %l1, <8 x i32> poison, <8 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 poison>
  %ins = insertelement <8 x i32> %s, i32 %l2, i32 7
  %a = add <8 x i32> %l1, %ins
  ret <8 x i32> %a
}

; Floating-point element type: scalar float at p, <4 x float> at p+4.
define <4 x float> @insertf32_first(ptr %p) {
; CHECK-LABEL: insertf32_first:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr q0, [x0]
; CHECK-NEXT:    ret
  %q = getelementptr inbounds i8, ptr %p, i32 4
  %l1 = load <4 x float>, ptr %q
  %l2 = load float, ptr %p
  %s = shufflevector <4 x float> %l1, <4 x float> poison, <4 x i32> <i32 poison, i32 0, i32 1, i32 2>
  %ins = insertelement <4 x float> %s, float %l2, i32 0
  ret <4 x float> %ins
}

; Floating-point element type: <4 x float> at p, scalar float at p+16.
define <4 x float> @insertf32_last(ptr %p) {
; CHECK-LABEL: insertf32_last:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldur q0, [x0, #4]
; CHECK-NEXT:    ret
  %q = getelementptr inbounds i8, ptr %p, i32 16
  %l1 = load <4 x float>, ptr %p
  %l2 = load float, ptr %q
  %s = shufflevector <4 x float> %l1, <4 x float> poison, <4 x i32> <i32 1, i32 2, i32 3, i32 poison>
  %ins = insertelement <4 x float> %s, float %l2, i32 3
  ret <4 x float> %ins
}

; Two-element vector: scalar i64 at p, <2 x i64> at p+8.
define <2 x i64> @inserti64_first(ptr %p) {
; CHECK-LABEL: inserti64_first:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr q0, [x0]
; CHECK-NEXT:    ret
  %q = getelementptr inbounds i8, ptr %p, i32 8
  %l1 = load <2 x i64>, ptr %q
  %l2 = load i64, ptr %p
  %s = shufflevector <2 x i64> %l1, <2 x i64> poison, <2 x i32> <i32 poison, i32 0>
  %ins = insertelement <2 x i64> %s, i64 %l2, i32 0
  ret <2 x i64> %ins
}

; Two-element vector: <2 x i64> at p, scalar i64 at p+16.
define <2 x i64> @inserti64_last(ptr %p) {
; CHECK-LABEL: inserti64_last:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldur q0, [x0, #8]
; CHECK-NEXT:    ret
  %q = getelementptr inbounds i8, ptr %p, i32 16
  %l1 = load <2 x i64>, ptr %p
  %l2 = load i64, ptr %q
  %s = shufflevector <2 x i64> %l1, <2 x i64> poison, <2 x i32> <i32 1, i32 poison>
  %ins = insertelement <2 x i64> %s, i64 %l2, i32 1
  ret <2 x i64> %ins
}

; As inserti8_first, but the mask has an additional don't-care lane (lane 3).
; The expected assembly is still a single load.
define <8 x i8> @inserti8_first_undef(ptr %p) {
; CHECK-LABEL: inserti8_first_undef:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr d0, [x0]
; CHECK-NEXT:    ret
  %q = getelementptr inbounds i8, ptr %p, i32 1
  %l1 = load <8 x i8>, ptr %q
  %l2 = load i8, ptr %p
  %s = shufflevector <8 x i8> %l1, <8 x i8> poison, <8 x i32> <i32 poison, i32 0, i32 1, i32 poison, i32 3, i32 4, i32 5, i32 6>
  %ins = insertelement <8 x i8> %s, i8 %l2, i32 0
  ret <8 x i8> %ins
}

; As inserti8_last, but every mask lane except lane 0 is a don't-care.
; The expected assembly is still a single load.
define <8 x i8> @inserti8_last_undef(ptr %p) {
; CHECK-LABEL: inserti8_last_undef:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldur d0, [x0, #1]
; CHECK-NEXT:    ret
  %q = getelementptr inbounds i8, ptr %p, i32 8
  %l1 = load <8 x i8>, ptr %p
  %l2 = load i8, ptr %q
  %s = shufflevector <8 x i8> %l1, <8 x i8> poison, <8 x i32> <i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
  %ins = insertelement <8 x i8> %s, i8 %l2, i32 7
  ret <8 x i8> %ins
}


; Negative tests. Each one breaks a single aspect of the pattern; the expected
; assembly keeps the separate vector load, lane shift and scalar insert.

; The vector is zero-extended but the scalar is sign-extended.
define <8 x i16> @wrong_zextandsext(ptr %p) {
; CHECK-LABEL: wrong_zextandsext:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldur d0, [x0, #1]
; CHECK-NEXT:    ldrsb w8, [x0]
; CHECK-NEXT:    ushll v0.8h, v0.8b, #0
; CHECK-NEXT:    ext v0.16b, v0.16b, v0.16b, #14
; CHECK-NEXT:    mov v0.h[0], w8
; CHECK-NEXT:    ret
  %q = getelementptr inbounds i8, ptr %p, i32 1
  %l1 = load <8 x i8>, ptr %q
  %s1 = zext <8 x i8> %l1 to <8 x i16>
  %l2 = load i8, ptr %p
  %s2 = sext i8 %l2 to i16
  %s = shufflevector <8 x i16> %s1, <8 x i16> poison, <8 x i32> <i32 poison, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6>
  %ins = insertelement <8 x i16> %s, i16 %s2, i32 0
  ret <8 x i16> %ins
}

; The mask shifts lanes up (free lane is 0) but the scalar is inserted into
; lane 7.
define <8 x i8> @wrongidx_first(ptr %p) {
; CHECK-LABEL: wrongidx_first:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldur d0, [x0, #1]
; CHECK-NEXT:    ext v0.8b, v0.8b, v0.8b, #7
; CHECK-NEXT:    ld1 { v0.b }[7], [x0]
; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT:    ret
  %q = getelementptr inbounds i8, ptr %p, i32 1
  %l1 = load <8 x i8>, ptr %q
  %l2 = load i8, ptr %p
  %s = shufflevector <8 x i8> %l1, <8 x i8> poison, <8 x i32> <i32 poison, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6>
  %ins = insertelement <8 x i8> %s, i8 %l2, i32 7
  ret <8 x i8> %ins
}

; The mask shifts lanes down (free lane is 7) but the scalar is inserted into
; lane 0.
define <8 x i8> @wrong_last(ptr %p) {
; CHECK-LABEL: wrong_last:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr d0, [x0]
; CHECK-NEXT:    add x8, x0, #8
; CHECK-NEXT:    ext v0.8b, v0.8b, v0.8b, #1
; CHECK-NEXT:    ld1 { v0.b }[0], [x8]
; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT:    ret
  %q = getelementptr inbounds i8, ptr %p, i32 8
  %l1 = load <8 x i8>, ptr %p
  %l2 = load i8, ptr %q
  %s = shufflevector <8 x i8> %l1, <8 x i8> poison, <8 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 poison>
  %ins = insertelement <8 x i8> %s, i8 %l2, i32 0
  ret <8 x i8> %ins
}

; The mask is not a pure shift by one: lane 3 selects source element 3 instead
; of 2.
define <8 x i8> @wrong_shuffle(ptr %p) {
; CHECK-LABEL: wrong_shuffle:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldur d0, [x0, #1]
; CHECK-NEXT:    adrp x8, .LCPI19_0
; CHECK-NEXT:    ldr d1, [x8, :lo12:.LCPI19_0]
; CHECK-NEXT:    mov v0.d[1], v0.d[0]
; CHECK-NEXT:    tbl v0.8b, { v0.16b }, v1.8b
; CHECK-NEXT:    ld1 { v0.b }[0], [x0]
; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT:    ret
  %q = getelementptr inbounds i8, ptr %p, i32 1
  %l1 = load <8 x i8>, ptr %q
  %l2 = load i8, ptr %p
  %s = shufflevector <8 x i8> %l1, <8 x i8> poison, <8 x i32> <i32 poison, i32 0, i32 1, i32 3, i32 3, i32 4, i32 5, i32 6>
  %ins = insertelement <8 x i8> %s, i8 %l2, i32 0
  ret <8 x i8> %ins
}

; The vector is sign-extended from i8, but the scalar is a plain i16 load with
; no extension.
define <8 x i16> @wrong_exttype(ptr %p) {
; CHECK-LABEL: wrong_exttype:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldur d0, [x0, #1]
; CHECK-NEXT:    sshll v0.8h, v0.8b, #0
; CHECK-NEXT:    ext v0.16b, v0.16b, v0.16b, #14
; CHECK-NEXT:    ld1 { v0.h }[0], [x0]
; CHECK-NEXT:    ret
  %q = getelementptr inbounds i8, ptr %p, i32 1
  %l1 = load <8 x i8>, ptr %q
  %s1 = sext <8 x i8> %l1 to <8 x i16>
  %l2 = load i16, ptr %p
  %s = shufflevector <8 x i16> %s1, <8 x i16> poison, <8 x i32> <i32 poison, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6>
  %ins = insertelement <8 x i16> %s, i16 %l2, i32 0
  ret <8 x i16> %ins
}

; Both sides are sign-extended to i32, but from different source types: the
; vector elements are i8 and the scalar is i16.
define <4 x i32> @wrong_exttype2(ptr %p) {
; CHECK-LABEL: wrong_exttype2:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldur s0, [x0, #1]
; CHECK-NEXT:    ldrsh w8, [x0]
; CHECK-NEXT:    sshll v0.8h, v0.8b, #0
; CHECK-NEXT:    sshll v0.4s, v0.4h, #0
; CHECK-NEXT:    ext v0.16b, v0.16b, v0.16b, #12
; CHECK-NEXT:    mov v0.s[0], w8
; CHECK-NEXT:    ret
  %q = getelementptr inbounds i8, ptr %p, i32 1
  %l1 = load <4 x i8>, ptr %q
  %s1 = sext <4 x i8> %l1 to <4 x i32>
  %l2 = load i16, ptr %p
  %s2 = sext i16 %l2 to i32
  %s = shufflevector <4 x i32> %s1, <4 x i32> poison, <4 x i32> <i32 poison, i32 0, i32 1, i32 2>
  %ins = insertelement <4 x i32> %s, i32 %s2, i32 0
  ret <4 x i32> %ins
}

; The vector is loaded from p-1 rather than p+1, so the scalar at p does not
; sit directly in front of it.
define <8 x i8> @wrong_offsetfirst(ptr %p) {
; CHECK-LABEL: wrong_offsetfirst:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldur d0, [x0, #-1]
; CHECK-NEXT:    ext v0.8b, v0.8b, v0.8b, #7
; CHECK-NEXT:    ld1 { v0.b }[0], [x0]
; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT:    ret
  %q = getelementptr inbounds i8, ptr %p, i32 -1
  %l1 = load <8 x i8>, ptr %q
  %l2 = load i8, ptr %p
  %s = shufflevector <8 x i8> %l1, <8 x i8> poison, <8 x i32> <i32 poison, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6>
  %ins = insertelement <8 x i8> %s, i8 %l2, i32 0
  ret <8 x i8> %ins
}

; The scalar is loaded from p+7 rather than p+8, so it does not sit directly
; behind the 8-byte vector at p.
define <8 x i8> @wrong_offsetlast(ptr %p) {
; CHECK-LABEL: wrong_offsetlast:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr d0, [x0]
; CHECK-NEXT:    add x8, x0, #7
; CHECK-NEXT:    ext v0.8b, v0.8b, v0.8b, #1
; CHECK-NEXT:    ld1 { v0.b }[7], [x8]
; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT:    ret
  %q = getelementptr inbounds i8, ptr %p, i32 7
  %l1 = load <8 x i8>, ptr %p
  %l2 = load i8, ptr %q
  %s = shufflevector <8 x i8> %l1, <8 x i8> poison, <8 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 poison>
  %ins = insertelement <8 x i8> %s, i8 %l2, i32 7
  ret <8 x i8> %ins
}


; Interaction with a store through an unrelated pointer %r.

; The store sits between the vector load and the scalar load. The expected
; assembly keeps the two loads separate (presumably because the store may
; alias the loaded memory).
define <8 x i8> @storebetween(ptr %p, ptr %r) {
; CHECK-LABEL: storebetween:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldur d0, [x0, #1]
; CHECK-NEXT:    strb wzr, [x1]
; CHECK-NEXT:    ext v0.8b, v0.8b, v0.8b, #7
; CHECK-NEXT:    ld1 { v0.b }[0], [x0]
; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT:    ret
  %q = getelementptr inbounds i8, ptr %p, i32 1
  %l1 = load <8 x i8>, ptr %q
  store i8 0, ptr %r
  %l2 = load i8, ptr %p
  %s = shufflevector <8 x i8> %l1, <8 x i8> poison, <8 x i32> <i32 poison, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6>
  %ins = insertelement <8 x i8> %s, i8 %l2, i32 0
  ret <8 x i8> %ins
}

; The store precedes both loads; the expected assembly uses a single load.
define <8 x i8> @storebefore(ptr %p, ptr %r) {
; CHECK-LABEL: storebefore:
; CHECK:       // %bb.0:
; CHECK-NEXT:    strb wzr, [x1]
; CHECK-NEXT:    ldr d0, [x0]
; CHECK-NEXT:    ret
  %q = getelementptr inbounds i8, ptr %p, i32 1
  store i8 0, ptr %r
  %l1 = load <8 x i8>, ptr %q
  %l2 = load i8, ptr %p
  %s = shufflevector <8 x i8> %l1, <8 x i8> poison, <8 x i32> <i32 poison, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6>
  %ins = insertelement <8 x i8> %s, i8 %l2, i32 0
  ret <8 x i8> %ins
}

; The store follows both loads; the expected assembly uses a single load.
define <8 x i8> @storeafter(ptr %p, ptr %r) {
; CHECK-LABEL: storeafter:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr d0, [x0]
; CHECK-NEXT:    strb wzr, [x1]
; CHECK-NEXT:    ret
  %q = getelementptr inbounds i8, ptr %p, i32 1
  %l1 = load <8 x i8>, ptr %q
  %l2 = load i8, ptr %p
  store i8 0, ptr %r
  %s = shufflevector <8 x i8> %l1, <8 x i8> poison, <8 x i32> <i32 poison, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6>
  %ins = insertelement <8 x i8> %s, i8 %l2, i32 0
  ret <8 x i8> %ins
}