; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=aarch64--linux-gnu | FileCheck %s

; Every function below takes an unused leading float argument, then builds its
; result with a two-input shufflevector whose mask copies one contiguous run of
; low elements from one source into an otherwise untouched copy of the other.
; The expected code is a single lane insert whenever the run starts at a
; multiple of its own length; the *_15 variants start the run elsewhere and are
; expected to fall back to a table lookup through a constant-pool mask.


; ---------------------------------------------------------------------------
; Register sources, i8 elements
; ---------------------------------------------------------------------------

; Two bytes of %a placed at bytes 0-1 of %b.
define <16 x i8> @insert_v16i8_2_1(float %tmp, <16 x i8> %b, <16 x i8> %a) {
; CHECK-LABEL: insert_v16i8_2_1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov v0.16b, v1.16b
; CHECK-NEXT:    mov v0.h[0], v2.h[0]
; CHECK-NEXT:    ret
  %res = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 1, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
  ret <16 x i8> %res
}

; Two bytes of %a placed at bytes 2-3 of %b.
define <16 x i8> @insert_v16i8_2_2(float %tmp, <16 x i8> %b, <16 x i8> %a) {
; CHECK-LABEL: insert_v16i8_2_2:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov v0.16b, v1.16b
; CHECK-NEXT:    mov v0.h[1], v2.h[0]
; CHECK-NEXT:    ret
  %res = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 16, i32 17, i32 0, i32 1, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
  ret <16 x i8> %res
}

; Two bytes of %a placed at bytes 12-13 of %b.
define <16 x i8> @insert_v16i8_2_6(float %tmp, <16 x i8> %b, <16 x i8> %a) {
; CHECK-LABEL: insert_v16i8_2_6:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov v0.16b, v1.16b
; CHECK-NEXT:    mov v0.h[6], v2.h[0]
; CHECK-NEXT:    ret
  %res = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 0, i32 1, i32 30, i32 31>
  ret <16 x i8> %res
}

; Four bytes of %a placed at bytes 0-3 of %b.
define <16 x i8> @insert_v16i8_4_1(float %tmp, <16 x i8> %b, <16 x i8> %a) {
; CHECK-LABEL: insert_v16i8_4_1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov v0.16b, v1.16b
; CHECK-NEXT:    mov v0.s[0], v2.s[0]
; CHECK-NEXT:    ret
  %res = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
  ret <16 x i8> %res
}

; Four bytes of %a placed at bytes 2-5 of %b: the run does not start on a
; 4-byte boundary, so a table lookup is expected instead of a lane insert.
define <16 x i8> @insert_v16i8_4_15(float %tmp, <16 x i8> %b, <16 x i8> %a) {
; CHECK-LABEL: insert_v16i8_4_15:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $q2 killed $q2 def $q2_q3
; CHECK-NEXT:    adrp x8, .LCPI4_0
; CHECK-NEXT:    mov v3.16b, v1.16b
; CHECK-NEXT:    ldr q0, [x8, :lo12:.LCPI4_0]
; CHECK-NEXT:    tbl v0.16b, { v2.16b, v3.16b }, v0.16b
; CHECK-NEXT:    ret
  %res = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 16, i32 17, i32 0, i32 1, i32 2, i32 3, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
  ret <16 x i8> %res
}

; Four bytes of %a placed at bytes 4-7 of %b.
define <16 x i8> @insert_v16i8_4_2(float %tmp, <16 x i8> %b, <16 x i8> %a) {
; CHECK-LABEL: insert_v16i8_4_2:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov v0.16b, v1.16b
; CHECK-NEXT:    mov v0.s[1], v2.s[0]
; CHECK-NEXT:    ret
  %res = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 0, i32 1, i32 2, i32 3, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
  ret <16 x i8> %res
}

; Four bytes of %a placed at bytes 8-11 of %b.
define <16 x i8> @insert_v16i8_4_3(float %tmp, <16 x i8> %b, <16 x i8> %a) {
; CHECK-LABEL: insert_v16i8_4_3:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov v0.16b, v1.16b
; CHECK-NEXT:    mov v0.s[2], v2.s[0]
; CHECK-NEXT:    ret
  %res = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 0, i32 1, i32 2, i32 3, i32 28, i32 29, i32 30, i32 31>
  ret <16 x i8> %res
}

; Four bytes of %a placed at bytes 12-15 of %b.
define <16 x i8> @insert_v16i8_4_4(float %tmp, <16 x i8> %b, <16 x i8> %a) {
; CHECK-LABEL: insert_v16i8_4_4:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov v0.16b, v1.16b
; CHECK-NEXT:    mov v0.s[3], v2.s[0]
; CHECK-NEXT:    ret
  %res = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 0, i32 1, i32 2, i32 3>
  ret <16 x i8> %res
}

; 64-bit vector: low four bytes from %a, high four bytes kept from %b.
define <8 x i8> @insert_v8i8_4_1(float %tmp, <8 x i8> %b, <8 x i8> %a) {
; CHECK-LABEL: insert_v8i8_4_1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fmov d0, d2
; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT:    mov v0.s[1], v1.s[1]
; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT:    ret
  %res = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 12, i32 13, i32 14, i32 15>
  ret <8 x i8> %res
}

; 64-bit vector: low four bytes kept from %b, high four bytes from %a's low half.
define <8 x i8> @insert_v8i8_4_2(float %tmp, <8 x i8> %b, <8 x i8> %a) {
; CHECK-LABEL: insert_v8i8_4_2:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fmov d0, d1
; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
; CHECK-NEXT:    mov v0.s[1], v2.s[0]
; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT:    ret
  %res = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 0, i32 1, i32 2, i32 3>
  ret <8 x i8> %res
}

; Low eight bytes from %a, high eight bytes kept from %b.
define <16 x i8> @insert_v16i8_8_1(float %tmp, <16 x i8> %b, <16 x i8> %a) {
; CHECK-LABEL: insert_v16i8_8_1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov v0.16b, v2.16b
; CHECK-NEXT:    mov v0.d[1], v1.d[1]
; CHECK-NEXT:    ret
  %res = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
  ret <16 x i8> %res
}

; Low eight bytes kept from %b, high eight bytes from %a's low half.
define <16 x i8> @insert_v16i8_8_2(float %tmp, <16 x i8> %b, <16 x i8> %a) {
; CHECK-LABEL: insert_v16i8_8_2:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov v0.16b, v1.16b
; CHECK-NEXT:    mov v0.d[1], v2.d[0]
; CHECK-NEXT:    ret
  %res = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <16 x i8> %res
}

; ---------------------------------------------------------------------------
; Register sources, i16 elements
; ---------------------------------------------------------------------------

; Two halfwords of %a placed at elements 0-1 of %b.
define <8 x i16> @insert_v8i16_2_1(float %tmp, <8 x i16> %b, <8 x i16> %a) {
; CHECK-LABEL: insert_v8i16_2_1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov v0.16b, v1.16b
; CHECK-NEXT:    mov v0.s[0], v2.s[0]
; CHECK-NEXT:    ret
  %res = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 1, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  ret <8 x i16> %res
}

; Two halfwords of %a placed at elements 1-2 of %b: the run starts on an odd
; element, so a table lookup is expected instead of a lane insert.
define <8 x i16> @insert_v8i16_2_15(float %tmp, <8 x i16> %b, <8 x i16> %a) {
; CHECK-LABEL: insert_v8i16_2_15:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $q2 killed $q2 def $q2_q3
; CHECK-NEXT:    adrp x8, .LCPI13_0
; CHECK-NEXT:    mov v3.16b, v1.16b
; CHECK-NEXT:    ldr q0, [x8, :lo12:.LCPI13_0]
; CHECK-NEXT:    tbl v0.16b, { v2.16b, v3.16b }, v0.16b
; CHECK-NEXT:    ret
  %res = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 8, i32 0, i32 1, i32 11, i32 12, i32 13, i32 14, i32 15>
  ret <8 x i16> %res
}

; Two halfwords of %a placed at elements 2-3 of %b.
define <8 x i16> @insert_v8i16_2_2(float %tmp, <8 x i16> %b, <8 x i16> %a) {
; CHECK-LABEL: insert_v8i16_2_2:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov v0.16b, v1.16b
; CHECK-NEXT:    mov v0.s[1], v2.s[0]
; CHECK-NEXT:    ret
  %res = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 8, i32 9, i32 0, i32 1, i32 12, i32 13, i32 14, i32 15>
  ret <8 x i16> %res
}

; Two halfwords of %a placed at elements 4-5 of %b.
define <8 x i16> @insert_v8i16_2_3(float %tmp, <8 x i16> %b, <8 x i16> %a) {
; CHECK-LABEL: insert_v8i16_2_3:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov v0.16b, v1.16b
; CHECK-NEXT:    mov v0.s[2], v2.s[0]
; CHECK-NEXT:    ret
  %res = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 0, i32 1, i32 14, i32 15>
  ret <8 x i16> %res
}

; Two halfwords of %a placed at elements 6-7 of %b.
define <8 x i16> @insert_v8i16_2_4(float %tmp, <8 x i16> %b, <8 x i16> %a) {
; CHECK-LABEL: insert_v8i16_2_4:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov v0.16b, v1.16b
; CHECK-NEXT:    mov v0.s[3], v2.s[0]
; CHECK-NEXT:    ret
  %res = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 0, i32 1>
  ret <8 x i16> %res
}

; 64-bit vector: low two halfwords from %a, high two kept from %b.
define <4 x i16> @insert_v4i16_2_1(float %tmp, <4 x i16> %b, <4 x i16> %a) {
; CHECK-LABEL: insert_v4i16_2_1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fmov d0, d2
; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT:    mov v0.s[1], v1.s[1]
; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT:    ret
  %res = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 1, i32 6, i32 7>
  ret <4 x i16> %res
}

; 64-bit vector: low two halfwords kept from %b, high two from %a's low half.
define <4 x i16> @insert_v4i16_2_2(float %tmp, <4 x i16> %b, <4 x i16> %a) {
; CHECK-LABEL: insert_v4i16_2_2:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fmov d0, d1
; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
; CHECK-NEXT:    mov v0.s[1], v2.s[0]
; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT:    ret
  %res = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 4, i32 5, i32 0, i32 1>
  ret <4 x i16> %res
}

; Low four halfwords from %a, high four kept from %b.
define <8 x i16> @insert_v8i16_4_1(float %tmp, <8 x i16> %b, <8 x i16> %a) {
; CHECK-LABEL: insert_v8i16_4_1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov v0.16b, v2.16b
; CHECK-NEXT:    mov v0.d[1], v1.d[1]
; CHECK-NEXT:    ret
  %res = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 12, i32 13, i32 14, i32 15>
  ret <8 x i16> %res
}

; Low four halfwords kept from %b, high four from %a's low half.
define <8 x i16> @insert_v8i16_4_2(float %tmp, <8 x i16> %b, <8 x i16> %a) {
; CHECK-LABEL: insert_v8i16_4_2:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov v0.16b, v1.16b
; CHECK-NEXT:    mov v0.d[1], v2.d[0]
; CHECK-NEXT:    ret
  %res = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 0, i32 1, i32 2, i32 3>
  ret <8 x i16> %res
}

; ---------------------------------------------------------------------------
; Register sources, i32 elements
; ---------------------------------------------------------------------------

; Low two words from %a, high two kept from %b.
define <4 x i32> @insert_v4i32_2_1(float %tmp, <4 x i32> %b, <4 x i32> %a) {
; CHECK-LABEL: insert_v4i32_2_1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov v0.16b, v2.16b
; CHECK-NEXT:    mov v0.d[1], v1.d[1]
; CHECK-NEXT:    ret
  %res = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 1, i32 6, i32 7>
  ret <4 x i32> %res
}

; Low two words kept from %b, high two from %a's low half.
define <4 x i32> @insert_v4i32_2_2(float %tmp, <4 x i32> %b, <4 x i32> %a) {
; CHECK-LABEL: insert_v4i32_2_2:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov v0.16b, v1.16b
; CHECK-NEXT:    mov v0.d[1], v2.d[0]
; CHECK-NEXT:    ret
  %res = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 4, i32 5, i32 0, i32 1>
  ret <4 x i32> %res
}


; ===========================================================================
; Same shuffles, but the inserted run is a narrow vector loaded from %a and
; widened (remaining mask lanes undef) to the width of %b first.
; ===========================================================================

; ---------------------------------------------------------------------------
; Loaded sources, i8 elements
; ---------------------------------------------------------------------------

; Loaded <4 x i8> placed at bytes 0-3 of %b.
define <16 x i8> @load_v16i8_4_1(float %tmp, <16 x i8> %b, ptr %a) {
; CHECK-LABEL: load_v16i8_4_1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov v0.16b, v1.16b
; CHECK-NEXT:    ld1 { v0.s }[0], [x0]
; CHECK-NEXT:    ret
  %sub = load <4 x i8>, ptr %a
  %wide = shufflevector <4 x i8> %sub, <4 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %res = shufflevector <16 x i8> %wide, <16 x i8> %b, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
  ret <16 x i8> %res
}

; Loaded <4 x i8> placed at bytes 2-5 of %b (not on a 4-byte boundary);
; a table lookup is expected.
define <16 x i8> @load_v16i8_4_15(float %tmp, <16 x i8> %b, ptr %a) {
; CHECK-LABEL: load_v16i8_4_15:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $q1 killed $q1 def $q0_q1
; CHECK-NEXT:    adrp x8, .LCPI24_0
; CHECK-NEXT:    ldr s0, [x0]
; CHECK-NEXT:    ldr q2, [x8, :lo12:.LCPI24_0]
; CHECK-NEXT:    tbl v0.16b, { v0.16b, v1.16b }, v2.16b
; CHECK-NEXT:    ret
  %sub = load <4 x i8>, ptr %a
  %wide = shufflevector <4 x i8> %sub, <4 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %res = shufflevector <16 x i8> %wide, <16 x i8> %b, <16 x i32> <i32 16, i32 17, i32 0, i32 1, i32 2, i32 3, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
  ret <16 x i8> %res
}

; Loaded <4 x i8> placed at bytes 4-7 of %b.
define <16 x i8> @load_v16i8_4_2(float %tmp, <16 x i8> %b, ptr %a) {
; CHECK-LABEL: load_v16i8_4_2:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov v0.16b, v1.16b
; CHECK-NEXT:    ld1 { v0.s }[1], [x0]
; CHECK-NEXT:    ret
  %sub = load <4 x i8>, ptr %a
  %wide = shufflevector <4 x i8> %sub, <4 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %res = shufflevector <16 x i8> %wide, <16 x i8> %b, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 0, i32 1, i32 2, i32 3, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
  ret <16 x i8> %res
}

; Loaded <4 x i8> placed at bytes 8-11 of %b.
define <16 x i8> @load_v16i8_4_3(float %tmp, <16 x i8> %b, ptr %a) {
; CHECK-LABEL: load_v16i8_4_3:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov v0.16b, v1.16b
; CHECK-NEXT:    ld1 { v0.s }[2], [x0]
; CHECK-NEXT:    ret
  %sub = load <4 x i8>, ptr %a
  %wide = shufflevector <4 x i8> %sub, <4 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %res = shufflevector <16 x i8> %wide, <16 x i8> %b, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 0, i32 1, i32 2, i32 3, i32 28, i32 29, i32 30, i32 31>
  ret <16 x i8> %res
}

; Loaded <4 x i8> placed at bytes 12-15 of %b.
define <16 x i8> @load_v16i8_4_4(float %tmp, <16 x i8> %b, ptr %a) {
; CHECK-LABEL: load_v16i8_4_4:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov v0.16b, v1.16b
; CHECK-NEXT:    ld1 { v0.s }[3], [x0]
; CHECK-NEXT:    ret
  %sub = load <4 x i8>, ptr %a
  %wide = shufflevector <4 x i8> %sub, <4 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %res = shufflevector <16 x i8> %wide, <16 x i8> %b, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 0, i32 1, i32 2, i32 3>
  ret <16 x i8> %res
}

; 64-bit vector: loaded <4 x i8> in the low half, high half kept from %b.
define <8 x i8> @load_v8i8_4_1(float %tmp, <8 x i8> %b, ptr %a) {
; CHECK-LABEL: load_v8i8_4_1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr s0, [x0]
; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT:    mov v0.s[1], v1.s[1]
; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT:    ret
  %sub = load <4 x i8>, ptr %a
  %wide = shufflevector <4 x i8> %sub, <4 x i8> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
  %res = shufflevector <8 x i8> %wide, <8 x i8> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 12, i32 13, i32 14, i32 15>
  ret <8 x i8> %res
}

; 64-bit vector: low half kept from %b, loaded <4 x i8> in the high half.
define <8 x i8> @load_v8i8_4_2(float %tmp, <8 x i8> %b, ptr %a) {
; CHECK-LABEL: load_v8i8_4_2:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fmov d0, d1
; CHECK-NEXT:    ldr s2, [x0]
; CHECK-NEXT:    mov v0.s[1], v2.s[0]
; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT:    ret
  %sub = load <4 x i8>, ptr %a
  %wide = shufflevector <4 x i8> %sub, <4 x i8> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
  %res = shufflevector <8 x i8> %wide, <8 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 0, i32 1, i32 2, i32 3>
  ret <8 x i8> %res
}

; Loaded <8 x i8> in the low half, high half kept from %b.
define <16 x i8> @load_v16i8_8_1(float %tmp, <16 x i8> %b, ptr %a) {
; CHECK-LABEL: load_v16i8_8_1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ext v1.16b, v1.16b, v1.16b, #8
; CHECK-NEXT:    ldr d0, [x0]
; CHECK-NEXT:    mov v0.d[1], v1.d[0]
; CHECK-NEXT:    ret
  %sub = load <8 x i8>, ptr %a
  %wide = shufflevector <8 x i8> %sub, <8 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %res = shufflevector <16 x i8> %wide, <16 x i8> %b, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
  ret <16 x i8> %res
}

; Low half kept from %b, loaded <8 x i8> in the high half.
define <16 x i8> @load_v16i8_8_2(float %tmp, <16 x i8> %b, ptr %a) {
; CHECK-LABEL: load_v16i8_8_2:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov v0.16b, v1.16b
; CHECK-NEXT:    ldr d2, [x0]
; CHECK-NEXT:    mov v0.d[1], v2.d[0]
; CHECK-NEXT:    ret
  %sub = load <8 x i8>, ptr %a
  %wide = shufflevector <8 x i8> %sub, <8 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %res = shufflevector <16 x i8> %wide, <16 x i8> %b, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <16 x i8> %res
}

; Loaded <2 x i8> placed at bytes 0-1 of a 64-bit %b.
define <8 x i8> @load_v8i8_2_1(float %tmp, <8 x i8> %b, ptr %a) {
; CHECK-LABEL: load_v8i8_2_1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fmov d0, d1
; CHECK-NEXT:    ldr h2, [x0]
; CHECK-NEXT:    mov v0.h[0], v2.h[0]
; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT:    ret
  %sub = load <2 x i8>, ptr %a
  %wide = shufflevector <2 x i8> %sub, <2 x i8> poison, <8 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %res = shufflevector <8 x i8> %wide, <8 x i8> %b, <8 x i32> <i32 0, i32 1, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  ret <8 x i8> %res
}

; Loaded <2 x i8> placed at bytes 1-2 of a 64-bit %b (odd start);
; a table lookup is expected.
define <8 x i8> @load_v8i8_2_15(float %tmp, <8 x i8> %b, ptr %a) {
; CHECK-LABEL: load_v8i8_2_15:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr h0, [x0]
; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT:    adrp x8, .LCPI33_0
; CHECK-NEXT:    mov v0.d[1], v1.d[0]
; CHECK-NEXT:    ldr d1, [x8, :lo12:.LCPI33_0]
; CHECK-NEXT:    tbl v0.8b, { v0.16b }, v1.8b
; CHECK-NEXT:    ret
  %sub = load <2 x i8>, ptr %a
  %wide = shufflevector <2 x i8> %sub, <2 x i8> poison, <8 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %res = shufflevector <8 x i8> %wide, <8 x i8> %b, <8 x i32> <i32 8, i32 0, i32 1, i32 11, i32 12, i32 13, i32 14, i32 15>
  ret <8 x i8> %res
}

; Loaded <2 x i8> placed at bytes 2-3 of a 64-bit %b.
define <8 x i8> @load_v8i8_2_2(float %tmp, <8 x i8> %b, ptr %a) {
; CHECK-LABEL: load_v8i8_2_2:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fmov d0, d1
; CHECK-NEXT:    ldr h2, [x0]
; CHECK-NEXT:    mov v0.h[1], v2.h[0]
; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT:    ret
  %sub = load <2 x i8>, ptr %a
  %wide = shufflevector <2 x i8> %sub, <2 x i8> poison, <8 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %res = shufflevector <8 x i8> %wide, <8 x i8> %b, <8 x i32> <i32 8, i32 9, i32 0, i32 1, i32 12, i32 13, i32 14, i32 15>
  ret <8 x i8> %res
}

; Loaded <2 x i8> placed at bytes 4-5 of a 64-bit %b.
define <8 x i8> @load_v8i8_2_3(float %tmp, <8 x i8> %b, ptr %a) {
; CHECK-LABEL: load_v8i8_2_3:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fmov d0, d1
; CHECK-NEXT:    ldr h2, [x0]
; CHECK-NEXT:    mov v0.h[2], v2.h[0]
; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT:    ret
  %sub = load <2 x i8>, ptr %a
  %wide = shufflevector <2 x i8> %sub, <2 x i8> poison, <8 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %res = shufflevector <8 x i8> %wide, <8 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 0, i32 1, i32 14, i32 15>
  ret <8 x i8> %res
}

; Loaded <2 x i8> placed at bytes 6-7 of a 64-bit %b.
define <8 x i8> @load_v8i8_2_4(float %tmp, <8 x i8> %b, ptr %a) {
; CHECK-LABEL: load_v8i8_2_4:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fmov d0, d1
; CHECK-NEXT:    ldr h2, [x0]
; CHECK-NEXT:    mov v0.h[3], v2.h[0]
; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT:    ret
  %sub = load <2 x i8>, ptr %a
  %wide = shufflevector <2 x i8> %sub, <2 x i8> poison, <8 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %res = shufflevector <8 x i8> %wide, <8 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 0, i32 1>
  ret <8 x i8> %res
}

; <4 x i8> result: loaded <2 x i8> in elements 0-1, elements 2-3 kept from %b.
define <4 x i8> @load_v4i8_2_1(float %tmp, <4 x i8> %b, ptr %a) {
; CHECK-LABEL: load_v4i8_2_1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr h0, [x0]
; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT:    zip1 v0.8b, v0.8b, v0.8b
; CHECK-NEXT:    mov v0.s[1], v1.s[1]
; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT:    ret
  %sub = load <2 x i8>, ptr %a
  %wide = shufflevector <2 x i8> %sub, <2 x i8> poison, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
  %res = shufflevector <4 x i8> %wide, <4 x i8> %b, <4 x i32> <i32 0, i32 1, i32 6, i32 7>
  ret <4 x i8> %res
}

; <4 x i8> result: elements 0-1 kept from %b, loaded <2 x i8> in elements 2-3.
define <4 x i8> @load_v4i8_2_2(float %tmp, <4 x i8> %b, ptr %a) {
; CHECK-LABEL: load_v4i8_2_2:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr h0, [x0]
; CHECK-NEXT:    zip1 v2.8b, v0.8b, v0.8b
; CHECK-NEXT:    fmov d0, d1
; CHECK-NEXT:    mov v0.s[1], v2.s[0]
; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT:    ret
  %sub = load <2 x i8>, ptr %a
  %wide = shufflevector <2 x i8> %sub, <2 x i8> poison, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
  %res = shufflevector <4 x i8> %wide, <4 x i8> %b, <4 x i32> <i32 4, i32 5, i32 0, i32 1>
  ret <4 x i8> %res
}

; ---------------------------------------------------------------------------
; Loaded sources, i16 elements
; ---------------------------------------------------------------------------

; Loaded <2 x i16> placed at elements 0-1 of %b.
define <8 x i16> @load_v8i16_2_1(float %tmp, <8 x i16> %b, ptr %a) {
; CHECK-LABEL: load_v8i16_2_1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov v0.16b, v1.16b
; CHECK-NEXT:    ld1 { v0.s }[0], [x0]
; CHECK-NEXT:    ret
  %sub = load <2 x i16>, ptr %a
  %wide = shufflevector <2 x i16> %sub, <2 x i16> poison, <8 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %res = shufflevector <8 x i16> %wide, <8 x i16> %b, <8 x i32> <i32 0, i32 1, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  ret <8 x i16> %res
}

; Loaded <2 x i16> placed at elements 1-2 of %b (odd start);
; a table lookup is expected.
define <8 x i16> @load_v8i16_2_15(float %tmp, <8 x i16> %b, ptr %a) {
; CHECK-LABEL: load_v8i16_2_15:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $q1 killed $q1 def $q0_q1
; CHECK-NEXT:    adrp x8, .LCPI40_0
; CHECK-NEXT:    ldr s0, [x0]
; CHECK-NEXT:    ldr q2, [x8, :lo12:.LCPI40_0]
; CHECK-NEXT:    tbl v0.16b, { v0.16b, v1.16b }, v2.16b
; CHECK-NEXT:    ret
  %sub = load <2 x i16>, ptr %a
  %wide = shufflevector <2 x i16> %sub, <2 x i16> poison, <8 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %res = shufflevector <8 x i16> %wide, <8 x i16> %b, <8 x i32> <i32 8, i32 0, i32 1, i32 11, i32 12, i32 13, i32 14, i32 15>
  ret <8 x i16> %res
}

; Loaded <2 x i16> placed at elements 2-3 of %b.
define <8 x i16> @load_v8i16_2_2(float %tmp, <8 x i16> %b, ptr %a) {
; CHECK-LABEL: load_v8i16_2_2:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov v0.16b, v1.16b
; CHECK-NEXT:    ld1 { v0.s }[1], [x0]
; CHECK-NEXT:    ret
  %sub = load <2 x i16>, ptr %a
  %wide = shufflevector <2 x i16> %sub, <2 x i16> poison, <8 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %res = shufflevector <8 x i16> %wide, <8 x i16> %b, <8 x i32> <i32 8, i32 9, i32 0, i32 1, i32 12, i32 13, i32 14, i32 15>
  ret <8 x i16> %res
}

; Loaded <2 x i16> placed at elements 4-5 of %b.
define <8 x i16> @load_v8i16_2_3(float %tmp, <8 x i16> %b, ptr %a) {
; CHECK-LABEL: load_v8i16_2_3:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov v0.16b, v1.16b
; CHECK-NEXT:    ld1 { v0.s }[2], [x0]
; CHECK-NEXT:    ret
  %sub = load <2 x i16>, ptr %a
  %wide = shufflevector <2 x i16> %sub, <2 x i16> poison, <8 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %res = shufflevector <8 x i16> %wide, <8 x i16> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 0, i32 1, i32 14, i32 15>
  ret <8 x i16> %res
}

; Loaded <2 x i16> placed at elements 6-7 of %b.
define <8 x i16> @load_v8i16_2_4(float %tmp, <8 x i16> %b, ptr %a) {
; CHECK-LABEL: load_v8i16_2_4:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov v0.16b, v1.16b
; CHECK-NEXT:    ld1 { v0.s }[3], [x0]
; CHECK-NEXT:    ret
  %sub = load <2 x i16>, ptr %a
  %wide = shufflevector <2 x i16> %sub, <2 x i16> poison, <8 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %res = shufflevector <8 x i16> %wide, <8 x i16> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 0, i32 1>
  ret <8 x i16> %res
}

; 64-bit vector: loaded <2 x i16> in elements 0-1, elements 2-3 kept from %b.
define <4 x i16> @load_v4i16_2_1(float %tmp, <4 x i16> %b, ptr %a) {
; CHECK-LABEL: load_v4i16_2_1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr s0, [x0]
; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT:    mov v0.s[1], v1.s[1]
; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT:    ret
  %sub = load <2 x i16>, ptr %a
  %wide = shufflevector <2 x i16> %sub, <2 x i16> poison, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
  %res = shufflevector <4 x i16> %wide, <4 x i16> %b, <4 x i32> <i32 0, i32 1, i32 6, i32 7>
  ret <4 x i16> %res
}

; 64-bit vector: elements 0-1 kept from %b, loaded <2 x i16> in elements 2-3.
define <4 x i16> @load_v4i16_2_2(float %tmp, <4 x i16> %b, ptr %a) {
; CHECK-LABEL: load_v4i16_2_2:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fmov d0, d1
; CHECK-NEXT:    ldr s2, [x0]
; CHECK-NEXT:    mov v0.s[1], v2.s[0]
; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT:    ret
  %sub = load <2 x i16>, ptr %a
  %wide = shufflevector <2 x i16> %sub, <2 x i16> poison, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
  %res = shufflevector <4 x i16> %wide, <4 x i16> %b, <4 x i32> <i32 4, i32 5, i32 0, i32 1>
  ret <4 x i16> %res
}

; Loaded <4 x i16> in the low half, high half kept from %b.
define <8 x i16> @load_v8i16_4_1(float %tmp, <8 x i16> %b, ptr %a) {
; CHECK-LABEL: load_v8i16_4_1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ext v1.16b, v1.16b, v1.16b, #8
; CHECK-NEXT:    ldr d0, [x0]
; CHECK-NEXT:    mov v0.d[1], v1.d[0]
; CHECK-NEXT:    ret
  %sub = load <4 x i16>, ptr %a
  %wide = shufflevector <4 x i16> %sub, <4 x i16> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
  %res = shufflevector <8 x i16> %wide, <8 x i16> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 12, i32 13, i32 14, i32 15>
  ret <8 x i16> %res
}

; Low half kept from %b, loaded <4 x i16> in the high half.
define <8 x i16> @load_v8i16_4_2(float %tmp, <8 x i16> %b, ptr %a) {
; CHECK-LABEL: load_v8i16_4_2:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov v0.16b, v1.16b
; CHECK-NEXT:    ldr d2, [x0]
; CHECK-NEXT:    mov v0.d[1], v2.d[0]
; CHECK-NEXT:    ret
  %sub = load <4 x i16>, ptr %a
  %wide = shufflevector <4 x i16> %sub, <4 x i16> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
  %res = shufflevector <8 x i16> %wide, <8 x i16> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 0, i32 1, i32 2, i32 3>
  ret <8 x i16> %res
}

; ---------------------------------------------------------------------------
; Loaded sources, i32 elements
; ---------------------------------------------------------------------------

; Loaded <2 x i32> in the low half, high half kept from %b.
define <4 x i32> @load_v4i32_2_1(float %tmp, <4 x i32> %b, ptr %a) {
; CHECK-LABEL: load_v4i32_2_1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ext v1.16b, v1.16b, v1.16b, #8
; CHECK-NEXT:    ldr d0, [x0]
; CHECK-NEXT:    mov v0.d[1], v1.d[0]
; CHECK-NEXT:    ret
  %sub = load <2 x i32>, ptr %a
  %wide = shufflevector <2 x i32> %sub, <2 x i32> poison, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
  %res = shufflevector <4 x i32> %wide, <4 x i32> %b, <4 x i32> <i32 0, i32 1, i32 6, i32 7>
  ret <4 x i32> %res
}

; Low half kept from %b, loaded <2 x i32> in the high half.
define <4 x i32> @load_v4i32_2_2(float %tmp, <4 x i32> %b, ptr %a) {
; CHECK-LABEL: load_v4i32_2_2:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov v0.16b, v1.16b
; CHECK-NEXT:    ldr d2, [x0]
; CHECK-NEXT:    mov v0.d[1], v2.d[0]
; CHECK-NEXT:    ret
  %sub = load <2 x i32>, ptr %a
  %wide = shufflevector <2 x i32> %sub, <2 x i32> poison, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
  %res = shufflevector <4 x i32> %wide, <4 x i32> %b, <4 x i32> <i32 4, i32 5, i32 0, i32 1>
  ret <4 x i32> %res
}

; ---------------------------------------------------------------------------
; Results assembled from more than one loaded <4 x i8>
; ---------------------------------------------------------------------------

; Concatenate the <4 x i8> at %a with the <4 x i8> at %b.
define <8 x i8> @load2_v4i8(float %tmp, ptr %a, ptr %b) {
; CHECK-LABEL: load2_v4i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr s0, [x0]
; CHECK-NEXT:    ld1 { v0.s }[1], [x1]
; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT:    ret
  %va = load <4 x i8>, ptr %a
  %vb = load <4 x i8>, ptr %b
  %ab = shufflevector <4 x i8> %va, <4 x i8> %vb, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <8 x i8> %ab
}

; Three loads: a[0] ++ b[0] in the low half, a[1] ++ undef in the high half.
; %b.next is computed but has no user in this function.
define <16 x i8> @load3_v4i8(float %tmp, ptr %a, ptr %b) {
; CHECK-LABEL: load3_v4i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldp s0, s1, [x0]
; CHECK-NEXT:    ld1 { v0.s }[1], [x1]
; CHECK-NEXT:    mov v0.d[1], v1.d[0]
; CHECK-NEXT:    ret
  %va = load <4 x i8>, ptr %a
  %vb = load <4 x i8>, ptr %b
  %a.next = getelementptr <4 x i8>, ptr %a, i64 1
  %b.next = getelementptr <4 x i8>, ptr %b, i64 1
  %vc = load <4 x i8>, ptr %a.next
  %ab = shufflevector <4 x i8> %va, <4 x i8> %vb, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %cu = shufflevector <4 x i8> %vc, <4 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %res = shufflevector <8 x i8> %ab, <8 x i8> %cu, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  ret <16 x i8> %res
}

; Four loads: a[0] ++ b[0] in the low half, a[1] ++ b[1] in the high half.
define <16 x i8> @load4_v4i8(float %tmp, ptr %a, ptr %b) {
; CHECK-LABEL: load4_v4i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldp s0, s1, [x0]
; CHECK-NEXT:    ld1 { v0.s }[1], [x1], #4
; CHECK-NEXT:    ld1 { v1.s }[1], [x1]
; CHECK-NEXT:    mov v0.d[1], v1.d[0]
; CHECK-NEXT:    ret
  %va = load <4 x i8>, ptr %a
  %vb = load <4 x i8>, ptr %b
  %a.next = getelementptr <4 x i8>, ptr %a, i64 1
  %b.next = getelementptr <4 x i8>, ptr %b, i64 1
  %vc = load <4 x i8>, ptr %a.next
  %vd = load <4 x i8>, ptr %b.next
  %ab = shufflevector <4 x i8> %va, <4 x i8> %vb, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %cd = shufflevector <4 x i8> %vc, <4 x i8> %vd, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %res = shufflevector <8 x i8> %ab, <8 x i8> %cd, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  ret <16 x i8> %res
}

; The a ++ b concatenation is used for both halves of the result.
define <16 x i8> @load2multi1_v4i8(float %tmp, ptr %a, ptr %b) {
; CHECK-LABEL: load2multi1_v4i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr s0, [x0]
; CHECK-NEXT:    ld1 { v0.s }[1], [x1]
; CHECK-NEXT:    mov v0.d[1], v0.d[0]
; CHECK-NEXT:    ret
  %va = load <4 x i8>, ptr %a
  %vb = load <4 x i8>, ptr %b
  %ab = shufflevector <4 x i8> %va, <4 x i8> %vb, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %res = shufflevector <8 x i8> %ab, <8 x i8> %ab, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  ret <16 x i8> %res
}

; Each loaded vector is concatenated with itself: result is a ++ a ++ b ++ b.
define <16 x i8> @load2multi2_v4i8(float %tmp, ptr %a, ptr %b) {
; CHECK-LABEL: load2multi2_v4i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr s0, [x0]
; CHECK-NEXT:    ldr s1, [x1]
; CHECK-NEXT:    ushll v0.8h, v0.8b, #0
; CHECK-NEXT:    ushll v1.8h, v1.8b, #0
; CHECK-NEXT:    mov v1.d[1], v1.d[0]
; CHECK-NEXT:    mov v0.d[1], v0.d[0]
; CHECK-NEXT:    uzp1 v0.16b, v0.16b, v1.16b
; CHECK-NEXT:    ret
  %va = load <4 x i8>, ptr %a
  %vb = load <4 x i8>, ptr %b
  %a.dup = shufflevector <4 x i8> %va, <4 x i8> %va, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %b.dup = shufflevector <4 x i8> %vb, <4 x i8> %vb, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %res = shufflevector <8 x i8> %a.dup, <8 x i8> %b.dup, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  ret <16 x i8> %res
}

; Two <4 x i8> loads (offsets 20 and 28 from %i44) are concatenated and stored
; at offset 22, interleaved with scalar byte loads and stores to offsets in the
; same 20..31 range. Per the expected output, every load is emitted ahead of
; the stores. Statement order here is significant; the entry label must stay
; `bb` because it appears in the expected asm comment.
define void @loads_before_stores(ptr %i44) {
; CHECK-LABEL: loads_before_stores:
; CHECK:       // %bb.0: // %bb
; CHECK-NEXT:    ldr s0, [x0, #28]
; CHECK-NEXT:    add x8, x0, #20
; CHECK-NEXT:    ldrh w9, [x0, #26]
; CHECK-NEXT:    ldrh w10, [x0, #24]
; CHECK-NEXT:    ld1 { v0.s }[1], [x8]
; CHECK-NEXT:    strh w9, [x0, #20]
; CHECK-NEXT:    strh w10, [x0, #30]
; CHECK-NEXT:    stur d0, [x0, #22]
; CHECK-NEXT:    ret
bb:
  %p20 = getelementptr inbounds i8, ptr %i44, i64 20
  %p26 = getelementptr inbounds i8, ptr %i44, i64 26
  %b26 = load i8, ptr %p26, align 1
  %p21 = getelementptr inbounds i8, ptr %i44, i64 21
  %p27 = getelementptr inbounds i8, ptr %i44, i64 27
  %b27 = load i8, ptr %p27, align 1
  %p22 = getelementptr inbounds i8, ptr %i44, i64 22
  %p28 = getelementptr inbounds i8, ptr %i44, i64 28
  %p24 = getelementptr inbounds i8, ptr %i44, i64 24
  %p30 = getelementptr inbounds i8, ptr %i44, i64 30
  %b24 = load i8, ptr %p24, align 1
  %p25 = getelementptr inbounds i8, ptr %i44, i64 25
  %p31 = getelementptr inbounds i8, ptr %i44, i64 31
  %b25 = load i8, ptr %p25, align 1
  %v20 = load <4 x i8>, ptr %p20, align 1
  store i8 %b26, ptr %p20, align 1
  store i8 %b27, ptr %p21, align 1
  %v28 = load <4 x i8>, ptr %p28, align 1
  store i8 %b24, ptr %p30, align 1
  %cat = shufflevector <4 x i8> %v28, <4 x i8> %v20, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  store <8 x i8> %cat, ptr %p22, align 1
  store i8 %b25, ptr %p31, align 1
  ret void
}