; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -aarch64-sve-vector-bits-min=256 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_256
; RUN: llc -aarch64-sve-vector-bits-min=512 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512

; Fixed-length shufflevector masks that reverse bytes / halfwords / words
; within wider elements (REVB / REVH / REVW), doublewords within quadwords
; (REVD), or all elements of a vector (REV), plus cases where those
; instructions are not selected. Both runs share the common prefix; the
; VBITS_GE_* prefixes are used only where the two runs differ.

target triple = "aarch64-unknown-linux-gnu"

; REVB pattern for shuffle v32i8 -> v16i16
define void @test_revbv16i16(ptr %a) #0 {
; CHECK-LABEL: test_revbv16i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.b, vl32
; CHECK-NEXT:    ptrue p1.h
; CHECK-NEXT:    ld1b { z0.b }, p0/z, [x0]
; CHECK-NEXT:    revb z0.h, p1/m, z0.h
; CHECK-NEXT:    st1b { z0.b }, p0, [x0]
; CHECK-NEXT:    ret
  %tmp1 = load <32 x i8>, ptr %a
  %tmp2 = shufflevector <32 x i8> %tmp1, <32 x i8> undef, <32 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 9, i32 8, i32 11, i32 10, i32 13, i32 12, i32 15, i32 14, i32 17, i32 16, i32 19, i32 18, i32 21, i32 20, i32 23, i32 22, i32 undef, i32 24, i32 27, i32 undef, i32 29, i32 28, i32 undef, i32 undef>
  store <32 x i8> %tmp2, ptr %a
  ret void
}

; REVB pattern for shuffle v32i8 -> v8i32
define void @test_revbv8i32(ptr %a) #0 {
; CHECK-LABEL: test_revbv8i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.b, vl32
; CHECK-NEXT:    ptrue p1.s
; CHECK-NEXT:    ld1b { z0.b }, p0/z, [x0]
; CHECK-NEXT:    revb z0.s, p1/m, z0.s
; CHECK-NEXT:    st1b { z0.b }, p0, [x0]
; CHECK-NEXT:    ret
  %tmp1 = load <32 x i8>, ptr %a
  %tmp2 = shufflevector <32 x i8> %tmp1, <32 x i8> undef, <32 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 11, i32 10, i32 9, i32 8, i32 15, i32 14, i32 13, i32 12, i32 19, i32 18, i32 17, i32 16, i32 23, i32 22, i32 21, i32 20, i32 27, i32 undef, i32 undef, i32 undef, i32 31, i32 30, i32 29, i32 undef>
  store <32 x i8> %tmp2, ptr %a
  ret void
}

; REVB pattern for shuffle v32i8 -> v4i64
define void @test_revbv4i64(ptr %a) #0 {
; CHECK-LABEL: test_revbv4i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.b, vl32
; CHECK-NEXT:    ptrue p1.d
; CHECK-NEXT:    ld1b { z0.b }, p0/z, [x0]
; CHECK-NEXT:    revb z0.d, p1/m, z0.d
; CHECK-NEXT:    st1b { z0.b }, p0, [x0]
; CHECK-NEXT:    ret
  %tmp1 = load <32 x i8>, ptr %a
  %tmp2 = shufflevector <32 x i8> %tmp1, <32 x i8> undef, <32 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 31, i32 30, i32 29, i32 undef, i32 27, i32 undef, i32 undef, i32 undef>
  store <32 x i8> %tmp2, ptr %a
  ret void
}

; REVH pattern for shuffle v16i16 -> v8i32
define void @test_revhv8i32(ptr %a) #0 {
; CHECK-LABEL: test_revhv8i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h, vl16
; CHECK-NEXT:    ptrue p1.s
; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
; CHECK-NEXT:    revh z0.s, p1/m, z0.s
; CHECK-NEXT:    st1h { z0.h }, p0, [x0]
; CHECK-NEXT:    ret
  %tmp1 = load <16 x i16>, ptr %a
  %tmp2 = shufflevector <16 x i16> %tmp1, <16 x i16> undef, <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 9, i32 8, i32 11, i32 10, i32 13, i32 12, i32 15, i32 14>
  store <16 x i16> %tmp2, ptr %a
  ret void
}

; REVH pattern for shuffle v16f16 -> v8f32
define void @test_revhv8f32(ptr %a) #0 {
; CHECK-LABEL: test_revhv8f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h, vl16
; CHECK-NEXT:    ptrue p1.s
; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
; CHECK-NEXT:    revh z0.s, p1/m, z0.s
; CHECK-NEXT:    st1h { z0.h }, p0, [x0]
; CHECK-NEXT:    ret
  %tmp1 = load <16 x half>, ptr %a
  %tmp2 = shufflevector <16 x half> %tmp1, <16 x half> undef, <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 9, i32 8, i32 11, i32 10, i32 13, i32 12, i32 15, i32 14>
  store <16 x half> %tmp2, ptr %a
  ret void
}

; REVH pattern for shuffle v16i16 -> v4i64
define void @test_revhv4i64(ptr %a) #0 {
; CHECK-LABEL: test_revhv4i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h, vl16
; CHECK-NEXT:    ptrue p1.d
; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
; CHECK-NEXT:    revh z0.d, p1/m, z0.d
; CHECK-NEXT:    st1h { z0.h }, p0, [x0]
; CHECK-NEXT:    ret
  %tmp1 = load <16 x i16>, ptr %a
  %tmp2 = shufflevector <16 x i16> %tmp1, <16 x i16> undef, <16 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 11, i32 10, i32 9, i32 8, i32 15, i32 14, i32 13, i32 12>
  store <16 x i16> %tmp2, ptr %a
  ret void
}

; REVW pattern for shuffle v8i32 -> v4i64
define void @test_revwv4i64(ptr %a) #0 {
; CHECK-LABEL: test_revwv4i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s, vl8
; CHECK-NEXT:    ptrue p1.d
; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT:    revw z0.d, p1/m, z0.d
; CHECK-NEXT:    st1w { z0.s }, p0, [x0]
; CHECK-NEXT:    ret
  %tmp1 = load <8 x i32>, ptr %a
  %tmp2 = shufflevector <8 x i32> %tmp1, <8 x i32> undef, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
  store <8 x i32> %tmp2, ptr %a
  ret void
}

; REVW pattern for shuffle v8f32 -> v4f64
define void @test_revwv4f64(ptr %a) #0 {
; CHECK-LABEL: test_revwv4f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s, vl8
; CHECK-NEXT:    ptrue p1.d
; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT:    revw z0.d, p1/m, z0.d
; CHECK-NEXT:    st1w { z0.s }, p0, [x0]
; CHECK-NEXT:    ret
  %tmp1 = load <8 x float>, ptr %a
  %tmp2 = shufflevector <8 x float> %tmp1, <8 x float> undef, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
  store <8 x float> %tmp2, ptr %a
  ret void
}

; Don't use SVE for 128-bit vectors
define <16 x i8> @test_revv16i8(ptr %a) #0 {
; CHECK-LABEL: test_revv16i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr q0, [x0]
; CHECK-NEXT:    rev64 v0.16b, v0.16b
; CHECK-NEXT:    ret
  %tmp1 = load <16 x i8>, ptr %a
  %tmp2 = shufflevector <16 x i8> %tmp1, <16 x i8> undef, <16 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8>
  ret <16 x i8> %tmp2
}

; REVW pattern for shuffle two v8i32 inputs with the second input available.
define void @test_revwv8i32v8i32(ptr %a, ptr %b) #0 {
; CHECK-LABEL: test_revwv8i32v8i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s, vl8
; CHECK-NEXT:    ptrue p1.d
; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x1]
; CHECK-NEXT:    revw z0.d, p1/m, z0.d
; CHECK-NEXT:    st1w { z0.s }, p0, [x0]
; CHECK-NEXT:    ret
  %tmp1 = load <8 x i32>, ptr %a
  %tmp2 = load <8 x i32>, ptr %b
  %tmp3 = shufflevector <8 x i32> %tmp1, <8 x i32> %tmp2, <8 x i32> <i32 9, i32 8, i32 11, i32 10, i32 13, i32 12, i32 15, i32 14>
  store <8 x i32> %tmp3, ptr %a
  ret void
}

; REVH pattern for shuffle v32i16 with 256 bits and 512 bits SVE.
define void @test_revhv32i16(ptr %a) #0 {
; VBITS_GE_256-LABEL: test_revhv32i16:
; VBITS_GE_256:       // %bb.0:
; VBITS_GE_256-NEXT:    ptrue p0.h, vl16
; VBITS_GE_256-NEXT:    mov x8, #16 // =0x10
; VBITS_GE_256-NEXT:    ptrue p1.d
; VBITS_GE_256-NEXT:    ld1h { z0.h }, p0/z, [x0, x8, lsl #1]
; VBITS_GE_256-NEXT:    ld1h { z1.h }, p0/z, [x0]
; VBITS_GE_256-NEXT:    revh z0.d, p1/m, z0.d
; VBITS_GE_256-NEXT:    revh z1.d, p1/m, z1.d
; VBITS_GE_256-NEXT:    st1h { z0.h }, p0, [x0, x8, lsl #1]
; VBITS_GE_256-NEXT:    st1h { z1.h }, p0, [x0]
; VBITS_GE_256-NEXT:    ret
;
; VBITS_GE_512-LABEL: test_revhv32i16:
; VBITS_GE_512:       // %bb.0:
; VBITS_GE_512-NEXT:    ptrue p0.h, vl32
; VBITS_GE_512-NEXT:    ptrue p1.d
; VBITS_GE_512-NEXT:    ld1h { z0.h }, p0/z, [x0]
; VBITS_GE_512-NEXT:    revh z0.d, p1/m, z0.d
; VBITS_GE_512-NEXT:    st1h { z0.h }, p0, [x0]
; VBITS_GE_512-NEXT:    ret
  %tmp1 = load <32 x i16>, ptr %a
  %tmp2 = shufflevector <32 x i16> %tmp1, <32 x i16> undef, <32 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 11, i32 10, i32 9, i32 8, i32 15, i32 14, i32 13, i32 12, i32 19, i32 18, i32 17, i32 16, i32 23, i32 22, i32 21, i32 20, i32 27, i32 undef, i32 undef, i32 undef, i32 31, i32 30, i32 29, i32 undef>
  store <32 x i16> %tmp2, ptr %a
  ret void
}

; Only support to reverse bytes / halfwords / words within elements
define void @test_rev_elts_fail(ptr %a) #1 {
; CHECK-LABEL: test_rev_elts_fail:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    adrp x8, .LCPI11_0
; CHECK-NEXT:    add x8, x8, :lo12:.LCPI11_0
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT:    ld1d { z1.d }, p0/z, [x8]
; CHECK-NEXT:    tbl z0.d, { z0.d }, z1.d
; CHECK-NEXT:    st1d { z0.d }, p0, [x0]
; CHECK-NEXT:    ret
  %tmp1 = load <4 x i64>, ptr %a
  %tmp2 = shufflevector <4 x i64> %tmp1, <4 x i64> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
  store <4 x i64> %tmp2, ptr %a
  ret void
}

; This is the same test as above, but with sve2p1 it can use the REVD instruction to reverse
; the double-words within quad-words.
define void @test_revdv4i64_sve2p1(ptr %a) #2 {
; CHECK-LABEL: test_revdv4i64_sve2p1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl4
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT:    revd z0.q, p0/m, z0.q
; CHECK-NEXT:    st1d { z0.d }, p0, [x0]
; CHECK-NEXT:    ret
  %tmp1 = load <4 x i64>, ptr %a
  %tmp2 = shufflevector <4 x i64> %tmp1, <4 x i64> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
  store <4 x i64> %tmp2, ptr %a
  ret void
}

; Floating-point counterpart of the sve2p1 test above: the same mask applied
; to v4f64 is also expected to select REVD.
define void @test_revdv4f64_sve2p1(ptr %a) #2 {
; CHECK-LABEL: test_revdv4f64_sve2p1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl4
; CHECK-NEXT:    ptrue p1.d
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT:    revd z0.q, p1/m, z0.q
; CHECK-NEXT:    st1d { z0.d }, p0, [x0]
; CHECK-NEXT:    ret
  %tmp1 = load <4 x double>, ptr %a
  %tmp2 = shufflevector <4 x double> %tmp1, <4 x double> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
  store <4 x double> %tmp2, ptr %a
  ret void
}

; REV instruction will reverse the order of all elements in the vector.
; When the vector length and the target register size are inconsistent,
; the correctness of generated REV instruction for shuffle pattern cannot be guaranteed.

; sve-vector-bits-min=256, sve-vector-bits-max is not set, REV inst can't be generated.
define void @test_revv8i32(ptr %a) #0 {
; VBITS_GE_256-LABEL: test_revv8i32:
; VBITS_GE_256:       // %bb.0:
; VBITS_GE_256-NEXT:    ptrue p0.s, vl8
; VBITS_GE_256-NEXT:    index z0.s, #7, #-1
; VBITS_GE_256-NEXT:    ld1w { z1.s }, p0/z, [x0]
; VBITS_GE_256-NEXT:    tbl z0.s, { z1.s }, z0.s
; VBITS_GE_256-NEXT:    st1w { z0.s }, p0, [x0]
; VBITS_GE_256-NEXT:    ret
;
; VBITS_GE_512-LABEL: test_revv8i32:
; VBITS_GE_512:       // %bb.0:
; VBITS_GE_512-NEXT:    ptrue p0.s, vl8
; VBITS_GE_512-NEXT:    adrp x8, .LCPI14_0
; VBITS_GE_512-NEXT:    add x8, x8, :lo12:.LCPI14_0
; VBITS_GE_512-NEXT:    ptrue p1.s, vl16
; VBITS_GE_512-NEXT:    ld1w { z0.s }, p0/z, [x0]
; VBITS_GE_512-NEXT:    ld1w { z1.s }, p1/z, [x8]
; VBITS_GE_512-NEXT:    tbl z0.s, { z0.s }, z1.s
; VBITS_GE_512-NEXT:    st1w { z0.s }, p0, [x0]
; VBITS_GE_512-NEXT:    ret
  %tmp1 = load <8 x i32>, ptr %a
  %tmp2 = shufflevector <8 x i32> %tmp1, <8 x i32> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
  store <8 x i32> %tmp2, ptr %a
  ret void
}

; REV pattern for v32i8 shuffle with vscale_range(2,2)
define void @test_revv32i8_vl256(ptr %a) #1 {
; CHECK-LABEL: test_revv32i8_vl256:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.b
; CHECK-NEXT:    ld1b { z0.b }, p0/z, [x0]
; CHECK-NEXT:    rev z0.b, z0.b
; CHECK-NEXT:    st1b { z0.b }, p0, [x0]
; CHECK-NEXT:    ret
  %tmp1 = load <32 x i8>, ptr %a
  %tmp2 = shufflevector <32 x i8> %tmp1, <32 x i8> undef, <32 x i32> <i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
  store <32 x i8> %tmp2, ptr %a
  ret void
}

; REV pattern for v16i16 shuffle with vscale_range(2,2)
define void @test_revv16i16_vl256(ptr %a) #1 {
; CHECK-LABEL: test_revv16i16_vl256:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h
; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
; CHECK-NEXT:    rev z0.h, z0.h
; CHECK-NEXT:    st1h { z0.h }, p0, [x0]
; CHECK-NEXT:    ret
  %tmp1 = load <16 x i16>, ptr %a
  %tmp2 = shufflevector <16 x i16> %tmp1, <16 x i16> undef, <16 x i32> <i32 undef, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
  store <16 x i16> %tmp2, ptr %a
  ret void
}

; REV pattern for v8f32 shuffle with vscale_range(2,2)
define void @test_revv8f32_vl256(ptr %a) #1 {
; CHECK-LABEL: test_revv8f32_vl256:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT:    rev z0.s, z0.s
; CHECK-NEXT:    st1w { z0.s }, p0, [x0]
; CHECK-NEXT:    ret
  %tmp1 = load <8 x float>, ptr %a
  %tmp2 = shufflevector <8 x float> %tmp1, <8 x float> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
  store <8 x float> %tmp2, ptr %a
  ret void
}

; REV pattern for v4f64 shuffle with vscale_range(2,2)
define void @test_revv4f64_vl256(ptr %a) #1 {
; CHECK-LABEL: test_revv4f64_vl256:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT:    rev z0.d, z0.d
; CHECK-NEXT:    st1d { z0.d }, p0, [x0]
; CHECK-NEXT:    ret
  %tmp1 = load <4 x double>, ptr %a
  %tmp2 = shufflevector <4 x double> %tmp1, <4 x double> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
  store <4 x double> %tmp2, ptr %a
  ret void
}

; REV pattern for shuffle two v8i32 inputs with the second input available, vscale_range(2,2).
define void @test_revv8i32v8i32(ptr %a, ptr %b) #1 {
; CHECK-LABEL: test_revv8i32v8i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x1]
; CHECK-NEXT:    rev z0.s, z0.s
; CHECK-NEXT:    st1w { z0.s }, p0, [x0]
; CHECK-NEXT:    ret
  %tmp1 = load <8 x i32>, ptr %a
  %tmp2 = load <8 x i32>, ptr %b
  %tmp3 = shufflevector <8 x i32> %tmp1, <8 x i32> %tmp2, <8 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8>
  store <8 x i32> %tmp3, ptr %a
  ret void
}

; Illegal REV pattern.
define void @test_rev_fail(ptr %a) #1 {
; CHECK-LABEL: test_rev_fail:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h
; CHECK-NEXT:    adrp x8, .LCPI20_0
; CHECK-NEXT:    add x8, x8, :lo12:.LCPI20_0
; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
; CHECK-NEXT:    ld1h { z1.h }, p0/z, [x8]
; CHECK-NEXT:    tbl z0.h, { z0.h }, z1.h
; CHECK-NEXT:    st1h { z0.h }, p0, [x0]
; CHECK-NEXT:    ret
  %tmp1 = load <16 x i16>, ptr %a
  %tmp2 = shufflevector <16 x i16> %tmp1, <16 x i16> undef, <16 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8>
  store <16 x i16> %tmp2, ptr %a
  ret void
}

; Don't use SVE for 128-bit shuffle with two inputs
define void @test_revv8i16v8i16(ptr %a, ptr %b, ptr %c) #1 {
; CHECK-LABEL: test_revv8i16v8i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
; CHECK-NEXT:    sub x9, sp, #48
; CHECK-NEXT:    mov x29, sp
; CHECK-NEXT:    and sp, x9, #0xffffffffffffffe0
; CHECK-NEXT:    .cfi_def_cfa w29, 16
; CHECK-NEXT:    .cfi_offset w30, -8
; CHECK-NEXT:    .cfi_offset w29, -16
; CHECK-NEXT:    mov x8, sp
; CHECK-NEXT:    ldr q0, [x1]
; CHECK-NEXT:    ldr q1, [x0]
; CHECK-NEXT:    orr x9, x8, #0x1e
; CHECK-NEXT:    orr x10, x8, #0x1c
; CHECK-NEXT:    ptrue p0.h
; CHECK-NEXT:    st1 { v0.h }[4], [x9]
; CHECK-NEXT:    orr x9, x8, #0x18
; CHECK-NEXT:    st1 { v0.h }[7], [x9]
; CHECK-NEXT:    orr x9, x8, #0xe
; CHECK-NEXT:    st1 { v1.h }[4], [x9]
; CHECK-NEXT:    orr x9, x8, #0xc
; CHECK-NEXT:    st1 { v1.h }[5], [x9]
; CHECK-NEXT:    orr x9, x8, #0x8
; CHECK-NEXT:    st1 { v0.h }[5], [x10]
; CHECK-NEXT:    orr x10, x8, #0x10
; CHECK-NEXT:    st1 { v1.h }[7], [x9]
; CHECK-NEXT:    orr x9, x8, #0x4
; CHECK-NEXT:    st1 { v0.h }[3], [x10]
; CHECK-NEXT:    mov w10, #26 // =0x1a
; CHECK-NEXT:    st1 { v1.h }[1], [x9]
; CHECK-NEXT:    orr x9, x8, #0x2
; CHECK-NEXT:    st1 { v1.h }[2], [x9]
; CHECK-NEXT:    orr x9, x8, x10
; CHECK-NEXT:    mov w10, #20 // =0x14
; CHECK-NEXT:    st1 { v0.h }[6], [x9]
; CHECK-NEXT:    orr x9, x8, x10
; CHECK-NEXT:    mov w10, #18 // =0x12
; CHECK-NEXT:    st1 { v0.h }[1], [x9]
; CHECK-NEXT:    orr x9, x8, x10
; CHECK-NEXT:    st1 { v0.h }[2], [x9]
; CHECK-NEXT:    mov w9, #10 // =0xa
; CHECK-NEXT:    orr x9, x8, x9
; CHECK-NEXT:    st1 { v1.h }[3], [x8]
; CHECK-NEXT:    st1 { v1.h }[6], [x9]
; CHECK-NEXT:    str h0, [sp, #22]
; CHECK-NEXT:    str h1, [sp, #6]
; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x8]
; CHECK-NEXT:    st1h { z0.h }, p0, [x2]
; CHECK-NEXT:    mov sp, x29
; CHECK-NEXT:    ldp x29, x30, [sp], #16 // 16-byte Folded Reload
; CHECK-NEXT:    ret
  %tmp1 = load <8 x i16>, ptr %a
  %tmp2 = load <8 x i16>, ptr %b
  %tmp3 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <16 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 11, i32 10, i32 9, i32 8, i32 15, i32 14, i32 13, i32 12>
  store <16 x i16> %tmp3, ptr %c
  ret void
}

; Attribute groups used by the tests above:
;   #0 - SVE with no vscale_range, so only the minimum vector width given on
;        the llc command line is known.
;   #1 - SVE with vscale_range(2,2), i.e. vscale is pinned to exactly 2.
;   #2 - SVE2p1, which is what the REVD tests rely on.
attributes #0 = { "target-features"="+sve" }
attributes #1 = { "target-features"="+sve" vscale_range(2,2) }
attributes #2 = { "target-features"="+sve2p1" }