; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=aarch64--linux-gnu | FileCheck %s --check-prefix=CHECKLE
; RUN: llc < %s -mtriple=aarch64_be--linux-gnu | FileCheck %s --check-prefix=CHECKBE

; Codegen test for shufflevector lowering. Every function is compiled twice by
; the two llc invocations above: once for a little-endian triple and once for a
; big-endian triple, each verified under its own check prefix.
;
; The expected instruction sequences are produced by the script named in the
; NOTE line; regenerate them with that script instead of editing them by hand.
;
; In a shufflevector mask, indices 0..N-1 select lanes of the first operand and
; indices N..2N-1 select lanes of the second operand (N = input vector length).
; In the big-endian expectations the inputs and the result are typically wrapped
; in additional rev64 (and, for 128-bit vectors, ext #8) instructions around the
; same core sequence seen in the little-endian expectations.

; Full 16-lane shuffle of two <16 x i32> inputs with an irregular mask. The four
; 4-lane quarters of this mask are tested individually by test_shuf2..test_shuf5.
define <16 x i32> @test_shuf1(<16 x i32> %x, <16 x i32> %y) {
; CHECKLE-LABEL: test_shuf1:
; CHECKLE:       // %bb.0:
; CHECKLE-NEXT:    ext v3.16b, v6.16b, v1.16b, #4
; CHECKLE-NEXT:    uzp1 v5.4s, v1.4s, v0.4s
; CHECKLE-NEXT:    uzp2 v16.4s, v2.4s, v4.4s
; CHECKLE-NEXT:    dup v17.4s, v4.s[0]
; CHECKLE-NEXT:    trn2 v4.4s, v1.4s, v3.4s
; CHECKLE-NEXT:    mov v17.s[0], v6.s[3]
; CHECKLE-NEXT:    trn2 v1.4s, v5.4s, v1.4s
; CHECKLE-NEXT:    rev64 v3.4s, v7.4s
; CHECKLE-NEXT:    trn1 v2.4s, v16.4s, v2.4s
; CHECKLE-NEXT:    mov v4.s[0], v7.s[1]
; CHECKLE-NEXT:    ext v1.16b, v0.16b, v1.16b, #12
; CHECKLE-NEXT:    mov v3.d[0], v17.d[0]
; CHECKLE-NEXT:    mov v2.s[3], v7.s[0]
; CHECKLE-NEXT:    mov v0.16b, v4.16b
; CHECKLE-NEXT:    ret
;
; CHECKBE-LABEL: test_shuf1:
; CHECKBE:       // %bb.0:
; CHECKBE-NEXT:    rev64 v1.4s, v1.4s
; CHECKBE-NEXT:    rev64 v3.4s, v6.4s
; CHECKBE-NEXT:    rev64 v0.4s, v0.4s
; CHECKBE-NEXT:    rev64 v2.4s, v2.4s
; CHECKBE-NEXT:    rev64 v4.4s, v4.4s
; CHECKBE-NEXT:    rev64 v5.4s, v7.4s
; CHECKBE-NEXT:    ext v1.16b, v1.16b, v1.16b, #8
; CHECKBE-NEXT:    ext v3.16b, v3.16b, v3.16b, #8
; CHECKBE-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
; CHECKBE-NEXT:    ext v2.16b, v2.16b, v2.16b, #8
; CHECKBE-NEXT:    ext v4.16b, v4.16b, v4.16b, #8
; CHECKBE-NEXT:    ext v5.16b, v5.16b, v5.16b, #8
; CHECKBE-NEXT:    ext v6.16b, v3.16b, v1.16b, #4
; CHECKBE-NEXT:    uzp1 v16.4s, v1.4s, v0.4s
; CHECKBE-NEXT:    uzp2 v7.4s, v2.4s, v4.4s
; CHECKBE-NEXT:    dup v4.4s, v4.s[0]
; CHECKBE-NEXT:    rev64 v17.4s, v5.4s
; CHECKBE-NEXT:    trn2 v6.4s, v1.4s, v6.4s
; CHECKBE-NEXT:    mov v4.s[0], v3.s[3]
; CHECKBE-NEXT:    trn2 v1.4s, v16.4s, v1.4s
; CHECKBE-NEXT:    trn1 v2.4s, v7.4s, v2.4s
; CHECKBE-NEXT:    rev64 v3.4s, v17.4s
; CHECKBE-NEXT:    mov v6.s[0], v5.s[1]
; CHECKBE-NEXT:    rev64 v4.4s, v4.4s
; CHECKBE-NEXT:    ext v0.16b, v0.16b, v1.16b, #12
; CHECKBE-NEXT:    mov v2.s[3], v5.s[0]
; CHECKBE-NEXT:    rev64 v1.4s, v6.4s
; CHECKBE-NEXT:    mov v3.d[0], v4.d[0]
; CHECKBE-NEXT:    rev64 v4.4s, v0.4s
; CHECKBE-NEXT:    rev64 v2.4s, v2.4s
; CHECKBE-NEXT:    ext v0.16b, v1.16b, v1.16b, #8
; CHECKBE-NEXT:    ext v3.16b, v3.16b, v3.16b, #8
; CHECKBE-NEXT:    ext v1.16b, v4.16b, v4.16b, #8
; CHECKBE-NEXT:    ext v2.16b, v2.16b, v2.16b, #8
; CHECKBE-NEXT:    ret
  %s3 = shufflevector <16 x i32> %x, <16 x i32> %y, <16 x i32> <i32 29, i32 26, i32 7, i32 4, i32 3, i32 6, i32 5, i32 2, i32 9, i32 8, i32 17, i32 28, i32 27, i32 16, i32 31, i32 30>
  ret <16 x i32> %s3
}

; First quarter of the test_shuf1 mask (29, 26, 7, 4), producing a <4 x i32>.
define <4 x i32> @test_shuf2(<16 x i32> %x, <16 x i32> %y) {
; CHECKLE-LABEL: test_shuf2:
; CHECKLE:       // %bb.0:
; CHECKLE-NEXT:    zip2 v0.4s, v7.4s, v6.4s
; CHECKLE-NEXT:    trn2 v2.4s, v7.4s, v0.4s
; CHECKLE-NEXT:    ext v0.16b, v1.16b, v1.16b, #4
; CHECKLE-NEXT:    mov v0.d[0], v2.d[0]
; CHECKLE-NEXT:    ret
;
; CHECKBE-LABEL: test_shuf2:
; CHECKBE:       // %bb.0:
; CHECKBE-NEXT:    rev64 v0.4s, v6.4s
; CHECKBE-NEXT:    rev64 v2.4s, v7.4s
; CHECKBE-NEXT:    rev64 v1.4s, v1.4s
; CHECKBE-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
; CHECKBE-NEXT:    ext v2.16b, v2.16b, v2.16b, #8
; CHECKBE-NEXT:    ext v1.16b, v1.16b, v1.16b, #8
; CHECKBE-NEXT:    zip2 v0.4s, v2.4s, v0.4s
; CHECKBE-NEXT:    ext v1.16b, v1.16b, v1.16b, #4
; CHECKBE-NEXT:    trn2 v0.4s, v2.4s, v0.4s
; CHECKBE-NEXT:    rev64 v1.4s, v1.4s
; CHECKBE-NEXT:    rev64 v0.4s, v0.4s
; CHECKBE-NEXT:    mov v1.d[0], v0.d[0]
; CHECKBE-NEXT:    ext v0.16b, v1.16b, v1.16b, #8
; CHECKBE-NEXT:    ret
  %s3 = shufflevector <16 x i32> %x, <16 x i32> %y, <4 x i32> <i32 29, i32 26, i32 7, i32 4>
  ret <4 x i32> %s3
}

; Second quarter of the test_shuf1 mask (3, 6, 5, 2); all lanes come from %x.
define <4 x i32> @test_shuf3(<16 x i32> %x, <16 x i32> %y) {
; CHECKLE-LABEL: test_shuf3:
; CHECKLE:       // %bb.0:
; CHECKLE-NEXT:    uzp1 v2.4s, v1.4s, v0.4s
; CHECKLE-NEXT:    trn2 v1.4s, v2.4s, v1.4s
; CHECKLE-NEXT:    ext v0.16b, v0.16b, v1.16b, #12
; CHECKLE-NEXT:    ret
;
; CHECKBE-LABEL: test_shuf3:
; CHECKBE:       // %bb.0:
; CHECKBE-NEXT:    rev64 v0.4s, v0.4s
; CHECKBE-NEXT:    rev64 v1.4s, v1.4s
; CHECKBE-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
; CHECKBE-NEXT:    ext v1.16b, v1.16b, v1.16b, #8
; CHECKBE-NEXT:    uzp1 v2.4s, v1.4s, v0.4s
; CHECKBE-NEXT:    trn2 v1.4s, v2.4s, v1.4s
; CHECKBE-NEXT:    ext v0.16b, v0.16b, v1.16b, #12
; CHECKBE-NEXT:    rev64 v0.4s, v0.4s
; CHECKBE-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
; CHECKBE-NEXT:    ret
  %s3 = shufflevector <16 x i32> %x, <16 x i32> %y, <4 x i32> <i32 3, i32 6, i32 5, i32 2>
  ret <4 x i32> %s3
}

; Third quarter of the test_shuf1 mask (9, 8, 17, 28); mixes lanes of %x and %y.
define <4 x i32> @test_shuf4(<16 x i32> %x, <16 x i32> %y) {
; CHECKLE-LABEL: test_shuf4:
; CHECKLE:       // %bb.0:
; CHECKLE-NEXT:    uzp2 v0.4s, v2.4s, v4.4s
; CHECKLE-NEXT:    trn1 v0.4s, v0.4s, v2.4s
; CHECKLE-NEXT:    mov v0.s[3], v7.s[0]
; CHECKLE-NEXT:    ret
;
; CHECKBE-LABEL: test_shuf4:
; CHECKBE:       // %bb.0:
; CHECKBE-NEXT:    rev64 v0.4s, v4.4s
; CHECKBE-NEXT:    rev64 v1.4s, v2.4s
; CHECKBE-NEXT:    rev64 v2.4s, v7.4s
; CHECKBE-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
; CHECKBE-NEXT:    ext v1.16b, v1.16b, v1.16b, #8
; CHECKBE-NEXT:    ext v2.16b, v2.16b, v2.16b, #8
; CHECKBE-NEXT:    uzp2 v0.4s, v1.4s, v0.4s
; CHECKBE-NEXT:    trn1 v0.4s, v0.4s, v1.4s
; CHECKBE-NEXT:    mov v0.s[3], v2.s[0]
; CHECKBE-NEXT:    rev64 v0.4s, v0.4s
; CHECKBE-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
; CHECKBE-NEXT:    ret
  %s3 = shufflevector <16 x i32> %x, <16 x i32> %y, <4 x i32> <i32 9, i32 8, i32 17, i32 28>
  ret <4 x i32> %s3
}

; Fourth quarter of the test_shuf1 mask (27, 16, 31, 30); all lanes come from %y.
define <4 x i32> @test_shuf5(<16 x i32> %x, <16 x i32> %y) {
; CHECKLE-LABEL: test_shuf5:
; CHECKLE:       // %bb.0:
; CHECKLE-NEXT:    ext v1.16b, v6.16b, v4.16b, #12
; CHECKLE-NEXT:    rev64 v0.4s, v7.4s
; CHECKLE-NEXT:    mov v0.d[0], v1.d[0]
; CHECKLE-NEXT:    ret
;
; CHECKBE-LABEL: test_shuf5:
; CHECKBE:       // %bb.0:
; CHECKBE-NEXT:    rev64 v0.4s, v7.4s
; CHECKBE-NEXT:    rev64 v1.4s, v4.4s
; CHECKBE-NEXT:    rev64 v2.4s, v6.4s
; CHECKBE-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
; CHECKBE-NEXT:    ext v1.16b, v1.16b, v1.16b, #8
; CHECKBE-NEXT:    ext v2.16b, v2.16b, v2.16b, #8
; CHECKBE-NEXT:    ext v1.16b, v2.16b, v1.16b, #12
; CHECKBE-NEXT:    rev64 v0.4s, v0.4s
; CHECKBE-NEXT:    rev64 v1.4s, v1.4s
; CHECKBE-NEXT:    rev64 v0.4s, v0.4s
; CHECKBE-NEXT:    mov v0.d[0], v1.d[0]
; CHECKBE-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
; CHECKBE-NEXT:    ret
  %s3 = shufflevector <16 x i32> %x, <16 x i32> %y, <4 x i32> <i32 27, i32 16, i32 31, i32 30>
  ret <4 x i32> %s3
}

; The testNNNN functions below are named after their 4-lane mask digits.
; Two-input <4 x i32> shuffle with mask <1, 5, 0, 3>.
define <4 x i32> @test1503(<4 x i32> %a, <4 x i32> %b)
; CHECKLE-LABEL: test1503:
; CHECKLE:       // %bb.0:
; CHECKLE-NEXT:    zip1 v1.4s, v0.4s, v1.4s
; CHECKLE-NEXT:    ext v1.16b, v1.16b, v0.16b, #8
; CHECKLE-NEXT:    mov v1.s[3], v0.s[3]
; CHECKLE-NEXT:    mov v0.16b, v1.16b
; CHECKLE-NEXT:    ret
;
; CHECKBE-LABEL: test1503:
; CHECKBE:       // %bb.0:
; CHECKBE-NEXT:    rev64 v1.4s, v1.4s
; CHECKBE-NEXT:    rev64 v0.4s, v0.4s
; CHECKBE-NEXT:    ext v1.16b, v1.16b, v1.16b, #8
; CHECKBE-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
; CHECKBE-NEXT:    zip1 v1.4s, v0.4s, v1.4s
; CHECKBE-NEXT:    ext v1.16b, v1.16b, v0.16b, #8
; CHECKBE-NEXT:    mov v1.s[3], v0.s[3]
; CHECKBE-NEXT:    rev64 v0.4s, v1.4s
; CHECKBE-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
; CHECKBE-NEXT:    ret
{
  %r = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 1, i32 5, i32 0, i32 3>
  ret <4 x i32> %r
}

; Two-input <4 x i32> shuffle with mask <4, 3, 6, 6>.
define <4 x i32> @test4366(<4 x i32> %a, <4 x i32> %b)
; CHECKLE-LABEL: test4366:
; CHECKLE:       // %bb.0:
; CHECKLE-NEXT:    trn1 v1.4s, v1.4s, v1.4s
; CHECKLE-NEXT:    mov v1.s[1], v0.s[3]
; CHECKLE-NEXT:    mov v0.16b, v1.16b
; CHECKLE-NEXT:    ret
;
; CHECKBE-LABEL: test4366:
; CHECKBE:       // %bb.0:
; CHECKBE-NEXT:    rev64 v1.4s, v1.4s
; CHECKBE-NEXT:    rev64 v0.4s, v0.4s
; CHECKBE-NEXT:    ext v1.16b, v1.16b, v1.16b, #8
; CHECKBE-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
; CHECKBE-NEXT:    trn1 v1.4s, v1.4s, v1.4s
; CHECKBE-NEXT:    mov v1.s[1], v0.s[3]
; CHECKBE-NEXT:    rev64 v0.4s, v1.4s
; CHECKBE-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
; CHECKBE-NEXT:    ret
{
  %r = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 4, i32 3, i32 6, i32 6>
  ret <4 x i32> %r
}

; Two-input <4 x i32> shuffle with mask <7, 3, 6, 7>.
define <4 x i32> @test7367(<4 x i32> %a, <4 x i32> %b)
; CHECKLE-LABEL: test7367:
; CHECKLE:       // %bb.0:
; CHECKLE-NEXT:    mov v2.16b, v1.16b
; CHECKLE-NEXT:    mov v2.d[0], v0.d[1]
; CHECKLE-NEXT:    mov v2.s[0], v1.s[3]
; CHECKLE-NEXT:    mov v0.16b, v2.16b
; CHECKLE-NEXT:    ret
;
; CHECKBE-LABEL: test7367:
; CHECKBE:       // %bb.0:
; CHECKBE-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
; CHECKBE-NEXT:    ext v2.16b, v1.16b, v1.16b, #8
; CHECKBE-NEXT:    rev64 v1.4s, v1.4s
; CHECKBE-NEXT:    mov v2.d[0], v0.d[1]
; CHECKBE-NEXT:    ext v0.16b, v1.16b, v1.16b, #8
; CHECKBE-NEXT:    rev64 v1.4s, v2.4s
; CHECKBE-NEXT:    mov v1.s[0], v0.s[3]
; CHECKBE-NEXT:    rev64 v0.4s, v1.4s
; CHECKBE-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
; CHECKBE-NEXT:    ret
{
  %r = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 7, i32 3, i32 6, i32 7>
  ret <4 x i32> %r
}

; Two-input <4 x i32> shuffle with mask <4, 0, 4, 5>.
define <4 x i32> @test4045(<4 x i32> %a, <4 x i32> %b)
; CHECKLE-LABEL: test4045:
; CHECKLE:       // %bb.0:
; CHECKLE-NEXT:    trn1 v0.4s, v1.4s, v0.4s
; CHECKLE-NEXT:    mov v0.d[1], v1.d[0]
; CHECKLE-NEXT:    ret
;
; CHECKBE-LABEL: test4045:
; CHECKBE:       // %bb.0:
; CHECKBE-NEXT:    rev64 v0.4s, v0.4s
; CHECKBE-NEXT:    rev64 v2.4s, v1.4s
; CHECKBE-NEXT:    ext v1.16b, v1.16b, v1.16b, #8
; CHECKBE-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
; CHECKBE-NEXT:    ext v2.16b, v2.16b, v2.16b, #8
; CHECKBE-NEXT:    trn1 v0.4s, v2.4s, v0.4s
; CHECKBE-NEXT:    rev64 v0.4s, v0.4s
; CHECKBE-NEXT:    mov v0.d[1], v1.d[0]
; CHECKBE-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
; CHECKBE-NEXT:    ret
{
  %r = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 4, i32 0, i32 4, i32 5>
  ret <4 x i32> %r
}

; Two-input <4 x i32> shuffle with mask <0, 0, 6, 7>.
define <4 x i32> @test0067(<4 x i32> %a, <4 x i32> %b)
; CHECKLE-LABEL: test0067:
; CHECKLE:       // %bb.0:
; CHECKLE-NEXT:    trn1 v0.4s, v0.4s, v0.4s
; CHECKLE-NEXT:    mov v0.d[1], v1.d[1]
; CHECKLE-NEXT:    ret
;
; CHECKBE-LABEL: test0067:
; CHECKBE:       // %bb.0:
; CHECKBE-NEXT:    rev64 v0.4s, v0.4s
; CHECKBE-NEXT:    ext v1.16b, v1.16b, v1.16b, #8
; CHECKBE-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
; CHECKBE-NEXT:    trn1 v0.4s, v0.4s, v0.4s
; CHECKBE-NEXT:    rev64 v0.4s, v0.4s
; CHECKBE-NEXT:    mov v0.d[1], v1.d[1]
; CHECKBE-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
; CHECKBE-NEXT:    ret
{
  %r = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 0, i32 7, i32 7>
  ret <4 x i32> %r
}

; <4 x i32> shuffle with mask <0, 0, 7, 7>: lane 0 of %a twice, then lane 3 of
; %b twice. Expected lowering is a lane insert followed by trn1.
define <4 x i32> @test_shuf6(<4 x i32> %a, <4 x i32> %b)
; CHECKLE-LABEL: test_shuf6:
; CHECKLE:       // %bb.0:
; CHECKLE-NEXT:    mov v0.s[2], v1.s[3]
; CHECKLE-NEXT:    trn1 v0.4s, v0.4s, v0.4s
; CHECKLE-NEXT:    ret
;
; CHECKBE-LABEL: test_shuf6:
; CHECKBE:       // %bb.0:
; CHECKBE-NEXT:    rev64 v1.4s, v1.4s
; CHECKBE-NEXT:    rev64 v0.4s, v0.4s
; CHECKBE-NEXT:    ext v1.16b, v1.16b, v1.16b, #8
; CHECKBE-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
; CHECKBE-NEXT:    mov v0.s[2], v1.s[3]
; CHECKBE-NEXT:    trn1 v0.4s, v0.4s, v0.4s
; CHECKBE-NEXT:    rev64 v0.4s, v0.4s
; CHECKBE-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
; CHECKBE-NEXT:    ret
{
  %r = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 0, i32 7, i32 7>
  ret <4 x i32> %r
}

; Same <0, 0, 7, 7> mask as test_shuf6, applied to 64-bit <4 x i16> vectors.
define <4 x i16> @test_shuf7(<4 x i16> %a, <4 x i16> %b)
; CHECKLE-LABEL: test_shuf7:
; CHECKLE:       // %bb.0:
; CHECKLE-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECKLE-NEXT:    // kill: def $d1 killed $d1 def $q1
; CHECKLE-NEXT:    mov v0.h[2], v1.h[3]
; CHECKLE-NEXT:    trn1 v0.4h, v0.4h, v0.4h
; CHECKLE-NEXT:    ret
;
; CHECKBE-LABEL: test_shuf7:
; CHECKBE:       // %bb.0:
; CHECKBE-NEXT:    rev64 v1.4h, v1.4h
; CHECKBE-NEXT:    rev64 v0.4h, v0.4h
; CHECKBE-NEXT:    mov v0.h[2], v1.h[3]
; CHECKBE-NEXT:    trn1 v0.4h, v0.4h, v0.4h
; CHECKBE-NEXT:    rev64 v0.4h, v0.4h
; CHECKBE-NEXT:    ret
{
  %r = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 0, i32 7, i32 7>
  ret <4 x i16> %r
}

; <8 x i8> shuffle: lane 0 of %a four times, then lane 0 of %b four times.
; Expected lowering concatenates both inputs and uses a tbl with a mask loaded
; from the constant pool.
define <8 x i8> @test_shuf8(<8 x i8> %a, <8 x i8> %b)
; CHECKLE-LABEL: test_shuf8:
; CHECKLE:       // %bb.0:
; CHECKLE-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECKLE-NEXT:    // kill: def $d1 killed $d1 def $q1
; CHECKLE-NEXT:    adrp x8, .LCPI12_0
; CHECKLE-NEXT:    mov v0.d[1], v1.d[0]
; CHECKLE-NEXT:    ldr d1, [x8, :lo12:.LCPI12_0]
; CHECKLE-NEXT:    tbl v0.8b, { v0.16b }, v1.8b
; CHECKLE-NEXT:    ret
;
; CHECKBE-LABEL: test_shuf8:
; CHECKBE:       // %bb.0:
; CHECKBE-NEXT:    rev64 v0.8b, v0.8b
; CHECKBE-NEXT:    rev64 v1.8b, v1.8b
; CHECKBE-NEXT:    adrp x8, .LCPI12_0
; CHECKBE-NEXT:    add x8, x8, :lo12:.LCPI12_0
; CHECKBE-NEXT:    mov v0.d[1], v1.d[0]
; CHECKBE-NEXT:    ld1 { v1.8b }, [x8]
; CHECKBE-NEXT:    tbl v0.8b, { v0.16b }, v1.8b
; CHECKBE-NEXT:    rev64 v0.8b, v0.8b
; CHECKBE-NEXT:    ret
{
  %r = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 8, i32 8, i32 8, i32 8>
  ret <8 x i8> %r
}

; <8 x i16> shuffle: lane 0 of %a four times, then lane 0 of %b four times.
; Expected lowering is a two-register tbl with a constant-pool mask.
define <8 x i16> @test_shuf9(<8 x i16> %a, <8 x i16> %b)
; CHECKLE-LABEL: test_shuf9:
; CHECKLE:       // %bb.0:
; CHECKLE-NEXT:    adrp x8, .LCPI13_0
; CHECKLE-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
; CHECKLE-NEXT:    ldr q2, [x8, :lo12:.LCPI13_0]
; CHECKLE-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
; CHECKLE-NEXT:    tbl v0.16b, { v0.16b, v1.16b }, v2.16b
; CHECKLE-NEXT:    ret
;
; CHECKBE-LABEL: test_shuf9:
; CHECKBE:       // %bb.0:
; CHECKBE-NEXT:    rev64 v1.16b, v1.16b
; CHECKBE-NEXT:    rev64 v0.16b, v0.16b
; CHECKBE-NEXT:    adrp x8, .LCPI13_0
; CHECKBE-NEXT:    add x8, x8, :lo12:.LCPI13_0
; CHECKBE-NEXT:    ext v2.16b, v1.16b, v1.16b, #8
; CHECKBE-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
; CHECKBE-NEXT:    ld1 { v0.16b }, [x8]
; CHECKBE-NEXT:    tbl v0.16b, { v1.16b, v2.16b }, v0.16b
; CHECKBE-NEXT:    rev64 v0.16b, v0.16b
; CHECKBE-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
; CHECKBE-NEXT:    ret
{
  %r = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 8, i32 8, i32 8, i32 8>
  ret <8 x i16> %r
}

; <16 x i8> shuffle whose mask only uses indices 0 and 8, i.e. only lanes of %a
; (%b is never selected). Expected lowering is a single-register tbl.
define <16 x i8> @test_shuf10(<16 x i8> %a, <16 x i8> %b)
; CHECKLE-LABEL: test_shuf10:
; CHECKLE:       // %bb.0:
; CHECKLE-NEXT:    adrp x8, .LCPI14_0
; CHECKLE-NEXT:    ldr q1, [x8, :lo12:.LCPI14_0]
; CHECKLE-NEXT:    tbl v0.16b, { v0.16b }, v1.16b
; CHECKLE-NEXT:    ret
;
; CHECKBE-LABEL: test_shuf10:
; CHECKBE:       // %bb.0:
; CHECKBE-NEXT:    rev64 v0.16b, v0.16b
; CHECKBE-NEXT:    adrp x8, .LCPI14_0
; CHECKBE-NEXT:    add x8, x8, :lo12:.LCPI14_0
; CHECKBE-NEXT:    ld1 { v1.16b }, [x8]
; CHECKBE-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
; CHECKBE-NEXT:    tbl v0.16b, { v0.16b }, v1.16b
; CHECKBE-NEXT:    rev64 v0.16b, v0.16b
; CHECKBE-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
; CHECKBE-NEXT:    ret
{
  %r = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8 >
  ret <16 x i8> %r
}

; Same mask as test_shuf9 (<0,0,0,0,8,8,8,8>) with <8 x half> elements.
define <8 x half> @test_shuf11(<8 x half> %a, <8 x half> %b)
; CHECKLE-LABEL: test_shuf11:
; CHECKLE:       // %bb.0:
; CHECKLE-NEXT:    adrp x8, .LCPI15_0
; CHECKLE-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
; CHECKLE-NEXT:    ldr q2, [x8, :lo12:.LCPI15_0]
; CHECKLE-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
; CHECKLE-NEXT:    tbl v0.16b, { v0.16b, v1.16b }, v2.16b
; CHECKLE-NEXT:    ret
;
; CHECKBE-LABEL: test_shuf11:
; CHECKBE:       // %bb.0:
; CHECKBE-NEXT:    rev64 v1.16b, v1.16b
; CHECKBE-NEXT:    rev64 v0.16b, v0.16b
; CHECKBE-NEXT:    adrp x8, .LCPI15_0
; CHECKBE-NEXT:    add x8, x8, :lo12:.LCPI15_0
; CHECKBE-NEXT:    ext v2.16b, v1.16b, v1.16b, #8
; CHECKBE-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
; CHECKBE-NEXT:    ld1 { v0.16b }, [x8]
; CHECKBE-NEXT:    tbl v0.16b, { v1.16b, v2.16b }, v0.16b
; CHECKBE-NEXT:    rev64 v0.16b, v0.16b
; CHECKBE-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
; CHECKBE-NEXT:    ret
{
  %r = shufflevector <8 x half> %a, <8 x half> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 8, i32 8, i32 8, i32 8>
  ret <8 x half> %r
}

; <8 x half> shuffle with mask <0, 0, 0, 0, 0, 8, 1, 15>.
define <8 x half> @test_shuf12(<8 x half> %a, <8 x half> %b)
; CHECKLE-LABEL: test_shuf12:
; CHECKLE:       // %bb.0:
; CHECKLE-NEXT:    adrp x8, .LCPI16_0
; CHECKLE-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
; CHECKLE-NEXT:    ldr q2, [x8, :lo12:.LCPI16_0]
; CHECKLE-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
; CHECKLE-NEXT:    tbl v0.16b, { v0.16b, v1.16b }, v2.16b
; CHECKLE-NEXT:    ret
;
; CHECKBE-LABEL: test_shuf12:
; CHECKBE:       // %bb.0:
; CHECKBE-NEXT:    rev64 v1.16b, v1.16b
; CHECKBE-NEXT:    rev64 v0.16b, v0.16b
; CHECKBE-NEXT:    adrp x8, .LCPI16_0
; CHECKBE-NEXT:    add x8, x8, :lo12:.LCPI16_0
; CHECKBE-NEXT:    ext v2.16b, v1.16b, v1.16b, #8
; CHECKBE-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
; CHECKBE-NEXT:    ld1 { v0.16b }, [x8]
; CHECKBE-NEXT:    tbl v0.16b, { v1.16b, v2.16b }, v0.16b
; CHECKBE-NEXT:    rev64 v0.16b, v0.16b
; CHECKBE-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
; CHECKBE-NEXT:    ret
{
  %r = shufflevector <8 x half> %a, <8 x half> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 8, i32 1, i32 15>
  ret <8 x half> %r
}

; <8 x half> shuffle with mask <1, 2, 0, 0, 0, 8, 1, 15>.
define <8 x half> @test_shuf13(<8 x half> %a, <8 x half> %b)
; CHECKLE-LABEL: test_shuf13:
; CHECKLE:       // %bb.0:
; CHECKLE-NEXT:    adrp x8, .LCPI17_0
; CHECKLE-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
; CHECKLE-NEXT:    ldr q2, [x8, :lo12:.LCPI17_0]
; CHECKLE-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
; CHECKLE-NEXT:    tbl v0.16b, { v0.16b, v1.16b }, v2.16b
; CHECKLE-NEXT:    ret
;
; CHECKBE-LABEL: test_shuf13:
; CHECKBE:       // %bb.0:
; CHECKBE-NEXT:    rev64 v1.16b, v1.16b
; CHECKBE-NEXT:    rev64 v0.16b, v0.16b
; CHECKBE-NEXT:    adrp x8, .LCPI17_0
; CHECKBE-NEXT:    add x8, x8, :lo12:.LCPI17_0
; CHECKBE-NEXT:    ext v2.16b, v1.16b, v1.16b, #8
; CHECKBE-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
; CHECKBE-NEXT:    ld1 { v0.16b }, [x8]
; CHECKBE-NEXT:    tbl v0.16b, { v1.16b, v2.16b }, v0.16b
; CHECKBE-NEXT:    rev64 v0.16b, v0.16b
; CHECKBE-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
; CHECKBE-NEXT:    ret
{
  %r = shufflevector <8 x half> %a, <8 x half> %b, <8 x i32> <i32 1, i32 2, i32 0, i32 0, i32 0, i32 8, i32 1, i32 15>
  ret <8 x half> %r
}

; <8 x half> shuffle with mask <1, 2, 1, 1, 0, 8, 1, 15>.
define <8 x half> @test_shuf14(<8 x half> %a, <8 x half> %b)
; CHECKLE-LABEL: test_shuf14:
; CHECKLE:       // %bb.0:
; CHECKLE-NEXT:    adrp x8, .LCPI18_0
; CHECKLE-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
; CHECKLE-NEXT:    ldr q2, [x8, :lo12:.LCPI18_0]
; CHECKLE-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
; CHECKLE-NEXT:    tbl v0.16b, { v0.16b, v1.16b }, v2.16b
; CHECKLE-NEXT:    ret
;
; CHECKBE-LABEL: test_shuf14:
; CHECKBE:       // %bb.0:
; CHECKBE-NEXT:    rev64 v1.16b, v1.16b
; CHECKBE-NEXT:    rev64 v0.16b, v0.16b
; CHECKBE-NEXT:    adrp x8, .LCPI18_0
; CHECKBE-NEXT:    add x8, x8, :lo12:.LCPI18_0
; CHECKBE-NEXT:    ext v2.16b, v1.16b, v1.16b, #8
; CHECKBE-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
; CHECKBE-NEXT:    ld1 { v0.16b }, [x8]
; CHECKBE-NEXT:    tbl v0.16b, { v1.16b, v2.16b }, v0.16b
; CHECKBE-NEXT:    rev64 v0.16b, v0.16b
; CHECKBE-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
; CHECKBE-NEXT:    ret
{
  %r = shufflevector <8 x half> %a, <8 x half> %b, <8 x i32> <i32 1, i32 2, i32 1, i32 1, i32 0, i32 8, i32 1, i32 15>
  ret <8 x half> %r
}

; <8 x half> shuffle with mask <1, 2, 7, 2, 0, 3, 2, 15>; only the last lane
; comes from %b.
define <8 x half> @test_shuf15(<8 x half> %a, <8 x half> %b)
; CHECKLE-LABEL: test_shuf15:
; CHECKLE:       // %bb.0:
; CHECKLE-NEXT:    adrp x8, .LCPI19_0
; CHECKLE-NEXT:    // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
; CHECKLE-NEXT:    ldr q2, [x8, :lo12:.LCPI19_0]
; CHECKLE-NEXT:    // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
; CHECKLE-NEXT:    tbl v0.16b, { v0.16b, v1.16b }, v2.16b
; CHECKLE-NEXT:    ret
;
; CHECKBE-LABEL: test_shuf15:
; CHECKBE:       // %bb.0:
; CHECKBE-NEXT:    rev64 v1.16b, v1.16b
; CHECKBE-NEXT:    rev64 v0.16b, v0.16b
; CHECKBE-NEXT:    adrp x8, .LCPI19_0
; CHECKBE-NEXT:    add x8, x8, :lo12:.LCPI19_0
; CHECKBE-NEXT:    ext v2.16b, v1.16b, v1.16b, #8
; CHECKBE-NEXT:    ext v1.16b, v0.16b, v0.16b, #8
; CHECKBE-NEXT:    ld1 { v0.16b }, [x8]
; CHECKBE-NEXT:    tbl v0.16b, { v1.16b, v2.16b }, v0.16b
; CHECKBE-NEXT:    rev64 v0.16b, v0.16b
; CHECKBE-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
; CHECKBE-NEXT:    ret
{
  %r = shufflevector <8 x half> %a, <8 x half> %b, <8 x i32> <i32 1, i32 2, i32 7, i32 2, i32 0, i32 3, i32 2, i32 15>
  ret <8 x half> %r
}

; Two chained shuffles extract lanes 4..7 of %j as a <4 x i16>, which is then
; zero-extended to i32 and shifted left by 3. Expected lowering folds the whole
; chain into a single ushll2 #3. %k is unused.
define <4 x i32> @extract_shuffle(<8 x i16> %j, <4 x i16> %k) {
; CHECKLE-LABEL: extract_shuffle:
; CHECKLE:       // %bb.0:
; CHECKLE-NEXT:    ushll2 v0.4s, v0.8h, #3
; CHECKLE-NEXT:    ret
;
; CHECKBE-LABEL: extract_shuffle:
; CHECKBE:       // %bb.0:
; CHECKBE-NEXT:    rev64 v0.8h, v0.8h
; CHECKBE-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
; CHECKBE-NEXT:    ushll2 v0.4s, v0.8h, #3
; CHECKBE-NEXT:    rev64 v0.4s, v0.4s
; CHECKBE-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
; CHECKBE-NEXT:    ret
  %a = shufflevector <8 x i16> %j, <8 x i16> poison, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 poison, i32 poison, i32 poison, i32 poison>
  %b = shufflevector <8 x i16> %a, <8 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %c = zext <4 x i16> %b to <4 x i32>
  %d = shl <4 x i32> %c, <i32 3, i32 3, i32 3, i32 3>
  ret <4 x i32> %d
}
