; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mattr=+sve2 -force-streaming-compatible -aarch64-sve-vector-bits-min=128 -aarch64-sve-vector-bits-max=128 < %s | FileCheck %s -check-prefixes=CHECK,SVE2_128
; RUN: llc -mattr=+sve2 -force-streaming-compatible -aarch64-sve-vector-bits-min=128 < %s | FileCheck %s -check-prefixes=CHECK,SVE2_128_NOMAX
; RUN: llc -mattr=+sve2 -force-streaming-compatible < %s | FileCheck %s -check-prefixes=CHECK,SVE2_NOMIN_NOMAX
; RUN: llc -mattr=+sve2 -force-streaming-compatible -aarch64-sve-vector-bits-min=256 < %s | FileCheck %s -check-prefixes=CHECK,SVE2_MIN_256_NOMAX

; Tests for lowering fixed-length shufflevector to the SVE2 tbl instruction.
; Every function is compiled in four configurations that differ only in the
; SVE vector-length bounds given to llc:
;   * min = max = 128 bits (vector length known exactly),
;   * min = 128 bits, no max,
;   * no min, no max,
;   * min = 256 bits, no max.
; Constant-pool checks are placed in front of the function that uses them.

target triple = "aarch64-unknown-linux-gnu"

; All mask indices (0..7) select from %op1, so only %a is loaded and a
; single-register tbl with a constant index vector is expected in every
; configuration.
; SVE2_128:       .LCPI0_0:
; SVE2_128-NEXT:    .byte 0 // 0x0
; SVE2_128-NEXT:    .byte 7 // 0x7
; SVE2_128-NEXT:    .byte 2 // 0x2
; SVE2_128-NEXT:    .byte 3 // 0x3
; SVE2_128-NEXT:    .byte 4 // 0x4
; SVE2_128-NEXT:    .byte 5 // 0x5
; SVE2_128-NEXT:    .byte 6 // 0x6
; SVE2_128-NEXT:    .byte 7 // 0x7
; SVE2_128-NEXT:    .byte 255 // 0xff
; SVE2_128-NEXT:    .byte 255 // 0xff
define <8 x i8> @shuffle_index_indices_from_op1(ptr %a, ptr %b) {
; SVE2_128-LABEL: shuffle_index_indices_from_op1:
; SVE2_128:       // %bb.0:
; SVE2_128-NEXT:    adrp x8, .LCPI0_0
; SVE2_128-NEXT:    ldr d0, [x0]
; SVE2_128-NEXT:    ldr q1, [x8, :lo12:.LCPI0_0]
; SVE2_128-NEXT:    tbl z0.b, { z0.b }, z1.b
; SVE2_128-NEXT:    // kill: def $d0 killed $d0 killed $z0
; SVE2_128-NEXT:    ret
;
; SVE2_128_NOMAX-LABEL: shuffle_index_indices_from_op1:
; SVE2_128_NOMAX:       // %bb.0:
; SVE2_128_NOMAX-NEXT:    adrp x8, .LCPI0_0
; SVE2_128_NOMAX-NEXT:    ldr d0, [x0]
; SVE2_128_NOMAX-NEXT:    ldr q1, [x8, :lo12:.LCPI0_0]
; SVE2_128_NOMAX-NEXT:    tbl z0.b, { z0.b }, z1.b
; SVE2_128_NOMAX-NEXT:    // kill: def $d0 killed $d0 killed $z0
; SVE2_128_NOMAX-NEXT:    ret
;
; SVE2_NOMIN_NOMAX-LABEL: shuffle_index_indices_from_op1:
; SVE2_NOMIN_NOMAX:       // %bb.0:
; SVE2_NOMIN_NOMAX-NEXT:    adrp x8, .LCPI0_0
; SVE2_NOMIN_NOMAX-NEXT:    ldr d0, [x0]
; SVE2_NOMIN_NOMAX-NEXT:    ldr q1, [x8, :lo12:.LCPI0_0]
; SVE2_NOMIN_NOMAX-NEXT:    tbl z0.b, { z0.b }, z1.b
; SVE2_NOMIN_NOMAX-NEXT:    // kill: def $d0 killed $d0 killed $z0
; SVE2_NOMIN_NOMAX-NEXT:    ret
;
; SVE2_MIN_256_NOMAX-LABEL: shuffle_index_indices_from_op1:
; SVE2_MIN_256_NOMAX:       // %bb.0:
; SVE2_MIN_256_NOMAX-NEXT:    ptrue p0.b, vl32
; SVE2_MIN_256_NOMAX-NEXT:    adrp x8, .LCPI0_0
; SVE2_MIN_256_NOMAX-NEXT:    add x8, x8, :lo12:.LCPI0_0
; SVE2_MIN_256_NOMAX-NEXT:    ldr d1, [x0]
; SVE2_MIN_256_NOMAX-NEXT:    ld1b { z0.b }, p0/z, [x8]
; SVE2_MIN_256_NOMAX-NEXT:    tbl z0.b, { z1.b }, z0.b
; SVE2_MIN_256_NOMAX-NEXT:    // kill: def $d0 killed $d0 killed $z0
; SVE2_MIN_256_NOMAX-NEXT:    ret
  %op1 = load <8 x i8>, ptr %a
  %op2 = load <8 x i8>, ptr %b
  %1 = shufflevector <8 x i8> %op1, <8 x i8> %op2, <8 x i32> <i32 0, i32 7, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <8 x i8> %1
}

; All mask indices (8..15) select from %op2, so only %b is loaded and the
; constant index vector holds the indices rebased to 0..7.
; SVE2_128:       .LCPI1_0:
; SVE2_128-NEXT:    .byte 0 // 0x0
; SVE2_128-NEXT:    .byte 1 // 0x1
; SVE2_128-NEXT:    .byte 1 // 0x1
; SVE2_128-NEXT:    .byte 3 // 0x3
; SVE2_128-NEXT:    .byte 4 // 0x4
; SVE2_128-NEXT:    .byte 7 // 0x7
; SVE2_128-NEXT:    .byte 6 // 0x6
; SVE2_128-NEXT:    .byte 7 // 0x7
; SVE2_128-NEXT:    .byte 255 // 0xff
; SVE2_128-NEXT:    .byte 255 // 0xff
define <8 x i8> @shuffle_index_indices_from_op2(ptr %a, ptr %b) {
; SVE2_128-LABEL: shuffle_index_indices_from_op2:
; SVE2_128:       // %bb.0:
; SVE2_128-NEXT:    adrp x8, .LCPI1_0
; SVE2_128-NEXT:    ldr d0, [x1]
; SVE2_128-NEXT:    ldr q1, [x8, :lo12:.LCPI1_0]
; SVE2_128-NEXT:    tbl z0.b, { z0.b }, z1.b
; SVE2_128-NEXT:    // kill: def $d0 killed $d0 killed $z0
; SVE2_128-NEXT:    ret
;
; SVE2_128_NOMAX-LABEL: shuffle_index_indices_from_op2:
; SVE2_128_NOMAX:       // %bb.0:
; SVE2_128_NOMAX-NEXT:    adrp x8, .LCPI1_0
; SVE2_128_NOMAX-NEXT:    ldr d0, [x1]
; SVE2_128_NOMAX-NEXT:    ldr q1, [x8, :lo12:.LCPI1_0]
; SVE2_128_NOMAX-NEXT:    tbl z0.b, { z0.b }, z1.b
; SVE2_128_NOMAX-NEXT:    // kill: def $d0 killed $d0 killed $z0
; SVE2_128_NOMAX-NEXT:    ret
;
; SVE2_NOMIN_NOMAX-LABEL: shuffle_index_indices_from_op2:
; SVE2_NOMIN_NOMAX:       // %bb.0:
; SVE2_NOMIN_NOMAX-NEXT:    adrp x8, .LCPI1_0
; SVE2_NOMIN_NOMAX-NEXT:    ldr d0, [x1]
; SVE2_NOMIN_NOMAX-NEXT:    ldr q1, [x8, :lo12:.LCPI1_0]
; SVE2_NOMIN_NOMAX-NEXT:    tbl z0.b, { z0.b }, z1.b
; SVE2_NOMIN_NOMAX-NEXT:    // kill: def $d0 killed $d0 killed $z0
; SVE2_NOMIN_NOMAX-NEXT:    ret
;
; SVE2_MIN_256_NOMAX-LABEL: shuffle_index_indices_from_op2:
; SVE2_MIN_256_NOMAX:       // %bb.0:
; SVE2_MIN_256_NOMAX-NEXT:    ptrue p0.b, vl32
; SVE2_MIN_256_NOMAX-NEXT:    adrp x8, .LCPI1_0
; SVE2_MIN_256_NOMAX-NEXT:    add x8, x8, :lo12:.LCPI1_0
; SVE2_MIN_256_NOMAX-NEXT:    ldr d1, [x1]
; SVE2_MIN_256_NOMAX-NEXT:    ld1b { z0.b }, p0/z, [x8]
; SVE2_MIN_256_NOMAX-NEXT:    tbl z0.b, { z1.b }, z0.b
; SVE2_MIN_256_NOMAX-NEXT:    // kill: def $d0 killed $d0 killed $z0
; SVE2_MIN_256_NOMAX-NEXT:    ret
  %op1 = load <8 x i8>, ptr %a
  %op2 = load <8 x i8>, ptr %b
  %1 = shufflevector <8 x i8> %op1, <8 x i8> %op2, <8 x i32> <i32 8, i32 9, i32 9, i32 11, i32 12, i32 15, i32 14, i32 15>
  ret <8 x i8> %1
}

; The mask selects from both operands. Only the configuration with an exact
; 128-bit vector length is expected to use the two-register tbl; its constant
; holds the second-operand indices offset by 16 (17..23). The other
; configurations are expected to fall back to a mov/zip1 sequence.
; SVE2_128:       .LCPI2_0:
; SVE2_128-NEXT:    .byte 1 // 0x1
; SVE2_128-NEXT:    .byte 17 // 0x11
; SVE2_128-NEXT:    .byte 18 // 0x12
; SVE2_128-NEXT:    .byte 19 // 0x13
; SVE2_128-NEXT:    .byte 20 // 0x14
; SVE2_128-NEXT:    .byte 20 // 0x14
; SVE2_128-NEXT:    .byte 22 // 0x16
; SVE2_128-NEXT:    .byte 23 // 0x17
; SVE2_128-NEXT:    .byte 255 // 0xff
; SVE2_128-NEXT:    .byte 255 // 0xff
define <8 x i8> @shuffle_index_indices_from_both_ops(ptr %a, ptr %b) {
; SVE2_128-LABEL: shuffle_index_indices_from_both_ops:
; SVE2_128:       // %bb.0:
; SVE2_128-NEXT:    adrp x8, .LCPI2_0
; SVE2_128-NEXT:    ldr d0, [x0]
; SVE2_128-NEXT:    ldr d1, [x1]
; SVE2_128-NEXT:    ldr q2, [x8, :lo12:.LCPI2_0]
; SVE2_128-NEXT:    tbl z0.b, { z0.b, z1.b }, z2.b
; SVE2_128-NEXT:    // kill: def $d0 killed $d0 killed $z0
; SVE2_128-NEXT:    ret
;
; SVE2_128_NOMAX-LABEL: shuffle_index_indices_from_both_ops:
; SVE2_128_NOMAX:       // %bb.0:
; SVE2_128_NOMAX-NEXT:    ldr d0, [x1]
; SVE2_128_NOMAX-NEXT:    ldr d1, [x0]
; SVE2_128_NOMAX-NEXT:    mov z2.b, z0.b[3]
; SVE2_128_NOMAX-NEXT:    mov z3.b, z0.b[2]
; SVE2_128_NOMAX-NEXT:    mov z4.b, z0.b[1]
; SVE2_128_NOMAX-NEXT:    mov z1.b, z1.b[1]
; SVE2_128_NOMAX-NEXT:    mov z5.b, z0.b[7]
; SVE2_128_NOMAX-NEXT:    mov z6.b, z0.b[6]
; SVE2_128_NOMAX-NEXT:    mov z0.b, z0.b[4]
; SVE2_128_NOMAX-NEXT:    zip1 z2.b, z3.b, z2.b
; SVE2_128_NOMAX-NEXT:    zip1 z1.b, z1.b, z4.b
; SVE2_128_NOMAX-NEXT:    zip1 z3.b, z6.b, z5.b
; SVE2_128_NOMAX-NEXT:    zip1 z0.b, z0.b, z0.b
; SVE2_128_NOMAX-NEXT:    zip1 z1.h, z1.h, z2.h
; SVE2_128_NOMAX-NEXT:    zip1 z0.h, z0.h, z3.h
; SVE2_128_NOMAX-NEXT:    zip1 z0.s, z1.s, z0.s
; SVE2_128_NOMAX-NEXT:    // kill: def $d0 killed $d0 killed $z0
; SVE2_128_NOMAX-NEXT:    ret
;
; SVE2_NOMIN_NOMAX-LABEL: shuffle_index_indices_from_both_ops:
; SVE2_NOMIN_NOMAX:       // %bb.0:
; SVE2_NOMIN_NOMAX-NEXT:    ldr d0, [x1]
; SVE2_NOMIN_NOMAX-NEXT:    ldr d1, [x0]
; SVE2_NOMIN_NOMAX-NEXT:    mov z2.b, z0.b[3]
; SVE2_NOMIN_NOMAX-NEXT:    mov z3.b, z0.b[2]
; SVE2_NOMIN_NOMAX-NEXT:    mov z4.b, z0.b[1]
; SVE2_NOMIN_NOMAX-NEXT:    mov z1.b, z1.b[1]
; SVE2_NOMIN_NOMAX-NEXT:    mov z5.b, z0.b[7]
; SVE2_NOMIN_NOMAX-NEXT:    mov z6.b, z0.b[6]
; SVE2_NOMIN_NOMAX-NEXT:    mov z0.b, z0.b[4]
; SVE2_NOMIN_NOMAX-NEXT:    zip1 z2.b, z3.b, z2.b
; SVE2_NOMIN_NOMAX-NEXT:    zip1 z1.b, z1.b, z4.b
; SVE2_NOMIN_NOMAX-NEXT:    zip1 z3.b, z6.b, z5.b
; SVE2_NOMIN_NOMAX-NEXT:    zip1 z0.b, z0.b, z0.b
; SVE2_NOMIN_NOMAX-NEXT:    zip1 z1.h, z1.h, z2.h
; SVE2_NOMIN_NOMAX-NEXT:    zip1 z0.h, z0.h, z3.h
; SVE2_NOMIN_NOMAX-NEXT:    zip1 z0.s, z1.s, z0.s
; SVE2_NOMIN_NOMAX-NEXT:    // kill: def $d0 killed $d0 killed $z0
; SVE2_NOMIN_NOMAX-NEXT:    ret
;
; SVE2_MIN_256_NOMAX-LABEL: shuffle_index_indices_from_both_ops:
; SVE2_MIN_256_NOMAX:       // %bb.0:
; SVE2_MIN_256_NOMAX-NEXT:    ldr d0, [x1]
; SVE2_MIN_256_NOMAX-NEXT:    ldr d1, [x0]
; SVE2_MIN_256_NOMAX-NEXT:    mov z2.b, z0.b[3]
; SVE2_MIN_256_NOMAX-NEXT:    mov z3.b, z0.b[2]
; SVE2_MIN_256_NOMAX-NEXT:    mov z4.b, z0.b[1]
; SVE2_MIN_256_NOMAX-NEXT:    mov z1.b, z1.b[1]
; SVE2_MIN_256_NOMAX-NEXT:    mov z5.b, z0.b[7]
; SVE2_MIN_256_NOMAX-NEXT:    mov z6.b, z0.b[6]
; SVE2_MIN_256_NOMAX-NEXT:    mov z0.b, z0.b[4]
; SVE2_MIN_256_NOMAX-NEXT:    zip1 z2.b, z3.b, z2.b
; SVE2_MIN_256_NOMAX-NEXT:    zip1 z1.b, z1.b, z4.b
; SVE2_MIN_256_NOMAX-NEXT:    zip1 z3.b, z6.b, z5.b
; SVE2_MIN_256_NOMAX-NEXT:    zip1 z0.b, z0.b, z0.b
; SVE2_MIN_256_NOMAX-NEXT:    zip1 z1.h, z1.h, z2.h
; SVE2_MIN_256_NOMAX-NEXT:    zip1 z0.h, z0.h, z3.h
; SVE2_MIN_256_NOMAX-NEXT:    zip1 z0.s, z1.s, z0.s
; SVE2_MIN_256_NOMAX-NEXT:    // kill: def $d0 killed $d0 killed $z0
; SVE2_MIN_256_NOMAX-NEXT:    ret
  %op1 = load <8 x i8>, ptr %a
  %op2 = load <8 x i8>, ptr %b
  %1 = shufflevector <8 x i8> %op1, <8 x i8> %op2, <8 x i32> <i32 1, i32 9, i32 10, i32 11, i32 12, i32 12, i32 14, i32 15>
  ret <8 x i8> %1
}

; Same two-operand mask as above, but the last mask element is poison. In the
; exact-128-bit configuration the constant index vector holds 0 in that lane.
; SVE2_128:       .LCPI3_0:
; SVE2_128-NEXT:    .byte 1 // 0x1
; SVE2_128-NEXT:    .byte 17 // 0x11
; SVE2_128-NEXT:    .byte 18 // 0x12
; SVE2_128-NEXT:    .byte 19 // 0x13
; SVE2_128-NEXT:    .byte 20 // 0x14
; SVE2_128-NEXT:    .byte 20 // 0x14
; SVE2_128-NEXT:    .byte 22 // 0x16
; SVE2_128-NEXT:    .byte 0 // 0x0
; SVE2_128-NEXT:    .byte 255 // 0xff
; SVE2_128-NEXT:    .byte 255 // 0xff
define <8 x i8> @shuffle_index_poison_value(ptr %a, ptr %b) {
; SVE2_128-LABEL: shuffle_index_poison_value:
; SVE2_128:       // %bb.0:
; SVE2_128-NEXT:    adrp x8, .LCPI3_0
; SVE2_128-NEXT:    ldr d0, [x0]
; SVE2_128-NEXT:    ldr d1, [x1]
; SVE2_128-NEXT:    ldr q2, [x8, :lo12:.LCPI3_0]
; SVE2_128-NEXT:    tbl z0.b, { z0.b, z1.b }, z2.b
; SVE2_128-NEXT:    // kill: def $d0 killed $d0 killed $z0
; SVE2_128-NEXT:    ret
;
; SVE2_128_NOMAX-LABEL: shuffle_index_poison_value:
; SVE2_128_NOMAX:       // %bb.0:
; SVE2_128_NOMAX-NEXT:    ldr d0, [x1]
; SVE2_128_NOMAX-NEXT:    ldr d1, [x0]
; SVE2_128_NOMAX-NEXT:    mov z2.b, z0.b[3]
; SVE2_128_NOMAX-NEXT:    mov z3.b, z0.b[2]
; SVE2_128_NOMAX-NEXT:    mov z4.b, z0.b[1]
; SVE2_128_NOMAX-NEXT:    mov z1.b, z1.b[1]
; SVE2_128_NOMAX-NEXT:    mov z5.b, z0.b[4]
; SVE2_128_NOMAX-NEXT:    mov z0.b, z0.b[6]
; SVE2_128_NOMAX-NEXT:    zip1 z2.b, z3.b, z2.b
; SVE2_128_NOMAX-NEXT:    zip1 z1.b, z1.b, z4.b
; SVE2_128_NOMAX-NEXT:    zip1 z3.b, z5.b, z5.b
; SVE2_128_NOMAX-NEXT:    zip1 z1.h, z1.h, z2.h
; SVE2_128_NOMAX-NEXT:    zip1 z0.h, z3.h, z0.h
; SVE2_128_NOMAX-NEXT:    zip1 z0.s, z1.s, z0.s
; SVE2_128_NOMAX-NEXT:    // kill: def $d0 killed $d0 killed $z0
; SVE2_128_NOMAX-NEXT:    ret
;
; SVE2_NOMIN_NOMAX-LABEL: shuffle_index_poison_value:
; SVE2_NOMIN_NOMAX:       // %bb.0:
; SVE2_NOMIN_NOMAX-NEXT:    ldr d0, [x1]
; SVE2_NOMIN_NOMAX-NEXT:    ldr d1, [x0]
; SVE2_NOMIN_NOMAX-NEXT:    mov z2.b, z0.b[3]
; SVE2_NOMIN_NOMAX-NEXT:    mov z3.b, z0.b[2]
; SVE2_NOMIN_NOMAX-NEXT:    mov z4.b, z0.b[1]
; SVE2_NOMIN_NOMAX-NEXT:    mov z1.b, z1.b[1]
; SVE2_NOMIN_NOMAX-NEXT:    mov z5.b, z0.b[4]
; SVE2_NOMIN_NOMAX-NEXT:    mov z0.b, z0.b[6]
; SVE2_NOMIN_NOMAX-NEXT:    zip1 z2.b, z3.b, z2.b
; SVE2_NOMIN_NOMAX-NEXT:    zip1 z1.b, z1.b, z4.b
; SVE2_NOMIN_NOMAX-NEXT:    zip1 z3.b, z5.b, z5.b
; SVE2_NOMIN_NOMAX-NEXT:    zip1 z1.h, z1.h, z2.h
; SVE2_NOMIN_NOMAX-NEXT:    zip1 z0.h, z3.h, z0.h
; SVE2_NOMIN_NOMAX-NEXT:    zip1 z0.s, z1.s, z0.s
; SVE2_NOMIN_NOMAX-NEXT:    // kill: def $d0 killed $d0 killed $z0
; SVE2_NOMIN_NOMAX-NEXT:    ret
;
; SVE2_MIN_256_NOMAX-LABEL: shuffle_index_poison_value:
; SVE2_MIN_256_NOMAX:       // %bb.0:
; SVE2_MIN_256_NOMAX-NEXT:    ldr d0, [x1]
; SVE2_MIN_256_NOMAX-NEXT:    ldr d1, [x0]
; SVE2_MIN_256_NOMAX-NEXT:    mov z2.b, z0.b[3]
; SVE2_MIN_256_NOMAX-NEXT:    mov z3.b, z0.b[2]
; SVE2_MIN_256_NOMAX-NEXT:    mov z4.b, z0.b[1]
; SVE2_MIN_256_NOMAX-NEXT:    mov z1.b, z1.b[1]
; SVE2_MIN_256_NOMAX-NEXT:    mov z5.b, z0.b[4]
; SVE2_MIN_256_NOMAX-NEXT:    mov z0.b, z0.b[6]
; SVE2_MIN_256_NOMAX-NEXT:    zip1 z2.b, z3.b, z2.b
; SVE2_MIN_256_NOMAX-NEXT:    zip1 z1.b, z1.b, z4.b
; SVE2_MIN_256_NOMAX-NEXT:    zip1 z3.b, z5.b, z5.b
; SVE2_MIN_256_NOMAX-NEXT:    zip1 z1.h, z1.h, z2.h
; SVE2_MIN_256_NOMAX-NEXT:    zip1 z0.h, z3.h, z0.h
; SVE2_MIN_256_NOMAX-NEXT:    zip1 z0.s, z1.s, z0.s
; SVE2_MIN_256_NOMAX-NEXT:    // kill: def $d0 killed $d0 killed $z0
; SVE2_MIN_256_NOMAX-NEXT:    ret
  %op1 = load <8 x i8>, ptr %a
  %op2 = load <8 x i8>, ptr %b
  %1 = shufflevector <8 x i8> %op1, <8 x i8> %op2, <8 x i32> <i32 1, i32 9, i32 10, i32 11, i32 12, i32 12, i32 14, i32 poison>
  ret <8 x i8> %1
}

; The first shuffle operand is poison, so only %b is loaded and a
; single-register tbl on %op2 is expected in every configuration.
define <8 x i8> @shuffle_op1_poison(ptr %a, ptr %b) {
; SVE2_128-LABEL: shuffle_op1_poison:
; SVE2_128:       // %bb.0:
; SVE2_128-NEXT:    adrp x8, .LCPI4_0
; SVE2_128-NEXT:    ldr d0, [x1]
; SVE2_128-NEXT:    ldr q1, [x8, :lo12:.LCPI4_0]
; SVE2_128-NEXT:    tbl z0.b, { z0.b }, z1.b
; SVE2_128-NEXT:    // kill: def $d0 killed $d0 killed $z0
; SVE2_128-NEXT:    ret
;
; SVE2_128_NOMAX-LABEL: shuffle_op1_poison:
; SVE2_128_NOMAX:       // %bb.0:
; SVE2_128_NOMAX-NEXT:    adrp x8, .LCPI4_0
; SVE2_128_NOMAX-NEXT:    ldr d0, [x1]
; SVE2_128_NOMAX-NEXT:    ldr q1, [x8, :lo12:.LCPI4_0]
; SVE2_128_NOMAX-NEXT:    tbl z0.b, { z0.b }, z1.b
; SVE2_128_NOMAX-NEXT:    // kill: def $d0 killed $d0 killed $z0
; SVE2_128_NOMAX-NEXT:    ret
;
; SVE2_NOMIN_NOMAX-LABEL: shuffle_op1_poison:
; SVE2_NOMIN_NOMAX:       // %bb.0:
; SVE2_NOMIN_NOMAX-NEXT:    adrp x8, .LCPI4_0
; SVE2_NOMIN_NOMAX-NEXT:    ldr d0, [x1]
; SVE2_NOMIN_NOMAX-NEXT:    ldr q1, [x8, :lo12:.LCPI4_0]
; SVE2_NOMIN_NOMAX-NEXT:    tbl z0.b, { z0.b }, z1.b
; SVE2_NOMIN_NOMAX-NEXT:    // kill: def $d0 killed $d0 killed $z0
; SVE2_NOMIN_NOMAX-NEXT:    ret
;
; SVE2_MIN_256_NOMAX-LABEL: shuffle_op1_poison:
; SVE2_MIN_256_NOMAX:       // %bb.0:
; SVE2_MIN_256_NOMAX-NEXT:    ptrue p0.b, vl32
; SVE2_MIN_256_NOMAX-NEXT:    adrp x8, .LCPI4_0
; SVE2_MIN_256_NOMAX-NEXT:    add x8, x8, :lo12:.LCPI4_0
; SVE2_MIN_256_NOMAX-NEXT:    ldr d1, [x1]
; SVE2_MIN_256_NOMAX-NEXT:    ld1b { z0.b }, p0/z, [x8]
; SVE2_MIN_256_NOMAX-NEXT:    tbl z0.b, { z1.b }, z0.b
; SVE2_MIN_256_NOMAX-NEXT:    // kill: def $d0 killed $d0 killed $z0
; SVE2_MIN_256_NOMAX-NEXT:    ret
  %op2 = load <8 x i8>, ptr %b
  %1 = shufflevector <8 x i8> poison, <8 x i8> %op2, <8 x i32> <i32 1, i32 9, i32 10, i32 11, i32 12, i32 12, i32 14, i32 15>
  ret <8 x i8> %1
}

; Negative test: the mask selects from both operands, but the function is
; pinned to vscale_range(16,16). The indices for the second operand cannot be
; represented, because an i8 mask element can hold at most 255 (with a
; 2048-bit register the second operand's lanes start at index 256). The checks
; therefore expect the mov/zip1 expansion instead of a two-register tbl.
define <8 x i8> @negative_test_shuffle_index_size_op_both_maxhw(ptr %a, ptr %b) "target-features"="+sve2" vscale_range(16,16) {
; CHECK-LABEL: negative_test_shuffle_index_size_op_both_maxhw:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr d0, [x1]
; CHECK-NEXT:    ldr d1, [x0]
; CHECK-NEXT:    mov z2.b, z0.b[3]
; CHECK-NEXT:    mov z3.b, z0.b[2]
; CHECK-NEXT:    mov z4.b, z0.b[1]
; CHECK-NEXT:    mov z1.b, z1.b[1]
; CHECK-NEXT:    mov z5.b, z0.b[7]
; CHECK-NEXT:    mov z6.b, z0.b[6]
; CHECK-NEXT:    mov z0.b, z0.b[4]
; CHECK-NEXT:    zip1 z2.b, z3.b, z2.b
; CHECK-NEXT:    zip1 z1.b, z1.b, z4.b
; CHECK-NEXT:    zip1 z3.b, z6.b, z5.b
; CHECK-NEXT:    zip1 z0.b, z0.b, z0.b
; CHECK-NEXT:    zip1 z1.h, z1.h, z2.h
; CHECK-NEXT:    zip1 z0.h, z0.h, z3.h
; CHECK-NEXT:    zip1 z0.s, z1.s, z0.s
; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT:    ret
  %op1 = load <8 x i8>, ptr %a
  %op2 = load <8 x i8>, ptr %b
  %1 = shufflevector <8 x i8> %op1, <8 x i8> %op2, <8 x i32> <i32 1, i32 9, i32 10, i32 11, i32 12, i32 12, i32 14, i32 15>
  ret <8 x i8> %1
}

; Same vscale_range(16,16) attribute as the negative test, but every mask
; index selects from %op1, so the indices 0..7 fit in an i8 element and a
; single-register tbl is expected in all configurations.
; CHECK:       .LCPI6_0:
; CHECK-NEXT:    .byte 0 // 0x0
; CHECK-NEXT:    .byte 7 // 0x7
; CHECK-NEXT:    .byte 2 // 0x2
; CHECK-NEXT:    .byte 3 // 0x3
; CHECK-NEXT:    .byte 4 // 0x4
; CHECK-NEXT:    .byte 5 // 0x5
; CHECK-NEXT:    .byte 6 // 0x6
; CHECK-NEXT:    .byte 7 // 0x7
; CHECK-NEXT:    .byte 255 // 0xff
; CHECK-NEXT:    .byte 255 // 0xff
define <8 x i8> @shuffle_index_size_op1_maxhw(ptr %a, ptr %b) "target-features"="+sve2" vscale_range(16,16) {
; CHECK-LABEL: shuffle_index_size_op1_maxhw:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.b
; CHECK-NEXT:    adrp x8, .LCPI6_0
; CHECK-NEXT:    add x8, x8, :lo12:.LCPI6_0
; CHECK-NEXT:    ldr d1, [x0]
; CHECK-NEXT:    ld1b { z0.b }, p0/z, [x8]
; CHECK-NEXT:    tbl z0.b, { z1.b }, z0.b
; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT:    ret
  %op1 = load <8 x i8>, ptr %a
  %op2 = load <8 x i8>, ptr %b
  %1 = shufflevector <8 x i8> %op1, <8 x i8> %op2, <8 x i32> <i32 0, i32 7, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <8 x i8> %1
}

; Two-operand shuffle of <8 x i16>. With an exact 128-bit vector length the
; mask is emitted directly as the constant index vector. In the other
; configurations the expected code builds the indices at run time from two
; constants and the halfword element count: mad computes
; LCPI7_1 + cnth * LCPI7_0, where LCPI7_0 is 1 for lanes taken from the second
; operand and 0 otherwise.
; SVE2_128:       .LCPI7_0:
; SVE2_128-NEXT:    .hword 1 // 0x1
; SVE2_128-NEXT:    .hword 9 // 0x9
; SVE2_128-NEXT:    .hword 10 // 0xa
; SVE2_128-NEXT:    .hword 11 // 0xb
; SVE2_128-NEXT:    .hword 12 // 0xc
; SVE2_128-NEXT:    .hword 12 // 0xc
; SVE2_128-NEXT:    .hword 14 // 0xe
; SVE2_128-NEXT:    .hword 15 // 0xf

; SVE2_128_NOMAX:       .LCPI7_0:
; SVE2_128_NOMAX-NEXT:    .hword 0 // 0x0
; SVE2_128_NOMAX-NEXT:    .hword 1 // 0x1
; SVE2_128_NOMAX-NEXT:    .hword 1 // 0x1
; SVE2_128_NOMAX-NEXT:    .hword 1 // 0x1
; SVE2_128_NOMAX-NEXT:    .hword 1 // 0x1
; SVE2_128_NOMAX-NEXT:    .hword 1 // 0x1
; SVE2_128_NOMAX-NEXT:    .hword 1 // 0x1
; SVE2_128_NOMAX-NEXT:    .hword 1 // 0x1
; SVE2_128_NOMAX-NEXT:.LCPI7_1:
; SVE2_128_NOMAX-NEXT:    .hword 1 // 0x1
; SVE2_128_NOMAX-NEXT:    .hword 1 // 0x1
; SVE2_128_NOMAX-NEXT:    .hword 2 // 0x2
; SVE2_128_NOMAX-NEXT:    .hword 3 // 0x3
; SVE2_128_NOMAX-NEXT:    .hword 4 // 0x4
; SVE2_128_NOMAX-NEXT:    .hword 4 // 0x4
; SVE2_128_NOMAX-NEXT:    .hword 6 // 0x6
; SVE2_128_NOMAX-NEXT:    .hword 7 // 0x7

; SVE2_NOMIN_NOMAX:       .LCPI7_0:
; SVE2_NOMIN_NOMAX-NEXT:    .hword 0 // 0x0
; SVE2_NOMIN_NOMAX-NEXT:    .hword 1 // 0x1
; SVE2_NOMIN_NOMAX-NEXT:    .hword 1 // 0x1
; SVE2_NOMIN_NOMAX-NEXT:    .hword 1 // 0x1
; SVE2_NOMIN_NOMAX-NEXT:    .hword 1 // 0x1
; SVE2_NOMIN_NOMAX-NEXT:    .hword 1 // 0x1
; SVE2_NOMIN_NOMAX-NEXT:    .hword 1 // 0x1
; SVE2_NOMIN_NOMAX-NEXT:    .hword 1 // 0x1
; SVE2_NOMIN_NOMAX-NEXT:.LCPI7_1:
; SVE2_NOMIN_NOMAX-NEXT:    .hword 1 // 0x1
; SVE2_NOMIN_NOMAX-NEXT:    .hword 1 // 0x1
; SVE2_NOMIN_NOMAX-NEXT:    .hword 2 // 0x2
; SVE2_NOMIN_NOMAX-NEXT:    .hword 3 // 0x3
; SVE2_NOMIN_NOMAX-NEXT:    .hword 4 // 0x4
; SVE2_NOMIN_NOMAX-NEXT:    .hword 4 // 0x4
; SVE2_NOMIN_NOMAX-NEXT:    .hword 6 // 0x6
; SVE2_NOMIN_NOMAX-NEXT:    .hword 7 // 0x7

; SVE2_MIN_256_NOMAX:       .LCPI7_0:
; SVE2_MIN_256_NOMAX-NEXT:    .hword 0 // 0x0
; SVE2_MIN_256_NOMAX-NEXT:    .hword 1 // 0x1
; SVE2_MIN_256_NOMAX-NEXT:    .hword 1 // 0x1
; SVE2_MIN_256_NOMAX-NEXT:    .hword 1 // 0x1
; SVE2_MIN_256_NOMAX-NEXT:    .hword 1 // 0x1
; SVE2_MIN_256_NOMAX-NEXT:    .hword 1 // 0x1
; SVE2_MIN_256_NOMAX-NEXT:    .hword 1 // 0x1
; SVE2_MIN_256_NOMAX-NEXT:    .hword 1 // 0x1
; SVE2_MIN_256_NOMAX-NEXT:    .hword 0 // 0x0
; SVE2_MIN_256_NOMAX-NEXT:    .hword 0 // 0x0
; SVE2_MIN_256_NOMAX-NEXT:    .hword 0 // 0x0
; SVE2_MIN_256_NOMAX-NEXT:    .hword 0 // 0x0
; SVE2_MIN_256_NOMAX-NEXT:    .hword 0 // 0x0
; SVE2_MIN_256_NOMAX-NEXT:    .hword 0 // 0x0
; SVE2_MIN_256_NOMAX-NEXT:    .hword 0 // 0x0
; SVE2_MIN_256_NOMAX-NEXT:    .hword 0 // 0x0
; SVE2_MIN_256_NOMAX-NEXT:.LCPI7_1:
; SVE2_MIN_256_NOMAX-NEXT:    .hword 1 // 0x1
; SVE2_MIN_256_NOMAX-NEXT:    .hword 1 // 0x1
; SVE2_MIN_256_NOMAX-NEXT:    .hword 2 // 0x2
; SVE2_MIN_256_NOMAX-NEXT:    .hword 3 // 0x3
; SVE2_MIN_256_NOMAX-NEXT:    .hword 4 // 0x4
; SVE2_MIN_256_NOMAX-NEXT:    .hword 4 // 0x4
; SVE2_MIN_256_NOMAX-NEXT:    .hword 6 // 0x6
; SVE2_MIN_256_NOMAX-NEXT:    .hword 7 // 0x7
; SVE2_MIN_256_NOMAX-NEXT:    .hword 65535 // 0xffff
; SVE2_MIN_256_NOMAX-NEXT:    .hword 65535 // 0xffff
; SVE2_MIN_256_NOMAX-NEXT:    .hword 65535 // 0xffff
; SVE2_MIN_256_NOMAX-NEXT:    .hword 65535 // 0xffff
; SVE2_MIN_256_NOMAX-NEXT:    .hword 65535 // 0xffff
; SVE2_MIN_256_NOMAX-NEXT:    .hword 65535 // 0xffff
; SVE2_MIN_256_NOMAX-NEXT:    .hword 65535 // 0xffff
; SVE2_MIN_256_NOMAX-NEXT:    .hword 65535 // 0xffff
define <8 x i16> @shuffle_index_indices_from_both_ops_i16(ptr %a, ptr %b) {
; SVE2_128-LABEL: shuffle_index_indices_from_both_ops_i16:
; SVE2_128:       // %bb.0:
; SVE2_128-NEXT:    adrp x8, .LCPI7_0
; SVE2_128-NEXT:    ldr q0, [x0]
; SVE2_128-NEXT:    ldr q1, [x1]
; SVE2_128-NEXT:    ldr q2, [x8, :lo12:.LCPI7_0]
; SVE2_128-NEXT:    tbl z0.h, { z0.h, z1.h }, z2.h
; SVE2_128-NEXT:    // kill: def $q0 killed $q0 killed $z0
; SVE2_128-NEXT:    ret
;
; SVE2_128_NOMAX-LABEL: shuffle_index_indices_from_both_ops_i16:
; SVE2_128_NOMAX:       // %bb.0:
; SVE2_128_NOMAX-NEXT:    cnth x8
; SVE2_128_NOMAX-NEXT:    adrp x9, .LCPI7_0
; SVE2_128_NOMAX-NEXT:    adrp x10, .LCPI7_1
; SVE2_128_NOMAX-NEXT:    mov z0.h, w8
; SVE2_128_NOMAX-NEXT:    ldr q1, [x9, :lo12:.LCPI7_0]
; SVE2_128_NOMAX-NEXT:    ldr q2, [x10, :lo12:.LCPI7_1]
; SVE2_128_NOMAX-NEXT:    ptrue p0.h, vl8
; SVE2_128_NOMAX-NEXT:    mad z0.h, p0/m, z1.h, z2.h
; SVE2_128_NOMAX-NEXT:    ldr q1, [x0]
; SVE2_128_NOMAX-NEXT:    ldr q2, [x1]
; SVE2_128_NOMAX-NEXT:    tbl z0.h, { z1.h, z2.h }, z0.h
; SVE2_128_NOMAX-NEXT:    // kill: def $q0 killed $q0 killed $z0
; SVE2_128_NOMAX-NEXT:    ret
;
; SVE2_NOMIN_NOMAX-LABEL: shuffle_index_indices_from_both_ops_i16:
; SVE2_NOMIN_NOMAX:       // %bb.0:
; SVE2_NOMIN_NOMAX-NEXT:    cnth x8
; SVE2_NOMIN_NOMAX-NEXT:    adrp x9, .LCPI7_0
; SVE2_NOMIN_NOMAX-NEXT:    adrp x10, .LCPI7_1
; SVE2_NOMIN_NOMAX-NEXT:    mov z0.h, w8
; SVE2_NOMIN_NOMAX-NEXT:    ldr q1, [x9, :lo12:.LCPI7_0]
; SVE2_NOMIN_NOMAX-NEXT:    ldr q2, [x10, :lo12:.LCPI7_1]
; SVE2_NOMIN_NOMAX-NEXT:    ptrue p0.h, vl8
; SVE2_NOMIN_NOMAX-NEXT:    mad z0.h, p0/m, z1.h, z2.h
; SVE2_NOMIN_NOMAX-NEXT:    ldr q1, [x0]
; SVE2_NOMIN_NOMAX-NEXT:    ldr q2, [x1]
; SVE2_NOMIN_NOMAX-NEXT:    tbl z0.h, { z1.h, z2.h }, z0.h
; SVE2_NOMIN_NOMAX-NEXT:    // kill: def $q0 killed $q0 killed $z0
; SVE2_NOMIN_NOMAX-NEXT:    ret
;
; SVE2_MIN_256_NOMAX-LABEL: shuffle_index_indices_from_both_ops_i16:
; SVE2_MIN_256_NOMAX:       // %bb.0:
; SVE2_MIN_256_NOMAX-NEXT:    ptrue p0.h, vl16
; SVE2_MIN_256_NOMAX-NEXT:    adrp x8, .LCPI7_0
; SVE2_MIN_256_NOMAX-NEXT:    add x8, x8, :lo12:.LCPI7_0
; SVE2_MIN_256_NOMAX-NEXT:    adrp x9, .LCPI7_1
; SVE2_MIN_256_NOMAX-NEXT:    add x9, x9, :lo12:.LCPI7_1
; SVE2_MIN_256_NOMAX-NEXT:    cnth x10
; SVE2_MIN_256_NOMAX-NEXT:    ld1h { z0.h }, p0/z, [x8]
; SVE2_MIN_256_NOMAX-NEXT:    ld1h { z1.h }, p0/z, [x9]
; SVE2_MIN_256_NOMAX-NEXT:    mov z2.h, w10
; SVE2_MIN_256_NOMAX-NEXT:    mad z0.h, p0/m, z2.h, z1.h
; SVE2_MIN_256_NOMAX-NEXT:    ldr q1, [x0]
; SVE2_MIN_256_NOMAX-NEXT:    ldr q2, [x1]
; SVE2_MIN_256_NOMAX-NEXT:    tbl z0.h, { z1.h, z2.h }, z0.h
; SVE2_MIN_256_NOMAX-NEXT:    // kill: def $q0 killed $q0 killed $z0
; SVE2_MIN_256_NOMAX-NEXT:    ret
  %op1 = load <8 x i16>, ptr %a
  %op2 = load <8 x i16>, ptr %b
  %1 = shufflevector <8 x i16> %op1, <8 x i16> %op2, <8 x i32> <i32 1, i32 9, i32 10, i32 11, i32 12, i32 12, i32 14, i32 15>
  ret <8 x i16> %1
}