xref: /llvm-project/llvm/test/CodeGen/AArch64/sve-fixed-length-vector-shuffle-tbl.ll (revision 61510b51c33464a6bc15e4cf5b1ee07e2e0ec1c9)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -mattr=+sve2 -force-streaming-compatible -aarch64-sve-vector-bits-min=128 -aarch64-sve-vector-bits-max=128  < %s | FileCheck %s -check-prefixes=CHECK,SVE2_128
3; RUN: llc -mattr=+sve2 -force-streaming-compatible -aarch64-sve-vector-bits-min=128 < %s | FileCheck %s -check-prefixes=CHECK,SVE2_128_NOMAX
4; RUN: llc -mattr=+sve2 -force-streaming-compatible < %s | FileCheck %s -check-prefixes=CHECK,SVE2_NOMIN_NOMAX
5; RUN: llc -mattr=+sve2 -force-streaming-compatible -aarch64-sve-vector-bits-min=256 < %s | FileCheck %s -check-prefixes=CHECK,SVE2_MIN_256_NOMAX
6
7target triple = "aarch64-unknown-linux-gnu"
8
9; SVE2_128: .LCPI0_0:
10; SVE2_128-NEXT:        .byte   0                               // 0x0
11; SVE2_128-NEXT:        .byte   7                               // 0x7
12; SVE2_128-NEXT:        .byte   2                               // 0x2
13; SVE2_128-NEXT:        .byte   3                               // 0x3
14; SVE2_128-NEXT:        .byte   4                               // 0x4
15; SVE2_128-NEXT:        .byte   5                               // 0x5
16; SVE2_128-NEXT:        .byte   6                               // 0x6
17; SVE2_128-NEXT:        .byte   7                               // 0x7
18; SVE2_128-NEXT:        .byte   255                             // 0xff
19; SVE2_128-NEXT:        .byte   255                             // 0xff
; The shuffle mask <0,7,2,3,4,5,6,7> only selects lanes of %op1 (every index
; is below 8). All four run configurations expect a single load from x0 and a
; single-source tbl whose index vector comes from the constant pool. The
; min-256 configuration loads that index vector with ptrue vl32 + ld1b instead
; of a plain ldr q.
20define <8 x i8> @shuffle_index_indices_from_op1(ptr %a, ptr %b) {
21; SVE2_128-LABEL: shuffle_index_indices_from_op1:
22; SVE2_128:       // %bb.0:
23; SVE2_128-NEXT:    adrp x8, .LCPI0_0
24; SVE2_128-NEXT:    ldr d0, [x0]
25; SVE2_128-NEXT:    ldr q1, [x8, :lo12:.LCPI0_0]
26; SVE2_128-NEXT:    tbl z0.b, { z0.b }, z1.b
27; SVE2_128-NEXT:    // kill: def $d0 killed $d0 killed $z0
28; SVE2_128-NEXT:    ret
29;
30; SVE2_128_NOMAX-LABEL: shuffle_index_indices_from_op1:
31; SVE2_128_NOMAX:       // %bb.0:
32; SVE2_128_NOMAX-NEXT:    adrp x8, .LCPI0_0
33; SVE2_128_NOMAX-NEXT:    ldr d0, [x0]
34; SVE2_128_NOMAX-NEXT:    ldr q1, [x8, :lo12:.LCPI0_0]
35; SVE2_128_NOMAX-NEXT:    tbl z0.b, { z0.b }, z1.b
36; SVE2_128_NOMAX-NEXT:    // kill: def $d0 killed $d0 killed $z0
37; SVE2_128_NOMAX-NEXT:    ret
38;
39; SVE2_NOMIN_NOMAX-LABEL: shuffle_index_indices_from_op1:
40; SVE2_NOMIN_NOMAX:       // %bb.0:
41; SVE2_NOMIN_NOMAX-NEXT:    adrp x8, .LCPI0_0
42; SVE2_NOMIN_NOMAX-NEXT:    ldr d0, [x0]
43; SVE2_NOMIN_NOMAX-NEXT:    ldr q1, [x8, :lo12:.LCPI0_0]
44; SVE2_NOMIN_NOMAX-NEXT:    tbl z0.b, { z0.b }, z1.b
45; SVE2_NOMIN_NOMAX-NEXT:    // kill: def $d0 killed $d0 killed $z0
46; SVE2_NOMIN_NOMAX-NEXT:    ret
47;
48; SVE2_MIN_256_NOMAX-LABEL: shuffle_index_indices_from_op1:
49; SVE2_MIN_256_NOMAX:       // %bb.0:
50; SVE2_MIN_256_NOMAX-NEXT:    ptrue p0.b, vl32
51; SVE2_MIN_256_NOMAX-NEXT:    adrp x8, .LCPI0_0
52; SVE2_MIN_256_NOMAX-NEXT:    add x8, x8, :lo12:.LCPI0_0
53; SVE2_MIN_256_NOMAX-NEXT:    ldr d1, [x0]
54; SVE2_MIN_256_NOMAX-NEXT:    ld1b { z0.b }, p0/z, [x8]
55; SVE2_MIN_256_NOMAX-NEXT:    tbl z0.b, { z1.b }, z0.b
56; SVE2_MIN_256_NOMAX-NEXT:    // kill: def $d0 killed $d0 killed $z0
57; SVE2_MIN_256_NOMAX-NEXT:    ret
58  %op1 = load <8 x i8>, ptr %a
59  %op2 = load <8 x i8>, ptr %b
60  %1 = shufflevector <8 x i8> %op1, <8 x i8> %op2, <8 x i32> <i32 0, i32 7, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
61  ret <8 x i8> %1
62}
63
64; SVE2_128: .LCPI1_0:
65; SVE2_128-NEXT:        .byte   0                               // 0x0
66; SVE2_128-NEXT:        .byte   1                               // 0x1
67; SVE2_128-NEXT:        .byte   1                               // 0x1
68; SVE2_128-NEXT:        .byte   3                               // 0x3
69; SVE2_128-NEXT:        .byte   4                               // 0x4
70; SVE2_128-NEXT:        .byte   7                               // 0x7
71; SVE2_128-NEXT:        .byte   6                               // 0x6
72; SVE2_128-NEXT:        .byte   7                               // 0x7
73; SVE2_128-NEXT:        .byte   255                             // 0xff
74; SVE2_128-NEXT:        .byte   255                             // 0xff
; The shuffle mask <8,9,9,11,12,15,14,15> only selects lanes of %op2 (every
; index is 8 or above). All four run configurations expect a single load from
; x1 and a single-source tbl on that value, with the index vector taken from
; the constant pool. The min-256 configuration loads the index vector with
; ptrue vl32 + ld1b instead of a plain ldr q.
75define <8 x i8> @shuffle_index_indices_from_op2(ptr %a, ptr %b) {
76; SVE2_128-LABEL: shuffle_index_indices_from_op2:
77; SVE2_128:       // %bb.0:
78; SVE2_128-NEXT:    adrp x8, .LCPI1_0
79; SVE2_128-NEXT:    ldr d0, [x1]
80; SVE2_128-NEXT:    ldr q1, [x8, :lo12:.LCPI1_0]
81; SVE2_128-NEXT:    tbl z0.b, { z0.b }, z1.b
82; SVE2_128-NEXT:    // kill: def $d0 killed $d0 killed $z0
83; SVE2_128-NEXT:    ret
84;
85; SVE2_128_NOMAX-LABEL: shuffle_index_indices_from_op2:
86; SVE2_128_NOMAX:       // %bb.0:
87; SVE2_128_NOMAX-NEXT:    adrp x8, .LCPI1_0
88; SVE2_128_NOMAX-NEXT:    ldr d0, [x1]
89; SVE2_128_NOMAX-NEXT:    ldr q1, [x8, :lo12:.LCPI1_0]
90; SVE2_128_NOMAX-NEXT:    tbl z0.b, { z0.b }, z1.b
91; SVE2_128_NOMAX-NEXT:    // kill: def $d0 killed $d0 killed $z0
92; SVE2_128_NOMAX-NEXT:    ret
93;
94; SVE2_NOMIN_NOMAX-LABEL: shuffle_index_indices_from_op2:
95; SVE2_NOMIN_NOMAX:       // %bb.0:
96; SVE2_NOMIN_NOMAX-NEXT:    adrp x8, .LCPI1_0
97; SVE2_NOMIN_NOMAX-NEXT:    ldr d0, [x1]
98; SVE2_NOMIN_NOMAX-NEXT:    ldr q1, [x8, :lo12:.LCPI1_0]
99; SVE2_NOMIN_NOMAX-NEXT:    tbl z0.b, { z0.b }, z1.b
100; SVE2_NOMIN_NOMAX-NEXT:    // kill: def $d0 killed $d0 killed $z0
101; SVE2_NOMIN_NOMAX-NEXT:    ret
102;
103; SVE2_MIN_256_NOMAX-LABEL: shuffle_index_indices_from_op2:
104; SVE2_MIN_256_NOMAX:       // %bb.0:
105; SVE2_MIN_256_NOMAX-NEXT:    ptrue p0.b, vl32
106; SVE2_MIN_256_NOMAX-NEXT:    adrp x8, .LCPI1_0
107; SVE2_MIN_256_NOMAX-NEXT:    add x8, x8, :lo12:.LCPI1_0
108; SVE2_MIN_256_NOMAX-NEXT:    ldr d1, [x1]
109; SVE2_MIN_256_NOMAX-NEXT:    ld1b { z0.b }, p0/z, [x8]
110; SVE2_MIN_256_NOMAX-NEXT:    tbl z0.b, { z1.b }, z0.b
111; SVE2_MIN_256_NOMAX-NEXT:    // kill: def $d0 killed $d0 killed $z0
112; SVE2_MIN_256_NOMAX-NEXT:    ret
113  %op1 = load <8 x i8>, ptr %a
114  %op2 = load <8 x i8>, ptr %b
115  %1 = shufflevector <8 x i8> %op1, <8 x i8> %op2, <8 x i32> <i32 8, i32 9, i32 9, i32 11, i32 12, i32 15, i32 14, i32 15>
116  ret <8 x i8> %1
117}
118
119; SVE2_128: .LCPI2_0:
120; SVE2_128-NEXT:        .byte   1                               // 0x1
121; SVE2_128-NEXT:        .byte   17                              // 0x11
122; SVE2_128-NEXT:        .byte   18                              // 0x12
123; SVE2_128-NEXT:        .byte   19                              // 0x13
124; SVE2_128-NEXT:        .byte   20                              // 0x14
125; SVE2_128-NEXT:        .byte   20                              // 0x14
126; SVE2_128-NEXT:        .byte   22                              // 0x16
127; SVE2_128-NEXT:        .byte   23                              // 0x17
128; SVE2_128-NEXT:        .byte   255                             // 0xff
129; SVE2_128-NEXT:        .byte   255                             // 0xff
; The shuffle mask <1,9,10,11,12,12,14,15> mixes lanes of %op1 (index 1) and
; %op2 (indices 9 and above). Only the configuration that pins both the
; minimum and the maximum vector length to 128 bits expects a two-source tbl
; with a constant-pool index vector. The other three configurations expect
; the result to be assembled from per-lane mov instructions merged with zip1.
130define <8 x i8> @shuffle_index_indices_from_both_ops(ptr %a, ptr %b) {
131; SVE2_128-LABEL: shuffle_index_indices_from_both_ops:
132; SVE2_128:       // %bb.0:
133; SVE2_128-NEXT:    adrp x8, .LCPI2_0
134; SVE2_128-NEXT:    ldr d0, [x0]
135; SVE2_128-NEXT:    ldr d1, [x1]
136; SVE2_128-NEXT:    ldr q2, [x8, :lo12:.LCPI2_0]
137; SVE2_128-NEXT:    tbl z0.b, { z0.b, z1.b }, z2.b
138; SVE2_128-NEXT:    // kill: def $d0 killed $d0 killed $z0
139; SVE2_128-NEXT:    ret
140;
141; SVE2_128_NOMAX-LABEL: shuffle_index_indices_from_both_ops:
142; SVE2_128_NOMAX:       // %bb.0:
143; SVE2_128_NOMAX-NEXT:    ldr d0, [x1]
144; SVE2_128_NOMAX-NEXT:    ldr d1, [x0]
145; SVE2_128_NOMAX-NEXT:    mov z2.b, z0.b[3]
146; SVE2_128_NOMAX-NEXT:    mov z3.b, z0.b[2]
147; SVE2_128_NOMAX-NEXT:    mov z4.b, z0.b[1]
148; SVE2_128_NOMAX-NEXT:    mov z1.b, z1.b[1]
149; SVE2_128_NOMAX-NEXT:    mov z5.b, z0.b[7]
150; SVE2_128_NOMAX-NEXT:    mov z6.b, z0.b[6]
151; SVE2_128_NOMAX-NEXT:    mov z0.b, z0.b[4]
152; SVE2_128_NOMAX-NEXT:    zip1 z2.b, z3.b, z2.b
153; SVE2_128_NOMAX-NEXT:    zip1 z1.b, z1.b, z4.b
154; SVE2_128_NOMAX-NEXT:    zip1 z3.b, z6.b, z5.b
155; SVE2_128_NOMAX-NEXT:    zip1 z0.b, z0.b, z0.b
156; SVE2_128_NOMAX-NEXT:    zip1 z1.h, z1.h, z2.h
157; SVE2_128_NOMAX-NEXT:    zip1 z0.h, z0.h, z3.h
158; SVE2_128_NOMAX-NEXT:    zip1 z0.s, z1.s, z0.s
159; SVE2_128_NOMAX-NEXT:    // kill: def $d0 killed $d0 killed $z0
160; SVE2_128_NOMAX-NEXT:    ret
161;
162; SVE2_NOMIN_NOMAX-LABEL: shuffle_index_indices_from_both_ops:
163; SVE2_NOMIN_NOMAX:       // %bb.0:
164; SVE2_NOMIN_NOMAX-NEXT:    ldr d0, [x1]
165; SVE2_NOMIN_NOMAX-NEXT:    ldr d1, [x0]
166; SVE2_NOMIN_NOMAX-NEXT:    mov z2.b, z0.b[3]
167; SVE2_NOMIN_NOMAX-NEXT:    mov z3.b, z0.b[2]
168; SVE2_NOMIN_NOMAX-NEXT:    mov z4.b, z0.b[1]
169; SVE2_NOMIN_NOMAX-NEXT:    mov z1.b, z1.b[1]
170; SVE2_NOMIN_NOMAX-NEXT:    mov z5.b, z0.b[7]
171; SVE2_NOMIN_NOMAX-NEXT:    mov z6.b, z0.b[6]
172; SVE2_NOMIN_NOMAX-NEXT:    mov z0.b, z0.b[4]
173; SVE2_NOMIN_NOMAX-NEXT:    zip1 z2.b, z3.b, z2.b
174; SVE2_NOMIN_NOMAX-NEXT:    zip1 z1.b, z1.b, z4.b
175; SVE2_NOMIN_NOMAX-NEXT:    zip1 z3.b, z6.b, z5.b
176; SVE2_NOMIN_NOMAX-NEXT:    zip1 z0.b, z0.b, z0.b
177; SVE2_NOMIN_NOMAX-NEXT:    zip1 z1.h, z1.h, z2.h
178; SVE2_NOMIN_NOMAX-NEXT:    zip1 z0.h, z0.h, z3.h
179; SVE2_NOMIN_NOMAX-NEXT:    zip1 z0.s, z1.s, z0.s
180; SVE2_NOMIN_NOMAX-NEXT:    // kill: def $d0 killed $d0 killed $z0
181; SVE2_NOMIN_NOMAX-NEXT:    ret
182;
183; SVE2_MIN_256_NOMAX-LABEL: shuffle_index_indices_from_both_ops:
184; SVE2_MIN_256_NOMAX:       // %bb.0:
185; SVE2_MIN_256_NOMAX-NEXT:    ldr d0, [x1]
186; SVE2_MIN_256_NOMAX-NEXT:    ldr d1, [x0]
187; SVE2_MIN_256_NOMAX-NEXT:    mov z2.b, z0.b[3]
188; SVE2_MIN_256_NOMAX-NEXT:    mov z3.b, z0.b[2]
189; SVE2_MIN_256_NOMAX-NEXT:    mov z4.b, z0.b[1]
190; SVE2_MIN_256_NOMAX-NEXT:    mov z1.b, z1.b[1]
191; SVE2_MIN_256_NOMAX-NEXT:    mov z5.b, z0.b[7]
192; SVE2_MIN_256_NOMAX-NEXT:    mov z6.b, z0.b[6]
193; SVE2_MIN_256_NOMAX-NEXT:    mov z0.b, z0.b[4]
194; SVE2_MIN_256_NOMAX-NEXT:    zip1 z2.b, z3.b, z2.b
195; SVE2_MIN_256_NOMAX-NEXT:    zip1 z1.b, z1.b, z4.b
196; SVE2_MIN_256_NOMAX-NEXT:    zip1 z3.b, z6.b, z5.b
197; SVE2_MIN_256_NOMAX-NEXT:    zip1 z0.b, z0.b, z0.b
198; SVE2_MIN_256_NOMAX-NEXT:    zip1 z1.h, z1.h, z2.h
199; SVE2_MIN_256_NOMAX-NEXT:    zip1 z0.h, z0.h, z3.h
200; SVE2_MIN_256_NOMAX-NEXT:    zip1 z0.s, z1.s, z0.s
201; SVE2_MIN_256_NOMAX-NEXT:    // kill: def $d0 killed $d0 killed $z0
202; SVE2_MIN_256_NOMAX-NEXT:    ret
203  %op1 = load <8 x i8>, ptr %a
204  %op2 = load <8 x i8>, ptr %b
205  %1 = shufflevector <8 x i8> %op1, <8 x i8> %op2, <8 x i32> <i32 1, i32 9, i32 10, i32 11, i32 12, i32 12, i32 14, i32 15>
206  ret <8 x i8> %1
207}
208
209; SVE2_128: .LCPI3_0:
210; SVE2_128-NEXT:        .byte   1                               // 0x1
211; SVE2_128-NEXT:        .byte   17                              // 0x11
212; SVE2_128-NEXT:        .byte   18                              // 0x12
213; SVE2_128-NEXT:        .byte   19                              // 0x13
214; SVE2_128-NEXT:        .byte   20                              // 0x14
215; SVE2_128-NEXT:        .byte   20                              // 0x14
216; SVE2_128-NEXT:        .byte   22                              // 0x16
217; SVE2_128-NEXT:        .byte   0                               // 0x0
218; SVE2_128-NEXT:        .byte   255                             // 0xff
219; SVE2_128-NEXT:        .byte   255                             // 0xff
; The shuffle mask <1,9,10,11,12,12,14,poison> mixes lanes of both operands
; and leaves its last element as poison. The configuration that pins both the
; minimum and the maximum vector length to 128 bits expects a two-source tbl
; with a constant-pool index vector. The other three configurations expect a
; mov/zip1 sequence that uses six per-lane mov instructions.
220define <8 x i8> @shuffle_index_poison_value(ptr %a, ptr %b) {
221; SVE2_128-LABEL: shuffle_index_poison_value:
222; SVE2_128:       // %bb.0:
223; SVE2_128-NEXT:    adrp x8, .LCPI3_0
224; SVE2_128-NEXT:    ldr d0, [x0]
225; SVE2_128-NEXT:    ldr d1, [x1]
226; SVE2_128-NEXT:    ldr q2, [x8, :lo12:.LCPI3_0]
227; SVE2_128-NEXT:    tbl z0.b, { z0.b, z1.b }, z2.b
228; SVE2_128-NEXT:    // kill: def $d0 killed $d0 killed $z0
229; SVE2_128-NEXT:    ret
230;
231; SVE2_128_NOMAX-LABEL: shuffle_index_poison_value:
232; SVE2_128_NOMAX:       // %bb.0:
233; SVE2_128_NOMAX-NEXT:    ldr d0, [x1]
234; SVE2_128_NOMAX-NEXT:    ldr d1, [x0]
235; SVE2_128_NOMAX-NEXT:    mov z2.b, z0.b[3]
236; SVE2_128_NOMAX-NEXT:    mov z3.b, z0.b[2]
237; SVE2_128_NOMAX-NEXT:    mov z4.b, z0.b[1]
238; SVE2_128_NOMAX-NEXT:    mov z1.b, z1.b[1]
239; SVE2_128_NOMAX-NEXT:    mov z5.b, z0.b[4]
240; SVE2_128_NOMAX-NEXT:    mov z0.b, z0.b[6]
241; SVE2_128_NOMAX-NEXT:    zip1 z2.b, z3.b, z2.b
242; SVE2_128_NOMAX-NEXT:    zip1 z1.b, z1.b, z4.b
243; SVE2_128_NOMAX-NEXT:    zip1 z3.b, z5.b, z5.b
244; SVE2_128_NOMAX-NEXT:    zip1 z1.h, z1.h, z2.h
245; SVE2_128_NOMAX-NEXT:    zip1 z0.h, z3.h, z0.h
246; SVE2_128_NOMAX-NEXT:    zip1 z0.s, z1.s, z0.s
247; SVE2_128_NOMAX-NEXT:    // kill: def $d0 killed $d0 killed $z0
248; SVE2_128_NOMAX-NEXT:    ret
249;
250; SVE2_NOMIN_NOMAX-LABEL: shuffle_index_poison_value:
251; SVE2_NOMIN_NOMAX:       // %bb.0:
252; SVE2_NOMIN_NOMAX-NEXT:    ldr d0, [x1]
253; SVE2_NOMIN_NOMAX-NEXT:    ldr d1, [x0]
254; SVE2_NOMIN_NOMAX-NEXT:    mov z2.b, z0.b[3]
255; SVE2_NOMIN_NOMAX-NEXT:    mov z3.b, z0.b[2]
256; SVE2_NOMIN_NOMAX-NEXT:    mov z4.b, z0.b[1]
257; SVE2_NOMIN_NOMAX-NEXT:    mov z1.b, z1.b[1]
258; SVE2_NOMIN_NOMAX-NEXT:    mov z5.b, z0.b[4]
259; SVE2_NOMIN_NOMAX-NEXT:    mov z0.b, z0.b[6]
260; SVE2_NOMIN_NOMAX-NEXT:    zip1 z2.b, z3.b, z2.b
261; SVE2_NOMIN_NOMAX-NEXT:    zip1 z1.b, z1.b, z4.b
262; SVE2_NOMIN_NOMAX-NEXT:    zip1 z3.b, z5.b, z5.b
263; SVE2_NOMIN_NOMAX-NEXT:    zip1 z1.h, z1.h, z2.h
264; SVE2_NOMIN_NOMAX-NEXT:    zip1 z0.h, z3.h, z0.h
265; SVE2_NOMIN_NOMAX-NEXT:    zip1 z0.s, z1.s, z0.s
266; SVE2_NOMIN_NOMAX-NEXT:    // kill: def $d0 killed $d0 killed $z0
267; SVE2_NOMIN_NOMAX-NEXT:    ret
268;
269; SVE2_MIN_256_NOMAX-LABEL: shuffle_index_poison_value:
270; SVE2_MIN_256_NOMAX:       // %bb.0:
271; SVE2_MIN_256_NOMAX-NEXT:    ldr d0, [x1]
272; SVE2_MIN_256_NOMAX-NEXT:    ldr d1, [x0]
273; SVE2_MIN_256_NOMAX-NEXT:    mov z2.b, z0.b[3]
274; SVE2_MIN_256_NOMAX-NEXT:    mov z3.b, z0.b[2]
275; SVE2_MIN_256_NOMAX-NEXT:    mov z4.b, z0.b[1]
276; SVE2_MIN_256_NOMAX-NEXT:    mov z1.b, z1.b[1]
277; SVE2_MIN_256_NOMAX-NEXT:    mov z5.b, z0.b[4]
278; SVE2_MIN_256_NOMAX-NEXT:    mov z0.b, z0.b[6]
279; SVE2_MIN_256_NOMAX-NEXT:    zip1 z2.b, z3.b, z2.b
280; SVE2_MIN_256_NOMAX-NEXT:    zip1 z1.b, z1.b, z4.b
281; SVE2_MIN_256_NOMAX-NEXT:    zip1 z3.b, z5.b, z5.b
282; SVE2_MIN_256_NOMAX-NEXT:    zip1 z1.h, z1.h, z2.h
283; SVE2_MIN_256_NOMAX-NEXT:    zip1 z0.h, z3.h, z0.h
284; SVE2_MIN_256_NOMAX-NEXT:    zip1 z0.s, z1.s, z0.s
285; SVE2_MIN_256_NOMAX-NEXT:    // kill: def $d0 killed $d0 killed $z0
286; SVE2_MIN_256_NOMAX-NEXT:    ret
287  %op1 = load <8 x i8>, ptr %a
288  %op2 = load <8 x i8>, ptr %b
289  %1 = shufflevector <8 x i8> %op1, <8 x i8> %op2, <8 x i32> <i32 1, i32 9, i32 10, i32 11, i32 12, i32 12, i32 14, i32 poison>
290  ret <8 x i8> %1
291}
292
; The first shuffle operand is poison, so %op2 is the only value loaded (from
; x1). All four run configurations expect a single-source tbl on that value,
; with the index vector taken from the constant pool. The min-256
; configuration loads the index vector with ptrue vl32 + ld1b instead of a
; plain ldr q. The contents of the constant pool are not asserted here.
293define <8 x i8> @shuffle_op1_poison(ptr %a, ptr %b) {
294; SVE2_128-LABEL: shuffle_op1_poison:
295; SVE2_128:       // %bb.0:
296; SVE2_128-NEXT:    adrp x8, .LCPI4_0
297; SVE2_128-NEXT:    ldr d0, [x1]
298; SVE2_128-NEXT:    ldr q1, [x8, :lo12:.LCPI4_0]
299; SVE2_128-NEXT:    tbl z0.b, { z0.b }, z1.b
300; SVE2_128-NEXT:    // kill: def $d0 killed $d0 killed $z0
301; SVE2_128-NEXT:    ret
302;
303; SVE2_128_NOMAX-LABEL: shuffle_op1_poison:
304; SVE2_128_NOMAX:       // %bb.0:
305; SVE2_128_NOMAX-NEXT:    adrp x8, .LCPI4_0
306; SVE2_128_NOMAX-NEXT:    ldr d0, [x1]
307; SVE2_128_NOMAX-NEXT:    ldr q1, [x8, :lo12:.LCPI4_0]
308; SVE2_128_NOMAX-NEXT:    tbl z0.b, { z0.b }, z1.b
309; SVE2_128_NOMAX-NEXT:    // kill: def $d0 killed $d0 killed $z0
310; SVE2_128_NOMAX-NEXT:    ret
311;
312; SVE2_NOMIN_NOMAX-LABEL: shuffle_op1_poison:
313; SVE2_NOMIN_NOMAX:       // %bb.0:
314; SVE2_NOMIN_NOMAX-NEXT:    adrp x8, .LCPI4_0
315; SVE2_NOMIN_NOMAX-NEXT:    ldr d0, [x1]
316; SVE2_NOMIN_NOMAX-NEXT:    ldr q1, [x8, :lo12:.LCPI4_0]
317; SVE2_NOMIN_NOMAX-NEXT:    tbl z0.b, { z0.b }, z1.b
318; SVE2_NOMIN_NOMAX-NEXT:    // kill: def $d0 killed $d0 killed $z0
319; SVE2_NOMIN_NOMAX-NEXT:    ret
320;
321; SVE2_MIN_256_NOMAX-LABEL: shuffle_op1_poison:
322; SVE2_MIN_256_NOMAX:       // %bb.0:
323; SVE2_MIN_256_NOMAX-NEXT:    ptrue p0.b, vl32
324; SVE2_MIN_256_NOMAX-NEXT:    adrp x8, .LCPI4_0
325; SVE2_MIN_256_NOMAX-NEXT:    add x8, x8, :lo12:.LCPI4_0
326; SVE2_MIN_256_NOMAX-NEXT:    ldr d1, [x1]
327; SVE2_MIN_256_NOMAX-NEXT:    ld1b { z0.b }, p0/z, [x8]
328; SVE2_MIN_256_NOMAX-NEXT:    tbl z0.b, { z1.b }, z0.b
329; SVE2_MIN_256_NOMAX-NEXT:    // kill: def $d0 killed $d0 killed $z0
330; SVE2_MIN_256_NOMAX-NEXT:    ret
331  %op2 = load <8 x i8>, ptr %b
332  %1 = shufflevector <8 x i8> poison, <8 x i8> %op2, <8 x i32> <i32 1, i32 9, i32 10, i32 11, i32 12, i32 12, i32 14, i32 15>
333  ret <8 x i8> %1
334}
335
336; In this function, the indexes for the second operand cannot be represented:
337; with vscale_range(16,16) they start at 256, but an i8 mask element holds at most 255.
; Negative test: the function carries its own "+sve2" target feature and
; vscale_range(16,16), and the mask <1,9,10,11,12,12,14,15> uses lanes from
; both operands. All run configurations share one set of expectations, and
; that set contains no tbl; the result is assembled from per-lane mov
; instructions merged with zip1.
338define <8 x i8> @negative_test_shuffle_index_size_op_both_maxhw(ptr %a, ptr %b) "target-features"="+sve2" vscale_range(16,16) {
339; CHECK-LABEL: negative_test_shuffle_index_size_op_both_maxhw:
340; CHECK:       // %bb.0:
341; CHECK-NEXT:    ldr d0, [x1]
342; CHECK-NEXT:    ldr d1, [x0]
343; CHECK-NEXT:    mov z2.b, z0.b[3]
344; CHECK-NEXT:    mov z3.b, z0.b[2]
345; CHECK-NEXT:    mov z4.b, z0.b[1]
346; CHECK-NEXT:    mov z1.b, z1.b[1]
347; CHECK-NEXT:    mov z5.b, z0.b[7]
348; CHECK-NEXT:    mov z6.b, z0.b[6]
349; CHECK-NEXT:    mov z0.b, z0.b[4]
350; CHECK-NEXT:    zip1 z2.b, z3.b, z2.b
351; CHECK-NEXT:    zip1 z1.b, z1.b, z4.b
352; CHECK-NEXT:    zip1 z3.b, z6.b, z5.b
353; CHECK-NEXT:    zip1 z0.b, z0.b, z0.b
354; CHECK-NEXT:    zip1 z1.h, z1.h, z2.h
355; CHECK-NEXT:    zip1 z0.h, z0.h, z3.h
356; CHECK-NEXT:    zip1 z0.s, z1.s, z0.s
357; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
358; CHECK-NEXT:    ret
359  %op1 = load <8 x i8>, ptr %a
360  %op2 = load <8 x i8>, ptr %b
361  %1 = shufflevector <8 x i8> %op1, <8 x i8> %op2, <8 x i32> <i32 1, i32 9, i32 10, i32 11, i32 12, i32 12, i32 14, i32 15>
362  ret <8 x i8> %1
363}
364
365; CHECK: .LCPI6_0:
366; CHECK-NEXT:        .byte   0                               // 0x0
367; CHECK-NEXT:        .byte   7                               // 0x7
368; CHECK-NEXT:        .byte   2                               // 0x2
369; CHECK-NEXT:        .byte   3                               // 0x3
370; CHECK-NEXT:        .byte   4                               // 0x4
371; CHECK-NEXT:        .byte   5                               // 0x5
372; CHECK-NEXT:        .byte   6                               // 0x6
373; CHECK-NEXT:        .byte   7                               // 0x7
374; CHECK-NEXT:        .byte   255                             // 0xff
375; CHECK-NEXT:        .byte   255                             // 0xff
; Same vscale_range(16,16) setup as the negative test, but the mask
; <0,7,2,3,4,5,6,7> only selects lanes of %op1. All run configurations share
; one set of expectations: a single load from x0 and a single-source tbl,
; with the index vector loaded from the constant pool via ptrue p0.b + ld1b.
376define <8 x i8> @shuffle_index_size_op1_maxhw(ptr %a, ptr %b) "target-features"="+sve2" vscale_range(16,16) {
377; CHECK-LABEL: shuffle_index_size_op1_maxhw:
378; CHECK:       // %bb.0:
379; CHECK-NEXT:    ptrue p0.b
380; CHECK-NEXT:    adrp x8, .LCPI6_0
381; CHECK-NEXT:    add x8, x8, :lo12:.LCPI6_0
382; CHECK-NEXT:    ldr d1, [x0]
383; CHECK-NEXT:    ld1b { z0.b }, p0/z, [x8]
384; CHECK-NEXT:    tbl z0.b, { z1.b }, z0.b
385; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
386; CHECK-NEXT:    ret
387  %op1 = load <8 x i8>, ptr %a
388  %op2 = load <8 x i8>, ptr %b
389  %1 = shufflevector <8 x i8> %op1, <8 x i8> %op2, <8 x i32> <i32 0, i32 7, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
390  ret <8 x i8> %1
391}
392
393; SVE2_128: .LCPI7_0:
394; SVE2_128-NEXT:        .hword  1                               // 0x1
395; SVE2_128-NEXT:        .hword  9                               // 0x9
396; SVE2_128-NEXT:        .hword  10                              // 0xa
397; SVE2_128-NEXT:        .hword  11                              // 0xb
398; SVE2_128-NEXT:        .hword  12                              // 0xc
399; SVE2_128-NEXT:        .hword  12                              // 0xc
400; SVE2_128-NEXT:        .hword  14                              // 0xe
401; SVE2_128-NEXT:        .hword  15                              // 0xf
402
403; SVE2_128_NOMAX: .LCPI7_0:
404; SVE2_128_NOMAX-NEXT:        .hword  0                               // 0x0
405; SVE2_128_NOMAX-NEXT:        .hword  1                               // 0x1
406; SVE2_128_NOMAX-NEXT:        .hword  1                               // 0x1
407; SVE2_128_NOMAX-NEXT:        .hword  1                               // 0x1
408; SVE2_128_NOMAX-NEXT:        .hword  1                               // 0x1
409; SVE2_128_NOMAX-NEXT:        .hword  1                               // 0x1
410; SVE2_128_NOMAX-NEXT:        .hword  1                               // 0x1
411; SVE2_128_NOMAX-NEXT:        .hword  1                               // 0x1
412; SVE2_128_NOMAX-NEXT:.LCPI7_1:
413; SVE2_128_NOMAX-NEXT:        .hword  1                               // 0x1
414; SVE2_128_NOMAX-NEXT:        .hword  1                               // 0x1
415; SVE2_128_NOMAX-NEXT:        .hword  2                               // 0x2
416; SVE2_128_NOMAX-NEXT:        .hword  3                               // 0x3
417; SVE2_128_NOMAX-NEXT:        .hword  4                               // 0x4
418; SVE2_128_NOMAX-NEXT:        .hword  4                               // 0x4
419; SVE2_128_NOMAX-NEXT:        .hword  6                               // 0x6
420; SVE2_128_NOMAX-NEXT:        .hword  7                               // 0x7
421
422; SVE2_NOMIN_NOMAX: .LCPI7_0:
423; SVE2_NOMIN_NOMAX-NEXT:        .hword  0                               // 0x0
424; SVE2_NOMIN_NOMAX-NEXT:        .hword  1                               // 0x1
425; SVE2_NOMIN_NOMAX-NEXT:        .hword  1                               // 0x1
426; SVE2_NOMIN_NOMAX-NEXT:        .hword  1                               // 0x1
427; SVE2_NOMIN_NOMAX-NEXT:        .hword  1                               // 0x1
428; SVE2_NOMIN_NOMAX-NEXT:        .hword  1                               // 0x1
429; SVE2_NOMIN_NOMAX-NEXT:        .hword  1                               // 0x1
430; SVE2_NOMIN_NOMAX-NEXT:        .hword  1                               // 0x1
431; SVE2_NOMIN_NOMAX-NEXT:.LCPI7_1:
432; SVE2_NOMIN_NOMAX-NEXT:        .hword  1                               // 0x1
433; SVE2_NOMIN_NOMAX-NEXT:        .hword  1                               // 0x1
434; SVE2_NOMIN_NOMAX-NEXT:        .hword  2                               // 0x2
435; SVE2_NOMIN_NOMAX-NEXT:        .hword  3                               // 0x3
436; SVE2_NOMIN_NOMAX-NEXT:        .hword  4                               // 0x4
437; SVE2_NOMIN_NOMAX-NEXT:        .hword  4                               // 0x4
438; SVE2_NOMIN_NOMAX-NEXT:        .hword  6                               // 0x6
439; SVE2_NOMIN_NOMAX-NEXT:        .hword  7                               // 0x7
440
441; SVE2_MIN_256_NOMAX: .LCPI7_0:
442; SVE2_MIN_256_NOMAX-NEXT:        .hword  0                               // 0x0
443; SVE2_MIN_256_NOMAX-NEXT:        .hword  1                               // 0x1
444; SVE2_MIN_256_NOMAX-NEXT:        .hword  1                               // 0x1
445; SVE2_MIN_256_NOMAX-NEXT:        .hword  1                               // 0x1
446; SVE2_MIN_256_NOMAX-NEXT:        .hword  1                               // 0x1
447; SVE2_MIN_256_NOMAX-NEXT:        .hword  1                               // 0x1
448; SVE2_MIN_256_NOMAX-NEXT:        .hword  1                               // 0x1
449; SVE2_MIN_256_NOMAX-NEXT:        .hword  1                               // 0x1
450; SVE2_MIN_256_NOMAX-NEXT:        .hword  0                               // 0x0
451; SVE2_MIN_256_NOMAX-NEXT:        .hword  0                               // 0x0
452; SVE2_MIN_256_NOMAX-NEXT:        .hword  0                               // 0x0
453; SVE2_MIN_256_NOMAX-NEXT:        .hword  0                               // 0x0
454; SVE2_MIN_256_NOMAX-NEXT:        .hword  0                               // 0x0
455; SVE2_MIN_256_NOMAX-NEXT:        .hword  0                               // 0x0
456; SVE2_MIN_256_NOMAX-NEXT:        .hword  0                               // 0x0
457; SVE2_MIN_256_NOMAX-NEXT:        .hword  0                               // 0x0
458; SVE2_MIN_256_NOMAX-NEXT:.LCPI7_1:
459; SVE2_MIN_256_NOMAX-NEXT:        .hword  1                               // 0x1
460; SVE2_MIN_256_NOMAX-NEXT:        .hword  1                               // 0x1
461; SVE2_MIN_256_NOMAX-NEXT:        .hword  2                               // 0x2
462; SVE2_MIN_256_NOMAX-NEXT:        .hword  3                               // 0x3
463; SVE2_MIN_256_NOMAX-NEXT:        .hword  4                               // 0x4
464; SVE2_MIN_256_NOMAX-NEXT:        .hword  4                               // 0x4
465; SVE2_MIN_256_NOMAX-NEXT:        .hword  6                               // 0x6
466; SVE2_MIN_256_NOMAX-NEXT:        .hword  7                               // 0x7
467; SVE2_MIN_256_NOMAX-NEXT:        .hword  65535                           // 0xffff
468; SVE2_MIN_256_NOMAX-NEXT:        .hword  65535                           // 0xffff
469; SVE2_MIN_256_NOMAX-NEXT:        .hword  65535                           // 0xffff
470; SVE2_MIN_256_NOMAX-NEXT:        .hword  65535                           // 0xffff
471; SVE2_MIN_256_NOMAX-NEXT:        .hword  65535                           // 0xffff
472; SVE2_MIN_256_NOMAX-NEXT:        .hword  65535                           // 0xffff
473; SVE2_MIN_256_NOMAX-NEXT:        .hword  65535                           // 0xffff
474; SVE2_MIN_256_NOMAX-NEXT:        .hword  65535                           // 0xffff
; i16 variant of the mixed-operand mask <1,9,10,11,12,12,14,15>. Every run
; configuration expects a two-source tbl on .h elements. With the vector
; length pinned to exactly 128 bits, the index vector is loaded directly from
; the constant pool. The other three configurations build it at run time:
; cnth is broadcast with mov z.h and combined with two constant-pool vectors
; by a predicated mad before the tbl.
475define <8 x i16> @shuffle_index_indices_from_both_ops_i16(ptr %a, ptr %b) {
476; SVE2_128-LABEL: shuffle_index_indices_from_both_ops_i16:
477; SVE2_128:       // %bb.0:
478; SVE2_128-NEXT:    adrp x8, .LCPI7_0
479; SVE2_128-NEXT:    ldr q0, [x0]
480; SVE2_128-NEXT:    ldr q1, [x1]
481; SVE2_128-NEXT:    ldr q2, [x8, :lo12:.LCPI7_0]
482; SVE2_128-NEXT:    tbl z0.h, { z0.h, z1.h }, z2.h
483; SVE2_128-NEXT:    // kill: def $q0 killed $q0 killed $z0
484; SVE2_128-NEXT:    ret
485;
486; SVE2_128_NOMAX-LABEL: shuffle_index_indices_from_both_ops_i16:
487; SVE2_128_NOMAX:       // %bb.0:
488; SVE2_128_NOMAX-NEXT:    cnth x8
489; SVE2_128_NOMAX-NEXT:    adrp x9, .LCPI7_0
490; SVE2_128_NOMAX-NEXT:    adrp x10, .LCPI7_1
491; SVE2_128_NOMAX-NEXT:    mov z0.h, w8
492; SVE2_128_NOMAX-NEXT:    ldr q1, [x9, :lo12:.LCPI7_0]
493; SVE2_128_NOMAX-NEXT:    ldr q2, [x10, :lo12:.LCPI7_1]
494; SVE2_128_NOMAX-NEXT:    ptrue p0.h, vl8
495; SVE2_128_NOMAX-NEXT:    mad z0.h, p0/m, z1.h, z2.h
496; SVE2_128_NOMAX-NEXT:    ldr q1, [x0]
497; SVE2_128_NOMAX-NEXT:    ldr q2, [x1]
498; SVE2_128_NOMAX-NEXT:    tbl z0.h, { z1.h, z2.h }, z0.h
499; SVE2_128_NOMAX-NEXT:    // kill: def $q0 killed $q0 killed $z0
500; SVE2_128_NOMAX-NEXT:    ret
501;
502; SVE2_NOMIN_NOMAX-LABEL: shuffle_index_indices_from_both_ops_i16:
503; SVE2_NOMIN_NOMAX:       // %bb.0:
504; SVE2_NOMIN_NOMAX-NEXT:    cnth x8
505; SVE2_NOMIN_NOMAX-NEXT:    adrp x9, .LCPI7_0
506; SVE2_NOMIN_NOMAX-NEXT:    adrp x10, .LCPI7_1
507; SVE2_NOMIN_NOMAX-NEXT:    mov z0.h, w8
508; SVE2_NOMIN_NOMAX-NEXT:    ldr q1, [x9, :lo12:.LCPI7_0]
509; SVE2_NOMIN_NOMAX-NEXT:    ldr q2, [x10, :lo12:.LCPI7_1]
510; SVE2_NOMIN_NOMAX-NEXT:    ptrue p0.h, vl8
511; SVE2_NOMIN_NOMAX-NEXT:    mad z0.h, p0/m, z1.h, z2.h
512; SVE2_NOMIN_NOMAX-NEXT:    ldr q1, [x0]
513; SVE2_NOMIN_NOMAX-NEXT:    ldr q2, [x1]
514; SVE2_NOMIN_NOMAX-NEXT:    tbl z0.h, { z1.h, z2.h }, z0.h
515; SVE2_NOMIN_NOMAX-NEXT:    // kill: def $q0 killed $q0 killed $z0
516; SVE2_NOMIN_NOMAX-NEXT:    ret
517;
518; SVE2_MIN_256_NOMAX-LABEL: shuffle_index_indices_from_both_ops_i16:
519; SVE2_MIN_256_NOMAX:       // %bb.0:
520; SVE2_MIN_256_NOMAX-NEXT:    ptrue p0.h, vl16
521; SVE2_MIN_256_NOMAX-NEXT:    adrp x8, .LCPI7_0
522; SVE2_MIN_256_NOMAX-NEXT:    add x8, x8, :lo12:.LCPI7_0
523; SVE2_MIN_256_NOMAX-NEXT:    adrp x9, .LCPI7_1
524; SVE2_MIN_256_NOMAX-NEXT:    add x9, x9, :lo12:.LCPI7_1
525; SVE2_MIN_256_NOMAX-NEXT:    cnth x10
526; SVE2_MIN_256_NOMAX-NEXT:    ld1h { z0.h }, p0/z, [x8]
527; SVE2_MIN_256_NOMAX-NEXT:    ld1h { z1.h }, p0/z, [x9]
528; SVE2_MIN_256_NOMAX-NEXT:    mov z2.h, w10
529; SVE2_MIN_256_NOMAX-NEXT:    mad z0.h, p0/m, z2.h, z1.h
530; SVE2_MIN_256_NOMAX-NEXT:    ldr q1, [x0]
531; SVE2_MIN_256_NOMAX-NEXT:    ldr q2, [x1]
532; SVE2_MIN_256_NOMAX-NEXT:    tbl z0.h, { z1.h, z2.h }, z0.h
533; SVE2_MIN_256_NOMAX-NEXT:    // kill: def $q0 killed $q0 killed $z0
534; SVE2_MIN_256_NOMAX-NEXT:    ret
535  %op1 = load <8 x i16>, ptr %a
536  %op2 = load <8 x i16>, ptr %b
537  %1 = shufflevector <8 x i16> %op1, <8 x i16> %op2, <8 x i32> <i32 1, i32 9, i32 10, i32 11, i32 12, i32 12, i32 14, i32 15>
538  ret <8 x i16> %1
539}
540