xref: /llvm-project/llvm/test/CodeGen/AArch64/trunc-to-tbl.ll (revision 61510b51c33464a6bc15e4cf5b1ee07e2e0ec1c9)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -mtriple=arm64-apple-ios -o - %s | FileCheck %s
3; RUN: llc -mtriple=aarch64_be-unknown-linux -o - %s | FileCheck --check-prefix=CHECK-BE %s
4; RUN: llc -mtriple=aarch64_be-unknown-linux -aarch64-enable-ext-to-tbl=false -o - %s | FileCheck --check-prefix=CHECK-DISABLE %s
5
6; CHECK-LABEL: lCPI0_0:
7; CHECK-NEXT:	.byte	0                               ; 0x0
8; CHECK-NEXT:	.byte	4                               ; 0x4
9; CHECK-NEXT:	.byte	8                               ; 0x8
10; CHECK-NEXT:	.byte	12                              ; 0xc
11; CHECK-NEXT:	.byte	16                              ; 0x10
12; CHECK-NEXT:	.byte	20                              ; 0x14
13; CHECK-NEXT:	.byte	24                              ; 0x18
14; CHECK-NEXT:	.byte	28                              ; 0x1c
15; CHECK-NEXT:	.byte	32                              ; 0x20
16; CHECK-NEXT:	.byte	36                              ; 0x24
17; CHECK-NEXT:	.byte	40                              ; 0x28
18; CHECK-NEXT:	.byte	44                              ; 0x2c
19; CHECK-NEXT:	.byte	48                              ; 0x30
20; CHECK-NEXT:	.byte	52                              ; 0x34
21; CHECK-NEXT:	.byte	56                              ; 0x38
22; CHECK-NEXT:	.byte	60                              ; 0x3c
23
24; CHECK-BE-LABEL:   .LCPI0_0:
25; CHECK-BE-NEXT:   .byte    3                               // 0x3
26; CHECK-BE-NEXT:   .byte    7                               // 0x7
27; CHECK-BE-NEXT:   .byte    11                              // 0xb
28; CHECK-BE-NEXT:   .byte    15                              // 0xf
29; CHECK-BE-NEXT:   .byte    19                              // 0x13
30; CHECK-BE-NEXT:   .byte    23                              // 0x17
31; CHECK-BE-NEXT:   .byte    27                              // 0x1b
32; CHECK-BE-NEXT:   .byte    31                              // 0x1f
33; CHECK-BE-NEXT:   .byte    35                              // 0x23
34; CHECK-BE-NEXT:   .byte    39                              // 0x27
35; CHECK-BE-NEXT:   .byte    43                              // 0x2b
36; CHECK-BE-NEXT:   .byte    47                              // 0x2f
37; CHECK-BE-NEXT:   .byte    51                              // 0x33
38; CHECK-BE-NEXT:   .byte    55                              // 0x37
39; CHECK-BE-NEXT:   .byte    59                              // 0x3b
40; CHECK-BE-NEXT:   .byte    63                              // 0x3f
41
42; It's profitable to use a single tbl.4 instruction to lower the truncate.
; Test body: every iteration loads one <16 x i32> from %A, truncates it to
; <16 x i8> and stores the result to %dst. The default and big-endian
; expectations below use one four-register tbl whose index vector is loaded
; ahead of the loop; the expectations for the run that passes
; -aarch64-enable-ext-to-tbl=false use three uzp1 instructions instead.
43define void @trunc_v16i32_to_v16i8_in_loop(ptr %A, ptr %dst) {
44; CHECK-LABEL: trunc_v16i32_to_v16i8_in_loop:
45; CHECK:       ; %bb.0: ; %entry
46; CHECK-NEXT:  Lloh0:
47; CHECK-NEXT:    adrp x8, lCPI0_0@PAGE
48; CHECK-NEXT:  Lloh1:
49; CHECK-NEXT:    ldr q0, [x8, lCPI0_0@PAGEOFF]
50; CHECK-NEXT:    mov x8, xzr
51; CHECK-NEXT:  LBB0_1: ; %loop
52; CHECK-NEXT:    ; =>This Inner Loop Header: Depth=1
53; CHECK-NEXT:    add x9, x0, x8, lsl #6
54; CHECK-NEXT:    ldp q1, q2, [x9]
55; CHECK-NEXT:    ldp q3, q4, [x9, #32]
56; CHECK-NEXT:    tbl.16b v1, { v1, v2, v3, v4 }, v0
57; CHECK-NEXT:    str q1, [x1, x8, lsl #4]
58; CHECK-NEXT:    add x8, x8, #1
59; CHECK-NEXT:    cmp x8, #1000
60; CHECK-NEXT:    b.eq LBB0_1
61; CHECK-NEXT:  ; %bb.2: ; %exit
62; CHECK-NEXT:    ret
63; CHECK-NEXT:    .loh AdrpLdr Lloh0, Lloh1
64;
65; CHECK-BE-LABEL: trunc_v16i32_to_v16i8_in_loop:
66; CHECK-BE:       // %bb.0: // %entry
67; CHECK-BE-NEXT:    adrp x8, .LCPI0_0
68; CHECK-BE-NEXT:    add x8, x8, :lo12:.LCPI0_0
69; CHECK-BE-NEXT:    ld1 { v0.16b }, [x8]
70; CHECK-BE-NEXT:    mov x8, xzr
71; CHECK-BE-NEXT:  .LBB0_1: // %loop
72; CHECK-BE-NEXT:    // =>This Inner Loop Header: Depth=1
73; CHECK-BE-NEXT:    add x9, x0, x8, lsl #6
74; CHECK-BE-NEXT:    add x10, x9, #16
75; CHECK-BE-NEXT:    ld1 { v1.16b }, [x9]
76; CHECK-BE-NEXT:    add x11, x9, #32
77; CHECK-BE-NEXT:    ld1 { v2.16b }, [x10]
78; CHECK-BE-NEXT:    add x9, x9, #48
79; CHECK-BE-NEXT:    ld1 { v3.16b }, [x11]
80; CHECK-BE-NEXT:    ld1 { v4.16b }, [x9]
81; CHECK-BE-NEXT:    add x9, x1, x8, lsl #4
82; CHECK-BE-NEXT:    add x8, x8, #1
83; CHECK-BE-NEXT:    cmp x8, #1000
84; CHECK-BE-NEXT:    tbl v1.16b, { v1.16b, v2.16b, v3.16b, v4.16b }, v0.16b
85; CHECK-BE-NEXT:    st1 { v1.16b }, [x9]
86; CHECK-BE-NEXT:    b.eq .LBB0_1
87; CHECK-BE-NEXT:  // %bb.2: // %exit
88; CHECK-BE-NEXT:    ret
89;
90; CHECK-DISABLE-LABEL: trunc_v16i32_to_v16i8_in_loop:
91; CHECK-DISABLE:       // %bb.0: // %entry
92; CHECK-DISABLE-NEXT:    mov x8, xzr
93; CHECK-DISABLE-NEXT:  .LBB0_1: // %loop
94; CHECK-DISABLE-NEXT:    // =>This Inner Loop Header: Depth=1
95; CHECK-DISABLE-NEXT:    add x9, x0, x8, lsl #6
96; CHECK-DISABLE-NEXT:    ld1 { v0.4s }, [x9]
97; CHECK-DISABLE-NEXT:    add x10, x9, #16
98; CHECK-DISABLE-NEXT:    add x11, x9, #48
99; CHECK-DISABLE-NEXT:    add x9, x9, #32
100; CHECK-DISABLE-NEXT:    ld1 { v1.4s }, [x10]
101; CHECK-DISABLE-NEXT:    ld1 { v2.4s }, [x11]
102; CHECK-DISABLE-NEXT:    ld1 { v3.4s }, [x9]
103; CHECK-DISABLE-NEXT:    add x9, x1, x8, lsl #4
104; CHECK-DISABLE-NEXT:    add x8, x8, #1
105; CHECK-DISABLE-NEXT:    cmp x8, #1000
106; CHECK-DISABLE-NEXT:    uzp1 v0.8h, v0.8h, v1.8h
107; CHECK-DISABLE-NEXT:    uzp1 v2.8h, v3.8h, v2.8h
108; CHECK-DISABLE-NEXT:    uzp1 v0.16b, v0.16b, v2.16b
109; CHECK-DISABLE-NEXT:    st1 { v0.16b }, [x9]
110; CHECK-DISABLE-NEXT:    b.eq .LBB0_1
111; CHECK-DISABLE-NEXT:  // %bb.2: // %exit
112; CHECK-DISABLE-NEXT:    ret
113entry:
114  br label %loop
115
116loop:
; %iv indexes %A in units of <16 x i32> and %dst in units of <16 x i8>.
117  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
118  %gep.A = getelementptr inbounds <16 x i32>, ptr %A, i64 %iv
119  %l.A = load <16 x i32>, ptr %gep.A
; The truncate under test.
120  %trunc = trunc <16 x i32> %l.A to <16 x i8>
121  %gep.dst = getelementptr inbounds <16 x i8>, ptr %dst, i64 %iv
122  store <16 x i8> %trunc, ptr %gep.dst
123  %iv.next = add i64 %iv, 1
; NOTE(review): the branch returns to %loop only when %iv.next == 1000, so
; starting from %iv = 0 the body runs once and then exits. Presumably only the
; loop structure matters for this test - confirm before changing, since the
; expectations above contain the matching b.eq.
124  %ec = icmp eq i64 %iv.next, 1000
125  br i1 %ec, label %loop, label %exit
126
127exit:
128  ret void
129}
130
131; Not profitable to use tbl, as materializing the masks requires more
132; instructions.
; Test body: the same <16 x i32> -> <16 x i8> load/truncate/store as the
; in-loop variant, but in straight-line code. None of the three expectation
; sets below contains a tbl; each uses two uzp1 on .8h lanes followed by one
; uzp1 on .16b lanes.
133define void @trunc_v16i32_to_v16i8_no_loop(ptr %A, ptr %dst) {
134; CHECK-LABEL: trunc_v16i32_to_v16i8_no_loop:
135; CHECK:       ; %bb.0: ; %entry
136; CHECK-NEXT:    ldp q1, q0, [x0]
137; CHECK-NEXT:    ldp q3, q2, [x0, #32]
138; CHECK-NEXT:    uzp1.8h v0, v1, v0
139; CHECK-NEXT:    uzp1.8h v2, v3, v2
140; CHECK-NEXT:    uzp1.16b v0, v0, v2
141; CHECK-NEXT:    str q0, [x1]
142; CHECK-NEXT:    ret
143;
144; CHECK-BE-LABEL: trunc_v16i32_to_v16i8_no_loop:
145; CHECK-BE:       // %bb.0: // %entry
146; CHECK-BE-NEXT:    add x8, x0, #16
147; CHECK-BE-NEXT:    add x9, x0, #48
148; CHECK-BE-NEXT:    add x10, x0, #32
149; CHECK-BE-NEXT:    ld1 { v0.4s }, [x0]
150; CHECK-BE-NEXT:    ld1 { v1.4s }, [x8]
151; CHECK-BE-NEXT:    ld1 { v2.4s }, [x9]
152; CHECK-BE-NEXT:    ld1 { v3.4s }, [x10]
153; CHECK-BE-NEXT:    uzp1 v0.8h, v0.8h, v1.8h
154; CHECK-BE-NEXT:    uzp1 v2.8h, v3.8h, v2.8h
155; CHECK-BE-NEXT:    uzp1 v0.16b, v0.16b, v2.16b
156; CHECK-BE-NEXT:    st1 { v0.16b }, [x1]
157; CHECK-BE-NEXT:    ret
158;
159; CHECK-DISABLE-LABEL: trunc_v16i32_to_v16i8_no_loop:
160; CHECK-DISABLE:       // %bb.0: // %entry
161; CHECK-DISABLE-NEXT:    add x8, x0, #16
162; CHECK-DISABLE-NEXT:    add x9, x0, #48
163; CHECK-DISABLE-NEXT:    add x10, x0, #32
164; CHECK-DISABLE-NEXT:    ld1 { v0.4s }, [x0]
165; CHECK-DISABLE-NEXT:    ld1 { v1.4s }, [x8]
166; CHECK-DISABLE-NEXT:    ld1 { v2.4s }, [x9]
167; CHECK-DISABLE-NEXT:    ld1 { v3.4s }, [x10]
168; CHECK-DISABLE-NEXT:    uzp1 v0.8h, v0.8h, v1.8h
169; CHECK-DISABLE-NEXT:    uzp1 v2.8h, v3.8h, v2.8h
170; CHECK-DISABLE-NEXT:    uzp1 v0.16b, v0.16b, v2.16b
171; CHECK-DISABLE-NEXT:    st1 { v0.16b }, [x1]
172; CHECK-DISABLE-NEXT:    ret
173entry:
; Single load / truncate / store with no surrounding loop.
174  %l.A = load <16 x i32>, ptr %A
175  %trunc = trunc <16 x i32> %l.A to <16 x i8>
176  store <16 x i8> %trunc, ptr %dst
177  ret void
178}
179
180
181; CHECK-LABEL: lCPI2_0:
182; CHECK-NEXT:     .byte    0                               ; 0x0
183; CHECK-NEXT:     .byte    4                               ; 0x4
184; CHECK-NEXT:     .byte    8                               ; 0x8
185; CHECK-NEXT:     .byte    12                              ; 0xc
186; CHECK-NEXT:     .byte    16                              ; 0x10
187; CHECK-NEXT:     .byte    20                              ; 0x14
188; CHECK-NEXT:     .byte    24                              ; 0x18
189; CHECK-NEXT:     .byte    28                              ; 0x1c
190; CHECK-NEXT:     .byte    255                             ; 0xff
191; CHECK-NEXT:     .byte    255                             ; 0xff
192; CHECK-NEXT:     .byte    255                             ; 0xff
193; CHECK-NEXT:     .byte    255                             ; 0xff
194; CHECK-NEXT:     .byte    255                             ; 0xff
195; CHECK-NEXT:     .byte    255                             ; 0xff
196; CHECK-NEXT:     .byte    255                             ; 0xff
197; CHECK-NEXT:     .byte    255                             ; 0xff
198
199; CHECK-BE-LABEL: .LCPI2_0:
200; CHECK-BE-NEXT:     .byte    3                               // 0x3
201; CHECK-BE-NEXT:     .byte    7                               // 0x7
202; CHECK-BE-NEXT:     .byte    11                              // 0xb
203; CHECK-BE-NEXT:     .byte    15                              // 0xf
204; CHECK-BE-NEXT:     .byte    19                              // 0x13
205; CHECK-BE-NEXT:     .byte    23                              // 0x17
206; CHECK-BE-NEXT:     .byte    27                              // 0x1b
207; CHECK-BE-NEXT:     .byte    31                              // 0x1f
208; CHECK-BE-NEXT:     .byte    255                             // 0xff
209; CHECK-BE-NEXT:     .byte    255                             // 0xff
210; CHECK-BE-NEXT:     .byte    255                             // 0xff
211; CHECK-BE-NEXT:     .byte    255                             // 0xff
212; CHECK-BE-NEXT:     .byte    255                             // 0xff
213; CHECK-BE-NEXT:     .byte    255                             // 0xff
214; CHECK-BE-NEXT:     .byte    255                             // 0xff
215; CHECK-BE-NEXT:     .byte    255                             // 0xff
216; It's profitable to use a single tbl.2 instruction to lower the truncate.
; Test body: every iteration loads one <8 x i32> from %A, truncates it to
; <8 x i8> and stores the 8 result bytes to %dst. The default and big-endian
; expectations below use a two-register tbl followed by a 64-bit store
; (str d1 / st1 { v1.8b }); the expectations for the run that passes
; -aarch64-enable-ext-to-tbl=false use uzp1 followed by xtn.
217define void @trunc_v8i32_to_v8i8_in_loop(ptr %A, ptr %dst) {
218; CHECK-LABEL: trunc_v8i32_to_v8i8_in_loop:
219; CHECK:       ; %bb.0: ; %entry
220; CHECK-NEXT:  Lloh2:
221; CHECK-NEXT:    adrp x8, lCPI2_0@PAGE
222; CHECK-NEXT:  Lloh3:
223; CHECK-NEXT:    ldr q0, [x8, lCPI2_0@PAGEOFF]
224; CHECK-NEXT:    mov x8, xzr
225; CHECK-NEXT:  LBB2_1: ; %loop
226; CHECK-NEXT:    ; =>This Inner Loop Header: Depth=1
227; CHECK-NEXT:    add x9, x0, x8, lsl #5
228; CHECK-NEXT:    ldp q1, q2, [x9]
229; CHECK-NEXT:    tbl.16b v1, { v1, v2 }, v0
230; CHECK-NEXT:    str d1, [x1, x8, lsl #3]
231; CHECK-NEXT:    add x8, x8, #1
232; CHECK-NEXT:    cmp x8, #1000
233; CHECK-NEXT:    b.eq LBB2_1
234; CHECK-NEXT:  ; %bb.2: ; %exit
235; CHECK-NEXT:    ret
236; CHECK-NEXT:    .loh AdrpLdr Lloh2, Lloh3
237;
238; CHECK-BE-LABEL: trunc_v8i32_to_v8i8_in_loop:
239; CHECK-BE:       // %bb.0: // %entry
240; CHECK-BE-NEXT:    adrp x8, .LCPI2_0
241; CHECK-BE-NEXT:    add x8, x8, :lo12:.LCPI2_0
242; CHECK-BE-NEXT:    ld1 { v0.16b }, [x8]
243; CHECK-BE-NEXT:    mov x8, xzr
244; CHECK-BE-NEXT:  .LBB2_1: // %loop
245; CHECK-BE-NEXT:    // =>This Inner Loop Header: Depth=1
246; CHECK-BE-NEXT:    add x9, x0, x8, lsl #5
247; CHECK-BE-NEXT:    add x10, x9, #16
248; CHECK-BE-NEXT:    ld1 { v1.16b }, [x9]
249; CHECK-BE-NEXT:    add x9, x1, x8, lsl #3
250; CHECK-BE-NEXT:    ld1 { v2.16b }, [x10]
251; CHECK-BE-NEXT:    add x8, x8, #1
252; CHECK-BE-NEXT:    cmp x8, #1000
253; CHECK-BE-NEXT:    tbl v1.16b, { v1.16b, v2.16b }, v0.16b
254; CHECK-BE-NEXT:    st1 { v1.8b }, [x9]
255; CHECK-BE-NEXT:    b.eq .LBB2_1
256; CHECK-BE-NEXT:  // %bb.2: // %exit
257; CHECK-BE-NEXT:    ret
258;
259; CHECK-DISABLE-LABEL: trunc_v8i32_to_v8i8_in_loop:
260; CHECK-DISABLE:       // %bb.0: // %entry
261; CHECK-DISABLE-NEXT:    mov x8, xzr
262; CHECK-DISABLE-NEXT:  .LBB2_1: // %loop
263; CHECK-DISABLE-NEXT:    // =>This Inner Loop Header: Depth=1
264; CHECK-DISABLE-NEXT:    add x9, x0, x8, lsl #5
265; CHECK-DISABLE-NEXT:    add x10, x9, #16
266; CHECK-DISABLE-NEXT:    ld1 { v0.4s }, [x9]
267; CHECK-DISABLE-NEXT:    add x9, x1, x8, lsl #3
268; CHECK-DISABLE-NEXT:    ld1 { v1.4s }, [x10]
269; CHECK-DISABLE-NEXT:    add x8, x8, #1
270; CHECK-DISABLE-NEXT:    cmp x8, #1000
271; CHECK-DISABLE-NEXT:    uzp1 v0.8h, v0.8h, v1.8h
272; CHECK-DISABLE-NEXT:    xtn v0.8b, v0.8h
273; CHECK-DISABLE-NEXT:    st1 { v0.8b }, [x9]
274; CHECK-DISABLE-NEXT:    b.eq .LBB2_1
275; CHECK-DISABLE-NEXT:  // %bb.2: // %exit
276; CHECK-DISABLE-NEXT:    ret
277entry:
278  br label %loop
279
280loop:
; %iv indexes %A in units of <8 x i32> and %dst in units of <8 x i8>.
281  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
282  %gep.A = getelementptr inbounds <8 x i32>, ptr %A, i64 %iv
283  %l.A = load <8 x i32>, ptr %gep.A
; The truncate under test.
284  %trunc = trunc <8 x i32> %l.A to <8 x i8>
285  %gep.dst = getelementptr inbounds <8 x i8>, ptr %dst, i64 %iv
286  store <8 x i8> %trunc, ptr %gep.dst
287  %iv.next = add i64 %iv, 1
; NOTE(review): the branch returns to %loop only when %iv.next == 1000, so
; starting from %iv = 0 the body runs once and then exits. Presumably only the
; loop structure matters for this test - confirm before changing, since the
; expectations above contain the matching b.eq.
288  %ec = icmp eq i64 %iv.next, 1000
289  br i1 %ec, label %loop, label %exit
290
291exit:
292  ret void
293}
294
295; CHECK-LABEL:   lCPI3_0:
296; CHECK-NEXT:   	.byte	0                               ; 0x0
297; CHECK-NEXT:   	.byte	8                               ; 0x8
298; CHECK-NEXT:   	.byte	16                              ; 0x10
299; CHECK-NEXT:   	.byte	24                              ; 0x18
300; CHECK-NEXT:   	.byte	32                              ; 0x20
301; CHECK-NEXT:   	.byte	40                              ; 0x28
302; CHECK-NEXT:   	.byte	48                              ; 0x30
303; CHECK-NEXT:   	.byte	56                              ; 0x38
304; CHECK-NEXT:   	.byte	64                              ; 0x40
305; CHECK-NEXT:   	.byte	72                              ; 0x48
306; CHECK-NEXT:   	.byte	80                              ; 0x50
307; CHECK-NEXT:   	.byte	88                              ; 0x58
308; CHECK-NEXT:   	.byte	96                              ; 0x60
309; CHECK-NEXT:   	.byte	104                             ; 0x68
310; CHECK-NEXT:   	.byte	112                             ; 0x70
311; CHECK-NEXT:   	.byte	120                             ; 0x78
312
313; CHECK-BE-LABEL:    .LCPI3_0:
314; CHECK-BE-NEXT:    	.byte	7                               // 0x7
315; CHECK-BE-NEXT:    	.byte	15                              // 0xf
316; CHECK-BE-NEXT:    	.byte	23                              // 0x17
317; CHECK-BE-NEXT:    	.byte	31                              // 0x1f
318; CHECK-BE-NEXT:    	.byte	39                              // 0x27
319; CHECK-BE-NEXT:    	.byte	47                              // 0x2f
320; CHECK-BE-NEXT:    	.byte	55                              // 0x37
321; CHECK-BE-NEXT:    	.byte	63                              // 0x3f
322; CHECK-BE-NEXT:    	.byte	71                              // 0x47
323; CHECK-BE-NEXT:    	.byte	79                              // 0x4f
324; CHECK-BE-NEXT:    	.byte	87                              // 0x57
325; CHECK-BE-NEXT:    	.byte	95                              // 0x5f
326; CHECK-BE-NEXT:    	.byte	103                             // 0x67
327; CHECK-BE-NEXT:    	.byte	111                             // 0x6f
328; CHECK-BE-NEXT:    	.byte	119                             // 0x77
329; CHECK-BE-NEXT:    	.byte	127                             // 0x7f
; Test body: every iteration loads one <16 x i64> from %A, truncates it to
; <16 x i8> and stores the result to %dst. The default and big-endian
; expectations below use two four-register tbl instructions that share one
; index vector (v0) and merge the two results with a mov into the upper 64-bit
; lane; the expectations for the run that passes
; -aarch64-enable-ext-to-tbl=false use seven uzp1 instructions instead.
330define void @trunc_v16i64_to_v16i8_in_loop(ptr %A, ptr %dst) {
331; CHECK-LABEL: trunc_v16i64_to_v16i8_in_loop:
332; CHECK:       ; %bb.0: ; %entry
333; CHECK-NEXT:  Lloh4:
334; CHECK-NEXT:    adrp x8, lCPI3_0@PAGE
335; CHECK-NEXT:  Lloh5:
336; CHECK-NEXT:    ldr q0, [x8, lCPI3_0@PAGEOFF]
337; CHECK-NEXT:    mov x8, xzr
338; CHECK-NEXT:  LBB3_1: ; %loop
339; CHECK-NEXT:    ; =>This Inner Loop Header: Depth=1
340; CHECK-NEXT:    add x9, x0, x8, lsl #7
341; CHECK-NEXT:    ldp q1, q2, [x9]
342; CHECK-NEXT:    ldp q16, q17, [x9, #64]
343; CHECK-NEXT:    ldp q3, q4, [x9, #32]
344; CHECK-NEXT:    ldp q18, q19, [x9, #96]
345; CHECK-NEXT:    tbl.16b v1, { v1, v2, v3, v4 }, v0
346; CHECK-NEXT:    tbl.16b v2, { v16, v17, v18, v19 }, v0
347; CHECK-NEXT:    mov.d v1[1], v2[0]
348; CHECK-NEXT:    str q1, [x1, x8, lsl #4]
349; CHECK-NEXT:    add x8, x8, #1
350; CHECK-NEXT:    cmp x8, #1000
351; CHECK-NEXT:    b.eq LBB3_1
352; CHECK-NEXT:  ; %bb.2: ; %exit
353; CHECK-NEXT:    ret
354; CHECK-NEXT:    .loh AdrpLdr Lloh4, Lloh5
355;
356; CHECK-BE-LABEL: trunc_v16i64_to_v16i8_in_loop:
357; CHECK-BE:       // %bb.0: // %entry
358; CHECK-BE-NEXT:    adrp x8, .LCPI3_0
359; CHECK-BE-NEXT:    add x8, x8, :lo12:.LCPI3_0
360; CHECK-BE-NEXT:    ld1 { v0.16b }, [x8]
361; CHECK-BE-NEXT:    mov x8, xzr
362; CHECK-BE-NEXT:  .LBB3_1: // %loop
363; CHECK-BE-NEXT:    // =>This Inner Loop Header: Depth=1
364; CHECK-BE-NEXT:    add x9, x0, x8, lsl #7
365; CHECK-BE-NEXT:    add x13, x9, #64
366; CHECK-BE-NEXT:    add x12, x9, #80
367; CHECK-BE-NEXT:    add x14, x9, #16
368; CHECK-BE-NEXT:    ld1 { v1.16b }, [x9]
369; CHECK-BE-NEXT:    ld1 { v16.16b }, [x13]
370; CHECK-BE-NEXT:    add x11, x9, #96
371; CHECK-BE-NEXT:    add x13, x9, #32
372; CHECK-BE-NEXT:    ld1 { v2.16b }, [x14]
373; CHECK-BE-NEXT:    ld1 { v17.16b }, [x12]
374; CHECK-BE-NEXT:    add x10, x9, #112
375; CHECK-BE-NEXT:    add x9, x9, #48
376; CHECK-BE-NEXT:    ld1 { v3.16b }, [x13]
377; CHECK-BE-NEXT:    ld1 { v18.16b }, [x11]
378; CHECK-BE-NEXT:    ld1 { v4.16b }, [x9]
379; CHECK-BE-NEXT:    add x9, x1, x8, lsl #4
380; CHECK-BE-NEXT:    ld1 { v19.16b }, [x10]
381; CHECK-BE-NEXT:    add x8, x8, #1
382; CHECK-BE-NEXT:    cmp x8, #1000
383; CHECK-BE-NEXT:    tbl v1.16b, { v1.16b, v2.16b, v3.16b, v4.16b }, v0.16b
384; CHECK-BE-NEXT:    tbl v2.16b, { v16.16b, v17.16b, v18.16b, v19.16b }, v0.16b
385; CHECK-BE-NEXT:    mov v1.d[1], v2.d[0]
386; CHECK-BE-NEXT:    st1 { v1.16b }, [x9]
387; CHECK-BE-NEXT:    b.eq .LBB3_1
388; CHECK-BE-NEXT:  // %bb.2: // %exit
389; CHECK-BE-NEXT:    ret
390;
391; CHECK-DISABLE-LABEL: trunc_v16i64_to_v16i8_in_loop:
392; CHECK-DISABLE:       // %bb.0: // %entry
393; CHECK-DISABLE-NEXT:    mov x8, xzr
394; CHECK-DISABLE-NEXT:  .LBB3_1: // %loop
395; CHECK-DISABLE-NEXT:    // =>This Inner Loop Header: Depth=1
396; CHECK-DISABLE-NEXT:    add x9, x0, x8, lsl #7
397; CHECK-DISABLE-NEXT:    add x10, x9, #16
398; CHECK-DISABLE-NEXT:    add x11, x9, #48
399; CHECK-DISABLE-NEXT:    ld1 { v0.2d }, [x9]
400; CHECK-DISABLE-NEXT:    ld1 { v1.2d }, [x10]
401; CHECK-DISABLE-NEXT:    add x10, x9, #112
402; CHECK-DISABLE-NEXT:    ld1 { v2.2d }, [x11]
403; CHECK-DISABLE-NEXT:    ld1 { v3.2d }, [x10]
404; CHECK-DISABLE-NEXT:    add x10, x9, #96
405; CHECK-DISABLE-NEXT:    add x11, x9, #32
406; CHECK-DISABLE-NEXT:    ld1 { v4.2d }, [x10]
407; CHECK-DISABLE-NEXT:    add x10, x9, #80
408; CHECK-DISABLE-NEXT:    add x9, x9, #64
409; CHECK-DISABLE-NEXT:    ld1 { v5.2d }, [x11]
410; CHECK-DISABLE-NEXT:    ld1 { v6.2d }, [x10]
411; CHECK-DISABLE-NEXT:    ld1 { v7.2d }, [x9]
412; CHECK-DISABLE-NEXT:    uzp1 v0.4s, v0.4s, v1.4s
413; CHECK-DISABLE-NEXT:    add x9, x1, x8, lsl #4
414; CHECK-DISABLE-NEXT:    add x8, x8, #1
415; CHECK-DISABLE-NEXT:    uzp1 v3.4s, v4.4s, v3.4s
416; CHECK-DISABLE-NEXT:    cmp x8, #1000
417; CHECK-DISABLE-NEXT:    uzp1 v4.4s, v7.4s, v6.4s
418; CHECK-DISABLE-NEXT:    uzp1 v2.4s, v5.4s, v2.4s
419; CHECK-DISABLE-NEXT:    uzp1 v1.8h, v4.8h, v3.8h
420; CHECK-DISABLE-NEXT:    uzp1 v0.8h, v0.8h, v2.8h
421; CHECK-DISABLE-NEXT:    uzp1 v0.16b, v0.16b, v1.16b
422; CHECK-DISABLE-NEXT:    st1 { v0.16b }, [x9]
423; CHECK-DISABLE-NEXT:    b.eq .LBB3_1
424; CHECK-DISABLE-NEXT:  // %bb.2: // %exit
425; CHECK-DISABLE-NEXT:    ret
426entry:
427  br label %loop
428
429loop:
; %iv indexes %A in units of <16 x i64> and %dst in units of <16 x i8>.
430  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
431  %gep.A = getelementptr inbounds <16 x i64>, ptr %A, i64 %iv
432  %l.A = load <16 x i64>, ptr %gep.A
; The truncate under test.
433  %trunc = trunc <16 x i64> %l.A to <16 x i8>
434  %gep.dst = getelementptr inbounds <16 x i8>, ptr %dst, i64 %iv
435  store <16 x i8> %trunc, ptr %gep.dst
436  %iv.next = add i64 %iv, 1
; NOTE(review): the branch returns to %loop only when %iv.next == 1000, so
; starting from %iv = 0 the body runs once and then exits. Presumably only the
; loop structure matters for this test - confirm before changing, since the
; expectations above contain the matching b.eq.
437  %ec = icmp eq i64 %iv.next, 1000
438  br i1 %ec, label %loop, label %exit
439
440exit:
441  ret void
442}
443
444; CHECK-LABEL: lCPI4_0:
445; CHECK-NEXT: 	.byte	0                               ; 0x0
446; CHECK-NEXT: 	.byte	8                               ; 0x8
447; CHECK-NEXT: 	.byte	16                              ; 0x10
448; CHECK-NEXT: 	.byte	24                              ; 0x18
449; CHECK-NEXT: 	.byte	32                              ; 0x20
450; CHECK-NEXT: 	.byte	40                              ; 0x28
451; CHECK-NEXT: 	.byte	48                              ; 0x30
452; CHECK-NEXT: 	.byte	56                              ; 0x38
453; CHECK-NEXT: 	.byte	255                             ; 0xff
454; CHECK-NEXT: 	.byte	255                             ; 0xff
455; CHECK-NEXT: 	.byte	255                             ; 0xff
456; CHECK-NEXT: 	.byte	255                             ; 0xff
457; CHECK-NEXT: 	.byte	255                             ; 0xff
458; CHECK-NEXT: 	.byte	255                             ; 0xff
459; CHECK-NEXT: 	.byte	255                             ; 0xff
460; CHECK-NEXT: 	.byte	255                             ; 0xff
461
462; CHECK-BE-LABEL:   .LCPI4_0:
463; CHECK-BE-NEXT:   	.byte	7                               // 0x7
464; CHECK-BE-NEXT:   	.byte	15                              // 0xf
465; CHECK-BE-NEXT:   	.byte	23                              // 0x17
466; CHECK-BE-NEXT:   	.byte	31                              // 0x1f
467; CHECK-BE-NEXT:   	.byte	39                              // 0x27
468; CHECK-BE-NEXT:   	.byte	47                              // 0x2f
469; CHECK-BE-NEXT:   	.byte	55                              // 0x37
470; CHECK-BE-NEXT:   	.byte	63                              // 0x3f
471; CHECK-BE-NEXT:   	.byte	255                             // 0xff
472; CHECK-BE-NEXT:   	.byte	255                             // 0xff
473; CHECK-BE-NEXT:   	.byte	255                             // 0xff
474; CHECK-BE-NEXT:   	.byte	255                             // 0xff
475; CHECK-BE-NEXT:   	.byte	255                             // 0xff
476; CHECK-BE-NEXT:   	.byte	255                             // 0xff
477; CHECK-BE-NEXT:   	.byte	255                             // 0xff
478; CHECK-BE-NEXT:   	.byte	255                             // 0xff
; Test body: every iteration loads one <8 x i64> from %A, truncates it to
; <8 x i8> and stores the 8 result bytes to %dst. The default and big-endian
; expectations below use one four-register tbl followed by a 64-bit store
; (str d1 / st1 { v1.8b }); the expectations for the run that passes
; -aarch64-enable-ext-to-tbl=false use three uzp1 instructions and an xtn.
479define void @trunc_v8i64_to_v8i8_in_loop(ptr %A, ptr %dst) {
480; CHECK-LABEL: trunc_v8i64_to_v8i8_in_loop:
481; CHECK:       ; %bb.0: ; %entry
482; CHECK-NEXT:  Lloh6:
483; CHECK-NEXT:    adrp x8, lCPI4_0@PAGE
484; CHECK-NEXT:  Lloh7:
485; CHECK-NEXT:    ldr q0, [x8, lCPI4_0@PAGEOFF]
486; CHECK-NEXT:    mov x8, xzr
487; CHECK-NEXT:  LBB4_1: ; %loop
488; CHECK-NEXT:    ; =>This Inner Loop Header: Depth=1
489; CHECK-NEXT:    add x9, x0, x8, lsl #6
490; CHECK-NEXT:    ldp q1, q2, [x9]
491; CHECK-NEXT:    ldp q3, q4, [x9, #32]
492; CHECK-NEXT:    tbl.16b v1, { v1, v2, v3, v4 }, v0
493; CHECK-NEXT:    str d1, [x1, x8, lsl #3]
494; CHECK-NEXT:    add x8, x8, #1
495; CHECK-NEXT:    cmp x8, #1000
496; CHECK-NEXT:    b.eq LBB4_1
497; CHECK-NEXT:  ; %bb.2: ; %exit
498; CHECK-NEXT:    ret
499; CHECK-NEXT:    .loh AdrpLdr Lloh6, Lloh7
500;
501; CHECK-BE-LABEL: trunc_v8i64_to_v8i8_in_loop:
502; CHECK-BE:       // %bb.0: // %entry
503; CHECK-BE-NEXT:    adrp x8, .LCPI4_0
504; CHECK-BE-NEXT:    add x8, x8, :lo12:.LCPI4_0
505; CHECK-BE-NEXT:    ld1 { v0.16b }, [x8]
506; CHECK-BE-NEXT:    mov x8, xzr
507; CHECK-BE-NEXT:  .LBB4_1: // %loop
508; CHECK-BE-NEXT:    // =>This Inner Loop Header: Depth=1
509; CHECK-BE-NEXT:    add x9, x0, x8, lsl #6
510; CHECK-BE-NEXT:    add x10, x9, #16
511; CHECK-BE-NEXT:    ld1 { v1.16b }, [x9]
512; CHECK-BE-NEXT:    add x11, x9, #32
513; CHECK-BE-NEXT:    ld1 { v2.16b }, [x10]
514; CHECK-BE-NEXT:    add x9, x9, #48
515; CHECK-BE-NEXT:    ld1 { v3.16b }, [x11]
516; CHECK-BE-NEXT:    ld1 { v4.16b }, [x9]
517; CHECK-BE-NEXT:    add x9, x1, x8, lsl #3
518; CHECK-BE-NEXT:    add x8, x8, #1
519; CHECK-BE-NEXT:    cmp x8, #1000
520; CHECK-BE-NEXT:    tbl v1.16b, { v1.16b, v2.16b, v3.16b, v4.16b }, v0.16b
521; CHECK-BE-NEXT:    st1 { v1.8b }, [x9]
522; CHECK-BE-NEXT:    b.eq .LBB4_1
523; CHECK-BE-NEXT:  // %bb.2: // %exit
524; CHECK-BE-NEXT:    ret
525;
526; CHECK-DISABLE-LABEL: trunc_v8i64_to_v8i8_in_loop:
527; CHECK-DISABLE:       // %bb.0: // %entry
528; CHECK-DISABLE-NEXT:    mov x8, xzr
529; CHECK-DISABLE-NEXT:  .LBB4_1: // %loop
530; CHECK-DISABLE-NEXT:    // =>This Inner Loop Header: Depth=1
531; CHECK-DISABLE-NEXT:    add x9, x0, x8, lsl #6
532; CHECK-DISABLE-NEXT:    ld1 { v0.2d }, [x9]
533; CHECK-DISABLE-NEXT:    add x10, x9, #16
534; CHECK-DISABLE-NEXT:    add x11, x9, #48
535; CHECK-DISABLE-NEXT:    add x9, x9, #32
536; CHECK-DISABLE-NEXT:    ld1 { v1.2d }, [x10]
537; CHECK-DISABLE-NEXT:    ld1 { v2.2d }, [x11]
538; CHECK-DISABLE-NEXT:    ld1 { v3.2d }, [x9]
539; CHECK-DISABLE-NEXT:    add x9, x1, x8, lsl #3
540; CHECK-DISABLE-NEXT:    add x8, x8, #1
541; CHECK-DISABLE-NEXT:    cmp x8, #1000
542; CHECK-DISABLE-NEXT:    uzp1 v0.4s, v0.4s, v1.4s
543; CHECK-DISABLE-NEXT:    uzp1 v2.4s, v3.4s, v2.4s
544; CHECK-DISABLE-NEXT:    uzp1 v0.8h, v0.8h, v2.8h
545; CHECK-DISABLE-NEXT:    xtn v0.8b, v0.8h
546; CHECK-DISABLE-NEXT:    st1 { v0.8b }, [x9]
547; CHECK-DISABLE-NEXT:    b.eq .LBB4_1
548; CHECK-DISABLE-NEXT:  // %bb.2: // %exit
549; CHECK-DISABLE-NEXT:    ret
550entry:
551  br label %loop
552
553loop:
; %iv indexes %A in units of <8 x i64> and %dst in units of <8 x i8>.
554  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
555  %gep.A = getelementptr inbounds <8 x i64>, ptr %A, i64 %iv
556  %l.A = load <8 x i64>, ptr %gep.A
; The truncate under test.
557  %trunc = trunc <8 x i64> %l.A to <8 x i8>
558  %gep.dst = getelementptr inbounds <8 x i8>, ptr %dst, i64 %iv
559  store <8 x i8> %trunc, ptr %gep.dst
560  %iv.next = add i64 %iv, 1
; NOTE(review): the branch returns to %loop only when %iv.next == 1000, so
; starting from %iv = 0 the body runs once and then exits. Presumably only the
; loop structure matters for this test - confirm before changing, since the
; expectations above contain the matching b.eq.
561  %ec = icmp eq i64 %iv.next, 1000
562  br i1 %ec, label %loop, label %exit
563
564exit:
565  ret void
566}
567
; Test body: every iteration loads one <8 x i19> from %A, truncates it to
; <8 x i8> and stores the 8 result bytes to %dst. None of the expectation sets
; below contains a tbl: the 19-bit elements are extracted with scalar loads,
; shifts and extr, inserted lane by lane into two vectors, and then narrowed
; with uzp1 and xtn. The big-endian expectations and those for the run that
; passes -aarch64-enable-ext-to-tbl=false list the same instruction sequence.
; NOTE(review): presumably tbl is not used here because i19 is not a whole
; number of bytes - confirm against the lowering code.
568define void @trunc_v8i19_to_v8i8_in_loop(ptr %A, ptr %dst) {
569; CHECK-LABEL: trunc_v8i19_to_v8i8_in_loop:
570; CHECK:       ; %bb.0: ; %entry
571; CHECK-NEXT:    mov x8, xzr
572; CHECK-NEXT:  LBB5_1: ; %loop
573; CHECK-NEXT:    ; =>This Inner Loop Header: Depth=1
574; CHECK-NEXT:    ldp x9, x10, [x0]
575; CHECK-NEXT:    ldrb w14, [x0, #18]
576; CHECK-NEXT:    ldrh w15, [x0, #16]
577; CHECK-NEXT:    add x0, x0, #32
578; CHECK-NEXT:    lsr w12, w10, #12
579; CHECK-NEXT:    fmov s1, w9
580; CHECK-NEXT:    lsr x11, x9, #19
581; CHECK-NEXT:    lsr x13, x10, #31
582; CHECK-NEXT:    fmov s0, w12
583; CHECK-NEXT:    lsr x12, x9, #38
584; CHECK-NEXT:    extr x9, x10, x9, #57
585; CHECK-NEXT:    mov.s v1[1], w11
586; CHECK-NEXT:    orr x11, x15, x14, lsl #16
587; CHECK-NEXT:    mov.s v0[1], w13
588; CHECK-NEXT:    extr x13, x11, x10, #50
589; CHECK-NEXT:    lsr w10, w11, #5
590; CHECK-NEXT:    mov.s v1[2], w12
591; CHECK-NEXT:    mov.s v0[2], w13
592; CHECK-NEXT:    mov.s v1[3], w9
593; CHECK-NEXT:    mov.s v0[3], w10
594; CHECK-NEXT:    uzp1.8h v0, v1, v0
595; CHECK-NEXT:    xtn.8b v0, v0
596; CHECK-NEXT:    str d0, [x1, x8, lsl #3]
597; CHECK-NEXT:    add x8, x8, #1
598; CHECK-NEXT:    cmp x8, #1000
599; CHECK-NEXT:    b.eq LBB5_1
600; CHECK-NEXT:  ; %bb.2: ; %exit
601; CHECK-NEXT:    ret
602;
603; CHECK-BE-LABEL: trunc_v8i19_to_v8i8_in_loop:
604; CHECK-BE:       // %bb.0: // %entry
605; CHECK-BE-NEXT:    mov x8, xzr
606; CHECK-BE-NEXT:  .LBB5_1: // %loop
607; CHECK-BE-NEXT:    // =>This Inner Loop Header: Depth=1
608; CHECK-BE-NEXT:    ldp x10, x9, [x0]
609; CHECK-BE-NEXT:    ldrh w16, [x0, #16]
610; CHECK-BE-NEXT:    ldrb w17, [x0, #18]
611; CHECK-BE-NEXT:    add x0, x0, #32
612; CHECK-BE-NEXT:    lsl x11, x9, #24
613; CHECK-BE-NEXT:    lsr x12, x9, #40
614; CHECK-BE-NEXT:    lsr x13, x10, #45
615; CHECK-BE-NEXT:    lsl x14, x10, #24
616; CHECK-BE-NEXT:    lsr x15, x10, #40
617; CHECK-BE-NEXT:    extr x12, x12, x11, #57
618; CHECK-BE-NEXT:    fmov s0, w13
619; CHECK-BE-NEXT:    lsr w13, w10, #7
620; CHECK-BE-NEXT:    extr x14, x15, x14, #50
621; CHECK-BE-NEXT:    lsr w15, w9, #14
622; CHECK-BE-NEXT:    extr x9, x10, x9, #40
623; CHECK-BE-NEXT:    fmov s1, w12
624; CHECK-BE-NEXT:    orr w12, w17, w16, lsl #8
625; CHECK-BE-NEXT:    mov v0.s[1], w14
626; CHECK-BE-NEXT:    lsr w9, w9, #12
627; CHECK-BE-NEXT:    orr w11, w12, w11
628; CHECK-BE-NEXT:    mov v1.s[1], w15
629; CHECK-BE-NEXT:    lsr w11, w11, #19
630; CHECK-BE-NEXT:    mov v0.s[2], w13
631; CHECK-BE-NEXT:    mov v1.s[2], w11
632; CHECK-BE-NEXT:    mov v0.s[3], w9
633; CHECK-BE-NEXT:    add x9, x1, x8, lsl #3
634; CHECK-BE-NEXT:    add x8, x8, #1
635; CHECK-BE-NEXT:    cmp x8, #1000
636; CHECK-BE-NEXT:    mov v1.s[3], w12
637; CHECK-BE-NEXT:    uzp1 v0.8h, v0.8h, v1.8h
638; CHECK-BE-NEXT:    xtn v0.8b, v0.8h
639; CHECK-BE-NEXT:    st1 { v0.8b }, [x9]
640; CHECK-BE-NEXT:    b.eq .LBB5_1
641; CHECK-BE-NEXT:  // %bb.2: // %exit
642; CHECK-BE-NEXT:    ret
643;
644; CHECK-DISABLE-LABEL: trunc_v8i19_to_v8i8_in_loop:
645; CHECK-DISABLE:       // %bb.0: // %entry
646; CHECK-DISABLE-NEXT:    mov x8, xzr
647; CHECK-DISABLE-NEXT:  .LBB5_1: // %loop
648; CHECK-DISABLE-NEXT:    // =>This Inner Loop Header: Depth=1
649; CHECK-DISABLE-NEXT:    ldp x10, x9, [x0]
650; CHECK-DISABLE-NEXT:    ldrh w16, [x0, #16]
651; CHECK-DISABLE-NEXT:    ldrb w17, [x0, #18]
652; CHECK-DISABLE-NEXT:    add x0, x0, #32
653; CHECK-DISABLE-NEXT:    lsl x11, x9, #24
654; CHECK-DISABLE-NEXT:    lsr x12, x9, #40
655; CHECK-DISABLE-NEXT:    lsr x13, x10, #45
656; CHECK-DISABLE-NEXT:    lsl x14, x10, #24
657; CHECK-DISABLE-NEXT:    lsr x15, x10, #40
658; CHECK-DISABLE-NEXT:    extr x12, x12, x11, #57
659; CHECK-DISABLE-NEXT:    fmov s0, w13
660; CHECK-DISABLE-NEXT:    lsr w13, w10, #7
661; CHECK-DISABLE-NEXT:    extr x14, x15, x14, #50
662; CHECK-DISABLE-NEXT:    lsr w15, w9, #14
663; CHECK-DISABLE-NEXT:    extr x9, x10, x9, #40
664; CHECK-DISABLE-NEXT:    fmov s1, w12
665; CHECK-DISABLE-NEXT:    orr w12, w17, w16, lsl #8
666; CHECK-DISABLE-NEXT:    mov v0.s[1], w14
667; CHECK-DISABLE-NEXT:    lsr w9, w9, #12
668; CHECK-DISABLE-NEXT:    orr w11, w12, w11
669; CHECK-DISABLE-NEXT:    mov v1.s[1], w15
670; CHECK-DISABLE-NEXT:    lsr w11, w11, #19
671; CHECK-DISABLE-NEXT:    mov v0.s[2], w13
672; CHECK-DISABLE-NEXT:    mov v1.s[2], w11
673; CHECK-DISABLE-NEXT:    mov v0.s[3], w9
674; CHECK-DISABLE-NEXT:    add x9, x1, x8, lsl #3
675; CHECK-DISABLE-NEXT:    add x8, x8, #1
676; CHECK-DISABLE-NEXT:    cmp x8, #1000
677; CHECK-DISABLE-NEXT:    mov v1.s[3], w12
678; CHECK-DISABLE-NEXT:    uzp1 v0.8h, v0.8h, v1.8h
679; CHECK-DISABLE-NEXT:    xtn v0.8b, v0.8h
680; CHECK-DISABLE-NEXT:    st1 { v0.8b }, [x9]
681; CHECK-DISABLE-NEXT:    b.eq .LBB5_1
682; CHECK-DISABLE-NEXT:  // %bb.2: // %exit
683; CHECK-DISABLE-NEXT:    ret
684entry:
685  br label %loop
686
687loop:
; %iv indexes %A in units of <8 x i19> and %dst in units of <8 x i8>.
688  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
689  %gep.A = getelementptr inbounds <8 x i19>, ptr %A, i64 %iv
690  %l.A = load <8 x i19>, ptr %gep.A
; The truncate under test; the source element type is the non-byte-sized i19.
691  %trunc = trunc <8 x i19> %l.A to <8 x i8>
692  %gep.dst = getelementptr inbounds <8 x i8>, ptr %dst, i64 %iv
693  store <8 x i8> %trunc, ptr %gep.dst
694  %iv.next = add i64 %iv, 1
; NOTE(review): the branch returns to %loop only when %iv.next == 1000, so
; starting from %iv = 0 the body runs once and then exits. Presumably only the
; loop structure matters for this test - confirm before changing, since the
; expectations above contain the matching b.eq.
695  %ec = icmp eq i64 %iv.next, 1000
696  br i1 %ec, label %loop, label %exit
697
698exit:
699  ret void
700}
701
; Test: truncation of a non-power-of-two vector inside a loop.
; The loop body loads an <11 x i64> from %A (indexed by %iv), truncates every
; lane to i8 and stores the resulting <11 x i8> to %dst (indexed by %iv).
; The expected little-endian output writes the 11 result bytes in three
; pieces: a d-register store, a halfword lane store at offset 8 and a byte
; lane store at offset 10; the source pointer advances by 128 and the
; destination pointer by 16 per iteration. The two big-endian configurations
; list the same instruction sequence as each other for this function.
; NOTE(review): the back edge is taken when %iv.next == 1000 (icmp eq feeding
; the true successor), so starting from %iv = 0 the body appears to run once.
; The expected b.eq mirrors this; confirm it is intended before changing it,
; since any change requires regenerating the assertions below.
702define void @trunc_v11i64_to_v11i8_in_loop(ptr %A, ptr %dst) {
703; CHECK-LABEL: trunc_v11i64_to_v11i8_in_loop:
704; CHECK:       ; %bb.0: ; %entry
705; CHECK-NEXT:    mov w8, #1000 ; =0x3e8
706; CHECK-NEXT:  LBB6_1: ; %loop
707; CHECK-NEXT:    ; =>This Inner Loop Header: Depth=1
708; CHECK-NEXT:    ldp q4, q0, [x0, #48]
709; CHECK-NEXT:    add x9, x1, #10
710; CHECK-NEXT:    ldr d1, [x0, #80]
711; CHECK-NEXT:    ldp q3, q2, [x0]
712; CHECK-NEXT:    ldr q5, [x0, #32]
713; CHECK-NEXT:    subs x8, x8, #1
714; CHECK-NEXT:    add x0, x0, #128
715; CHECK-NEXT:    uzp1.4s v0, v0, v1
716; CHECK-NEXT:    uzp1.4s v1, v5, v4
717; CHECK-NEXT:    uzp1.4s v2, v3, v2
718; CHECK-NEXT:    xtn.4h v0, v0
719; CHECK-NEXT:    uzp1.8h v1, v2, v1
720; CHECK-NEXT:    uzp1.8b v2, v0, v0
721; CHECK-NEXT:    uzp1.16b v0, v1, v0
722; CHECK-NEXT:    st1.b { v2 }[2], [x9]
723; CHECK-NEXT:    add x9, x1, #8
724; CHECK-NEXT:    st1.h { v0 }[4], [x9]
725; CHECK-NEXT:    str d0, [x1], #16
726; CHECK-NEXT:    b.eq LBB6_1
727; CHECK-NEXT:  ; %bb.2: ; %exit
728; CHECK-NEXT:    ret
729;
730; CHECK-BE-LABEL: trunc_v11i64_to_v11i8_in_loop:
731; CHECK-BE:       // %bb.0: // %entry
732; CHECK-BE-NEXT:    mov w8, #1000 // =0x3e8
733; CHECK-BE-NEXT:  .LBB6_1: // %loop
734; CHECK-BE-NEXT:    // =>This Inner Loop Header: Depth=1
735; CHECK-BE-NEXT:    add x9, x0, #64
736; CHECK-BE-NEXT:    add x10, x0, #16
737; CHECK-BE-NEXT:    ld1 { v3.2d }, [x0]
738; CHECK-BE-NEXT:    ld1 { v0.2d }, [x9]
739; CHECK-BE-NEXT:    add x9, x0, #48
740; CHECK-BE-NEXT:    ld1 { v1.2d }, [x10]
741; CHECK-BE-NEXT:    add x10, x0, #32
742; CHECK-BE-NEXT:    ld1 { v2.2d }, [x9]
743; CHECK-BE-NEXT:    ldr d5, [x0, #80]
744; CHECK-BE-NEXT:    ld1 { v4.2d }, [x10]
745; CHECK-BE-NEXT:    add x9, x1, #10
746; CHECK-BE-NEXT:    subs x8, x8, #1
747; CHECK-BE-NEXT:    uzp1 v1.4s, v3.4s, v1.4s
748; CHECK-BE-NEXT:    uzp1 v0.4s, v0.4s, v5.4s
749; CHECK-BE-NEXT:    add x0, x0, #128
750; CHECK-BE-NEXT:    uzp1 v2.4s, v4.4s, v2.4s
751; CHECK-BE-NEXT:    xtn v0.4h, v0.4s
752; CHECK-BE-NEXT:    uzp1 v1.8h, v1.8h, v2.8h
753; CHECK-BE-NEXT:    uzp1 v1.16b, v1.16b, v0.16b
754; CHECK-BE-NEXT:    uzp1 v0.8b, v0.8b, v0.8b
755; CHECK-BE-NEXT:    rev16 v2.16b, v1.16b
756; CHECK-BE-NEXT:    rev64 v1.16b, v1.16b
757; CHECK-BE-NEXT:    st1 { v0.b }[2], [x9]
758; CHECK-BE-NEXT:    add x9, x1, #8
759; CHECK-BE-NEXT:    st1 { v2.h }[4], [x9]
760; CHECK-BE-NEXT:    str d1, [x1], #16
761; CHECK-BE-NEXT:    b.eq .LBB6_1
762; CHECK-BE-NEXT:  // %bb.2: // %exit
763; CHECK-BE-NEXT:    ret
764;
765; CHECK-DISABLE-LABEL: trunc_v11i64_to_v11i8_in_loop:
766; CHECK-DISABLE:       // %bb.0: // %entry
767; CHECK-DISABLE-NEXT:    mov w8, #1000 // =0x3e8
768; CHECK-DISABLE-NEXT:  .LBB6_1: // %loop
769; CHECK-DISABLE-NEXT:    // =>This Inner Loop Header: Depth=1
770; CHECK-DISABLE-NEXT:    add x9, x0, #64
771; CHECK-DISABLE-NEXT:    add x10, x0, #16
772; CHECK-DISABLE-NEXT:    ld1 { v3.2d }, [x0]
773; CHECK-DISABLE-NEXT:    ld1 { v0.2d }, [x9]
774; CHECK-DISABLE-NEXT:    add x9, x0, #48
775; CHECK-DISABLE-NEXT:    ld1 { v1.2d }, [x10]
776; CHECK-DISABLE-NEXT:    add x10, x0, #32
777; CHECK-DISABLE-NEXT:    ld1 { v2.2d }, [x9]
778; CHECK-DISABLE-NEXT:    ldr d5, [x0, #80]
779; CHECK-DISABLE-NEXT:    ld1 { v4.2d }, [x10]
780; CHECK-DISABLE-NEXT:    add x9, x1, #10
781; CHECK-DISABLE-NEXT:    subs x8, x8, #1
782; CHECK-DISABLE-NEXT:    uzp1 v1.4s, v3.4s, v1.4s
783; CHECK-DISABLE-NEXT:    uzp1 v0.4s, v0.4s, v5.4s
784; CHECK-DISABLE-NEXT:    add x0, x0, #128
785; CHECK-DISABLE-NEXT:    uzp1 v2.4s, v4.4s, v2.4s
786; CHECK-DISABLE-NEXT:    xtn v0.4h, v0.4s
787; CHECK-DISABLE-NEXT:    uzp1 v1.8h, v1.8h, v2.8h
788; CHECK-DISABLE-NEXT:    uzp1 v1.16b, v1.16b, v0.16b
789; CHECK-DISABLE-NEXT:    uzp1 v0.8b, v0.8b, v0.8b
790; CHECK-DISABLE-NEXT:    rev16 v2.16b, v1.16b
791; CHECK-DISABLE-NEXT:    rev64 v1.16b, v1.16b
792; CHECK-DISABLE-NEXT:    st1 { v0.b }[2], [x9]
793; CHECK-DISABLE-NEXT:    add x9, x1, #8
794; CHECK-DISABLE-NEXT:    st1 { v2.h }[4], [x9]
795; CHECK-DISABLE-NEXT:    str d1, [x1], #16
796; CHECK-DISABLE-NEXT:    b.eq .LBB6_1
797; CHECK-DISABLE-NEXT:  // %bb.2: // %exit
798; CHECK-DISABLE-NEXT:    ret
799entry:
800  br label %loop
801
802loop:
; %iv is 0 when coming from %entry and %iv.next on the back edge.
803  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
; Address of the %iv-th <11 x i64> element starting at %A.
804  %gep.A = getelementptr inbounds <11 x i64>, ptr %A, i64 %iv
805  %l.A = load <11 x i64>, ptr %gep.A
; The operation under test: narrow each of the 11 lanes from i64 to i8.
806  %trunc = trunc <11 x i64> %l.A to <11 x i8>
; Address of the %iv-th <11 x i8> element starting at %dst.
807  %gep.dst = getelementptr inbounds <11 x i8>, ptr %dst, i64 %iv
808  store <11 x i8> %trunc, ptr %gep.dst
809  %iv.next = add i64 %iv, 1
; Branch back to %loop when %iv.next == 1000, otherwise leave via %exit.
810  %ec = icmp eq i64 %iv.next, 1000
811  br i1 %ec, label %loop, label %exit
812
813exit:
814  ret void
815}
816
; Test: <16 x i16> to <16 x i8> truncation inside a loop.
; The loop body loads a <16 x i16> from %A (indexed by %iv), truncates every
; lane to i8 and stores the <16 x i8> result to %dst (indexed by %iv).
; All three configurations expect the two loaded 128-bit halves to be
; narrowed with a single uzp1 over byte lanes. The little-endian output uses
; ldp/str, while both big-endian outputs use ld1/st1 with explicitly computed
; addresses and are identical to each other.
; NOTE(review): the back edge is taken when %iv.next == 1000 (icmp eq feeding
; the true successor), so starting from %iv = 0 the body appears to run once.
; The expected b.eq mirrors this; confirm it is intended before changing it,
; since any change requires regenerating the assertions below.
817define void @trunc_v16i16_to_v16i8_in_loop(ptr %A, ptr %dst) {
818; CHECK-LABEL: trunc_v16i16_to_v16i8_in_loop:
819; CHECK:       ; %bb.0: ; %entry
820; CHECK-NEXT:    mov x8, xzr
821; CHECK-NEXT:  LBB7_1: ; %loop
822; CHECK-NEXT:    ; =>This Inner Loop Header: Depth=1
823; CHECK-NEXT:    add x9, x0, x8, lsl #5
824; CHECK-NEXT:    ldp q1, q0, [x9]
825; CHECK-NEXT:    uzp1.16b v0, v1, v0
826; CHECK-NEXT:    str q0, [x1, x8, lsl #4]
827; CHECK-NEXT:    add x8, x8, #1
828; CHECK-NEXT:    cmp x8, #1000
829; CHECK-NEXT:    b.eq LBB7_1
830; CHECK-NEXT:  ; %bb.2: ; %exit
831; CHECK-NEXT:    ret
832;
833; CHECK-BE-LABEL: trunc_v16i16_to_v16i8_in_loop:
834; CHECK-BE:       // %bb.0: // %entry
835; CHECK-BE-NEXT:    mov x8, xzr
836; CHECK-BE-NEXT:  .LBB7_1: // %loop
837; CHECK-BE-NEXT:    // =>This Inner Loop Header: Depth=1
838; CHECK-BE-NEXT:    add x9, x0, x8, lsl #5
839; CHECK-BE-NEXT:    add x10, x9, #16
840; CHECK-BE-NEXT:    ld1 { v0.8h }, [x9]
841; CHECK-BE-NEXT:    add x9, x1, x8, lsl #4
842; CHECK-BE-NEXT:    ld1 { v1.8h }, [x10]
843; CHECK-BE-NEXT:    add x8, x8, #1
844; CHECK-BE-NEXT:    cmp x8, #1000
845; CHECK-BE-NEXT:    uzp1 v0.16b, v0.16b, v1.16b
846; CHECK-BE-NEXT:    st1 { v0.16b }, [x9]
847; CHECK-BE-NEXT:    b.eq .LBB7_1
848; CHECK-BE-NEXT:  // %bb.2: // %exit
849; CHECK-BE-NEXT:    ret
850;
851; CHECK-DISABLE-LABEL: trunc_v16i16_to_v16i8_in_loop:
852; CHECK-DISABLE:       // %bb.0: // %entry
853; CHECK-DISABLE-NEXT:    mov x8, xzr
854; CHECK-DISABLE-NEXT:  .LBB7_1: // %loop
855; CHECK-DISABLE-NEXT:    // =>This Inner Loop Header: Depth=1
856; CHECK-DISABLE-NEXT:    add x9, x0, x8, lsl #5
857; CHECK-DISABLE-NEXT:    add x10, x9, #16
858; CHECK-DISABLE-NEXT:    ld1 { v0.8h }, [x9]
859; CHECK-DISABLE-NEXT:    add x9, x1, x8, lsl #4
860; CHECK-DISABLE-NEXT:    ld1 { v1.8h }, [x10]
861; CHECK-DISABLE-NEXT:    add x8, x8, #1
862; CHECK-DISABLE-NEXT:    cmp x8, #1000
863; CHECK-DISABLE-NEXT:    uzp1 v0.16b, v0.16b, v1.16b
864; CHECK-DISABLE-NEXT:    st1 { v0.16b }, [x9]
865; CHECK-DISABLE-NEXT:    b.eq .LBB7_1
866; CHECK-DISABLE-NEXT:  // %bb.2: // %exit
867; CHECK-DISABLE-NEXT:    ret
868entry:
869  br label %loop
870
871loop:
; %iv is 0 when coming from %entry and %iv.next on the back edge.
872  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
; Address of the %iv-th <16 x i16> element starting at %A.
873  %gep.A = getelementptr inbounds <16 x i16>, ptr %A, i64 %iv
874  %l.A = load <16 x i16>, ptr %gep.A
; The operation under test: narrow each of the 16 lanes from i16 to i8.
875  %trunc = trunc <16 x i16> %l.A to <16 x i8>
; Address of the %iv-th <16 x i8> element starting at %dst.
876  %gep.dst = getelementptr inbounds <16 x i8>, ptr %dst, i64 %iv
877  store <16 x i8> %trunc, ptr %gep.dst
878  %iv.next = add i64 %iv, 1
; Branch back to %loop when %iv.next == 1000, otherwise leave via %exit.
879  %ec = icmp eq i64 %iv.next, 1000
880  br i1 %ec, label %loop, label %exit
881
882exit:
883  ret void
884}
885
; Test: <8 x i16> to <8 x i8> truncation inside a loop.
; The loop body loads an <8 x i16> from %A (indexed by %iv), truncates every
; lane to i8 and stores the <8 x i8> result to %dst (indexed by %iv).
; All three configurations expect the narrowing to be a single xtn. The
; little-endian output uses ldr/str with scaled register offsets, while both
; big-endian outputs compute the addresses with add and use ld1/st1; the two
; big-endian instruction sequences are identical to each other.
; NOTE(review): the back edge is taken when %iv.next == 1000 (icmp eq feeding
; the true successor), so starting from %iv = 0 the body appears to run once.
; The expected b.eq mirrors this; confirm it is intended before changing it,
; since any change requires regenerating the assertions below.
886define void @trunc_v8i16_to_v8i8_in_loop(ptr %A, ptr %dst) {
887; CHECK-LABEL: trunc_v8i16_to_v8i8_in_loop:
888; CHECK:       ; %bb.0: ; %entry
889; CHECK-NEXT:    mov x8, xzr
890; CHECK-NEXT:  LBB8_1: ; %loop
891; CHECK-NEXT:    ; =>This Inner Loop Header: Depth=1
892; CHECK-NEXT:    ldr q0, [x0, x8, lsl #4]
893; CHECK-NEXT:    xtn.8b v0, v0
894; CHECK-NEXT:    str d0, [x1, x8, lsl #3]
895; CHECK-NEXT:    add x8, x8, #1
896; CHECK-NEXT:    cmp x8, #1000
897; CHECK-NEXT:    b.eq LBB8_1
898; CHECK-NEXT:  ; %bb.2: ; %exit
899; CHECK-NEXT:    ret
900;
901; CHECK-BE-LABEL: trunc_v8i16_to_v8i8_in_loop:
902; CHECK-BE:       // %bb.0: // %entry
903; CHECK-BE-NEXT:    mov x8, xzr
904; CHECK-BE-NEXT:  .LBB8_1: // %loop
905; CHECK-BE-NEXT:    // =>This Inner Loop Header: Depth=1
906; CHECK-BE-NEXT:    add x9, x0, x8, lsl #4
907; CHECK-BE-NEXT:    ld1 { v0.8h }, [x9]
908; CHECK-BE-NEXT:    add x9, x1, x8, lsl #3
909; CHECK-BE-NEXT:    add x8, x8, #1
910; CHECK-BE-NEXT:    cmp x8, #1000
911; CHECK-BE-NEXT:    xtn v0.8b, v0.8h
912; CHECK-BE-NEXT:    st1 { v0.8b }, [x9]
913; CHECK-BE-NEXT:    b.eq .LBB8_1
914; CHECK-BE-NEXT:  // %bb.2: // %exit
915; CHECK-BE-NEXT:    ret
916;
917; CHECK-DISABLE-LABEL: trunc_v8i16_to_v8i8_in_loop:
918; CHECK-DISABLE:       // %bb.0: // %entry
919; CHECK-DISABLE-NEXT:    mov x8, xzr
920; CHECK-DISABLE-NEXT:  .LBB8_1: // %loop
921; CHECK-DISABLE-NEXT:    // =>This Inner Loop Header: Depth=1
922; CHECK-DISABLE-NEXT:    add x9, x0, x8, lsl #4
923; CHECK-DISABLE-NEXT:    ld1 { v0.8h }, [x9]
924; CHECK-DISABLE-NEXT:    add x9, x1, x8, lsl #3
925; CHECK-DISABLE-NEXT:    add x8, x8, #1
926; CHECK-DISABLE-NEXT:    cmp x8, #1000
927; CHECK-DISABLE-NEXT:    xtn v0.8b, v0.8h
928; CHECK-DISABLE-NEXT:    st1 { v0.8b }, [x9]
929; CHECK-DISABLE-NEXT:    b.eq .LBB8_1
930; CHECK-DISABLE-NEXT:  // %bb.2: // %exit
931; CHECK-DISABLE-NEXT:    ret
932entry:
933  br label %loop
934
935loop:
; %iv is 0 when coming from %entry and %iv.next on the back edge.
936  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
; Address of the %iv-th <8 x i16> element starting at %A.
937  %gep.A = getelementptr inbounds <8 x i16>, ptr %A, i64 %iv
938  %l.A = load <8 x i16>, ptr %gep.A
; The operation under test: narrow each of the 8 lanes from i16 to i8.
939  %trunc = trunc <8 x i16> %l.A to <8 x i8>
; Address of the %iv-th <8 x i8> element starting at %dst.
940  %gep.dst = getelementptr inbounds <8 x i8>, ptr %dst, i64 %iv
941  store <8 x i8> %trunc, ptr %gep.dst
942  %iv.next = add i64 %iv, 1
; Branch back to %loop when %iv.next == 1000, otherwise leave via %exit.
943  %ec = icmp eq i64 %iv.next, 1000
944  br i1 %ec, label %loop, label %exit
945
946exit:
947  ret void
948}
949
950