xref: /llvm-project/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-trunc.ll (revision 61510b51c33464a6bc15e4cf5b1ee07e2e0ec1c9)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -mattr=+sve2 -force-streaming-compatible < %s | FileCheck %s
3; RUN: llc -mattr=+sme -force-streaming < %s | FileCheck %s
4; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE
5
6
7target triple = "aarch64-unknown-linux-gnu"
8
9;
10; truncate i16 -> i8
11;
12
; Test case: load a <16 x i16> vector from %in, truncate every lane to i8 and
; return the <16 x i8> result directly (no store is involved).
;
; The assertion lines below are autogenerated and must not be edited by hand.
; What they show:
;  * default-prefix runs: each loaded q register is narrowed with uzp1 and the
;    two narrowed halves are joined by a splice governed by "ptrue p0.b, vl8".
;  * NONEON-NOSVE run: both q registers are written to a 48-byte stack area,
;    the 16 lanes are narrowed one at a time with ldrh/strb pairs, and the
;    packed bytes are reloaded into q0 from [sp, #32].
13define <16 x i8> @trunc_v16i16_v16i8(ptr %in) nounwind {
14; CHECK-LABEL: trunc_v16i16_v16i8:
15; CHECK:       // %bb.0:
16; CHECK-NEXT:    ldp q1, q0, [x0]
17; CHECK-NEXT:    ptrue p0.b, vl8
18; CHECK-NEXT:    uzp1 z3.b, z0.b, z0.b
19; CHECK-NEXT:    uzp1 z2.b, z1.b, z1.b
20; CHECK-NEXT:    splice z0.b, p0, { z2.b, z3.b }
21; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
22; CHECK-NEXT:    ret
23;
24; NONEON-NOSVE-LABEL: trunc_v16i16_v16i8:
25; NONEON-NOSVE:       // %bb.0:
26; NONEON-NOSVE-NEXT:    ldp q1, q0, [x0]
27; NONEON-NOSVE-NEXT:    stp q1, q0, [sp, #-48]!
28; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #30]
29; NONEON-NOSVE-NEXT:    strb w8, [sp, #47]
30; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #28]
31; NONEON-NOSVE-NEXT:    strb w8, [sp, #46]
32; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #26]
33; NONEON-NOSVE-NEXT:    strb w8, [sp, #45]
34; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #24]
35; NONEON-NOSVE-NEXT:    strb w8, [sp, #44]
36; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #22]
37; NONEON-NOSVE-NEXT:    strb w8, [sp, #43]
38; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #20]
39; NONEON-NOSVE-NEXT:    strb w8, [sp, #42]
40; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #18]
41; NONEON-NOSVE-NEXT:    strb w8, [sp, #41]
42; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #16]
43; NONEON-NOSVE-NEXT:    strb w8, [sp, #40]
44; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #14]
45; NONEON-NOSVE-NEXT:    strb w8, [sp, #39]
46; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #12]
47; NONEON-NOSVE-NEXT:    strb w8, [sp, #38]
48; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #10]
49; NONEON-NOSVE-NEXT:    strb w8, [sp, #37]
50; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #8]
51; NONEON-NOSVE-NEXT:    strb w8, [sp, #36]
52; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #6]
53; NONEON-NOSVE-NEXT:    strb w8, [sp, #35]
54; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #4]
55; NONEON-NOSVE-NEXT:    strb w8, [sp, #34]
56; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #2]
57; NONEON-NOSVE-NEXT:    strb w8, [sp, #33]
58; NONEON-NOSVE-NEXT:    ldrh w8, [sp]
59; NONEON-NOSVE-NEXT:    strb w8, [sp, #32]
60; NONEON-NOSVE-NEXT:    ldr q0, [sp, #32]
61; NONEON-NOSVE-NEXT:    add sp, sp, #48
62; NONEON-NOSVE-NEXT:    ret
; IR under test: a single 256-bit load feeding the i16 -> i8 truncate.
63  %a = load <16 x i16>, ptr %in
64  %b = trunc <16 x i16> %a to <16 x i8>
65  ret <16 x i8> %b
66}
67
68; NOTE: Extra 'add' is to prevent the truncate being combined with the store.
; Test case: load a <32 x i16> vector from %in, truncate it to <32 x i8>, add
; the truncated vector to itself and store the sum to %out.
;
; The assertion lines below are autogenerated and must not be edited by hand.
; What they show:
;  * default-prefix runs: four uzp1 narrows (one per loaded q register), two
;    splices under "ptrue p0.b, vl8", two byte-wise vector adds and a single
;    stp of the two result registers to [x1].
;  * NONEON-NOSVE run: a 208-byte frame that saves x19-x30, stores the four
;    input q registers to the stack, then processes each lane with scalar
;    ldrh / add / strb before reloading the packed bytes from [sp, #80] and
;    storing them to [x1].
69define void @trunc_v32i16_v32i8(ptr %in, ptr %out) nounwind {
70; CHECK-LABEL: trunc_v32i16_v32i8:
71; CHECK:       // %bb.0:
72; CHECK-NEXT:    ldp q1, q0, [x0, #32]
73; CHECK-NEXT:    ptrue p0.b, vl8
74; CHECK-NEXT:    ldp q3, q2, [x0]
75; CHECK-NEXT:    uzp1 z5.b, z0.b, z0.b
76; CHECK-NEXT:    uzp1 z4.b, z1.b, z1.b
77; CHECK-NEXT:    uzp1 z1.b, z2.b, z2.b
78; CHECK-NEXT:    uzp1 z0.b, z3.b, z3.b
79; CHECK-NEXT:    splice z2.b, p0, { z4.b, z5.b }
80; CHECK-NEXT:    splice z0.b, p0, { z0.b, z1.b }
81; CHECK-NEXT:    add z1.b, z2.b, z2.b
82; CHECK-NEXT:    add z0.b, z0.b, z0.b
83; CHECK-NEXT:    stp q0, q1, [x1]
84; CHECK-NEXT:    ret
85;
86; NONEON-NOSVE-LABEL: trunc_v32i16_v32i8:
87; NONEON-NOSVE:       // %bb.0:
88; NONEON-NOSVE-NEXT:    sub sp, sp, #208
89; NONEON-NOSVE-NEXT:    ldp q0, q1, [x0, #32]
90; NONEON-NOSVE-NEXT:    stp x29, x30, [sp, #112] // 16-byte Folded Spill
91; NONEON-NOSVE-NEXT:    ldp q3, q2, [x0]
92; NONEON-NOSVE-NEXT:    stp x28, x27, [sp, #128] // 16-byte Folded Spill
93; NONEON-NOSVE-NEXT:    stp x26, x25, [sp, #144] // 16-byte Folded Spill
94; NONEON-NOSVE-NEXT:    stp x24, x23, [sp, #160] // 16-byte Folded Spill
95; NONEON-NOSVE-NEXT:    stp q3, q1, [sp, #16]
96; NONEON-NOSVE-NEXT:    stp q2, q0, [sp, #48]
97; NONEON-NOSVE-NEXT:    ldrh w25, [sp, #28]
98; NONEON-NOSVE-NEXT:    ldrh w26, [sp, #30]
99; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #64]
100; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #66]
101; NONEON-NOSVE-NEXT:    ldrh w29, [sp, #52]
102; NONEON-NOSVE-NEXT:    ldrh w27, [sp, #48]
103; NONEON-NOSVE-NEXT:    ldrh w28, [sp, #50]
104; NONEON-NOSVE-NEXT:    ldrh w23, [sp, #24]
105; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #8] // 8-byte Folded Spill
106; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #56]
107; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #54]
108; NONEON-NOSVE-NEXT:    ldrh w24, [sp, #26]
109; NONEON-NOSVE-NEXT:    stp x22, x21, [sp, #176] // 16-byte Folded Spill
110; NONEON-NOSVE-NEXT:    ldrh w21, [sp, #20]
111; NONEON-NOSVE-NEXT:    add w8, w8, w8
112; NONEON-NOSVE-NEXT:    add w9, w9, w9
113; NONEON-NOSVE-NEXT:    ldrh w22, [sp, #22]
114; NONEON-NOSVE-NEXT:    strb w8, [sp, #92]
115; NONEON-NOSVE-NEXT:    add w8, w29, w29
116; NONEON-NOSVE-NEXT:    ldrh w4, [sp, #44]
117; NONEON-NOSVE-NEXT:    strb w9, [sp, #91]
118; NONEON-NOSVE-NEXT:    add w9, w28, w28
119; NONEON-NOSVE-NEXT:    ldrh w7, [sp, #46]
120; NONEON-NOSVE-NEXT:    strb w8, [sp, #90]
121; NONEON-NOSVE-NEXT:    add w8, w27, w27
122; NONEON-NOSVE-NEXT:    ldrh w2, [sp, #40]
123; NONEON-NOSVE-NEXT:    strb w9, [sp, #89]
124; NONEON-NOSVE-NEXT:    add w9, w26, w26
125; NONEON-NOSVE-NEXT:    ldrh w3, [sp, #42]
126; NONEON-NOSVE-NEXT:    strb w8, [sp, #88]
127; NONEON-NOSVE-NEXT:    add w8, w25, w25
128; NONEON-NOSVE-NEXT:    ldrh w18, [sp, #36]
129; NONEON-NOSVE-NEXT:    stp x20, x19, [sp, #192] // 16-byte Folded Spill
130; NONEON-NOSVE-NEXT:    ldrh w19, [sp, #16]
131; NONEON-NOSVE-NEXT:    ldrh w20, [sp, #18]
132; NONEON-NOSVE-NEXT:    strb w9, [sp, #87]
133; NONEON-NOSVE-NEXT:    add w9, w24, w24
134; NONEON-NOSVE-NEXT:    ldrh w0, [sp, #38]
135; NONEON-NOSVE-NEXT:    strb w8, [sp, #86]
136; NONEON-NOSVE-NEXT:    add w8, w23, w23
137; NONEON-NOSVE-NEXT:    ldrh w12, [sp, #60]
138; NONEON-NOSVE-NEXT:    strb w9, [sp, #85]
139; NONEON-NOSVE-NEXT:    add w9, w22, w22
140; NONEON-NOSVE-NEXT:    ldrh w13, [sp, #62]
141; NONEON-NOSVE-NEXT:    add w6, w12, w12
142; NONEON-NOSVE-NEXT:    strb w8, [sp, #84]
143; NONEON-NOSVE-NEXT:    add w8, w21, w21
144; NONEON-NOSVE-NEXT:    add w5, w13, w13
145; NONEON-NOSVE-NEXT:    strb w9, [sp, #83]
146; NONEON-NOSVE-NEXT:    add w9, w20, w20
147; NONEON-NOSVE-NEXT:    strb w8, [sp, #82]
148; NONEON-NOSVE-NEXT:    add w8, w19, w19
149; NONEON-NOSVE-NEXT:    ldrh w16, [sp, #32]
150; NONEON-NOSVE-NEXT:    strb w9, [sp, #81]
151; NONEON-NOSVE-NEXT:    add w9, w7, w7
152; NONEON-NOSVE-NEXT:    ldrh w17, [sp, #34]
153; NONEON-NOSVE-NEXT:    strb w8, [sp, #80]
154; NONEON-NOSVE-NEXT:    add w8, w4, w4
155; NONEON-NOSVE-NEXT:    ldrh w14, [sp, #76]
156; NONEON-NOSVE-NEXT:    strb w9, [sp, #111]
157; NONEON-NOSVE-NEXT:    add w9, w3, w3
158; NONEON-NOSVE-NEXT:    ldrh w15, [sp, #78]
159; NONEON-NOSVE-NEXT:    strb w8, [sp, #110]
160; NONEON-NOSVE-NEXT:    add w8, w2, w2
161; NONEON-NOSVE-NEXT:    ldrh w12, [sp, #72]
162; NONEON-NOSVE-NEXT:    strb w9, [sp, #109]
163; NONEON-NOSVE-NEXT:    add w9, w0, w0
164; NONEON-NOSVE-NEXT:    ldrh w13, [sp, #74]
165; NONEON-NOSVE-NEXT:    strb w8, [sp, #108]
166; NONEON-NOSVE-NEXT:    add w8, w18, w18
167; NONEON-NOSVE-NEXT:    ldrh w10, [sp, #68]
168; NONEON-NOSVE-NEXT:    strb w9, [sp, #107]
169; NONEON-NOSVE-NEXT:    add w9, w17, w17
170; NONEON-NOSVE-NEXT:    ldrh w11, [sp, #70]
171; NONEON-NOSVE-NEXT:    strb w8, [sp, #106]
172; NONEON-NOSVE-NEXT:    add w8, w16, w16
173; NONEON-NOSVE-NEXT:    ldrh w30, [sp, #58]
174; NONEON-NOSVE-NEXT:    strb w9, [sp, #105]
175; NONEON-NOSVE-NEXT:    add w9, w15, w15
176; NONEON-NOSVE-NEXT:    strb w8, [sp, #104]
177; NONEON-NOSVE-NEXT:    add w8, w14, w14
178; NONEON-NOSVE-NEXT:    strb w9, [sp, #103]
179; NONEON-NOSVE-NEXT:    add w9, w13, w13
180; NONEON-NOSVE-NEXT:    strb w8, [sp, #102]
181; NONEON-NOSVE-NEXT:    add w8, w12, w12
182; NONEON-NOSVE-NEXT:    strb w9, [sp, #101]
183; NONEON-NOSVE-NEXT:    add w9, w11, w11
184; NONEON-NOSVE-NEXT:    strb w8, [sp, #100]
185; NONEON-NOSVE-NEXT:    add w8, w10, w10
186; NONEON-NOSVE-NEXT:    strb w9, [sp, #99]
187; NONEON-NOSVE-NEXT:    ldr w9, [sp, #8] // 4-byte Folded Reload
188; NONEON-NOSVE-NEXT:    strb w8, [sp, #98]
189; NONEON-NOSVE-NEXT:    ldr w8, [sp, #12] // 4-byte Folded Reload
190; NONEON-NOSVE-NEXT:    strb w5, [sp, #95]
191; NONEON-NOSVE-NEXT:    add w5, w30, w30
192; NONEON-NOSVE-NEXT:    add w9, w9, w9
193; NONEON-NOSVE-NEXT:    add w8, w8, w8
194; NONEON-NOSVE-NEXT:    strb w6, [sp, #94]
195; NONEON-NOSVE-NEXT:    strb w5, [sp, #93]
196; NONEON-NOSVE-NEXT:    ldp x20, x19, [sp, #192] // 16-byte Folded Reload
197; NONEON-NOSVE-NEXT:    strb w9, [sp, #97]
198; NONEON-NOSVE-NEXT:    ldp x22, x21, [sp, #176] // 16-byte Folded Reload
199; NONEON-NOSVE-NEXT:    strb w8, [sp, #96]
200; NONEON-NOSVE-NEXT:    ldp x24, x23, [sp, #160] // 16-byte Folded Reload
201; NONEON-NOSVE-NEXT:    ldp q1, q0, [sp, #80]
202; NONEON-NOSVE-NEXT:    ldp x26, x25, [sp, #144] // 16-byte Folded Reload
203; NONEON-NOSVE-NEXT:    ldp x28, x27, [sp, #128] // 16-byte Folded Reload
204; NONEON-NOSVE-NEXT:    ldp x29, x30, [sp, #112] // 16-byte Folded Reload
205; NONEON-NOSVE-NEXT:    stp q1, q0, [x1]
206; NONEON-NOSVE-NEXT:    add sp, sp, #208
207; NONEON-NOSVE-NEXT:    ret
; IR under test: %c doubles the truncated value so that the value reaching the
; store is the add result rather than the raw truncate.
208  %a = load <32 x i16>, ptr %in
209  %b = trunc <32 x i16> %a to <32 x i8>
210  %c = add <32 x i8> %b, %b
211  store <32 x i8> %c, ptr %out
212  ret void
213}
214
215; NOTE: Extra 'add' is to prevent the truncate being combined with the store.
; Test case: load a <64 x i16> vector from %in, truncate it to <64 x i8>, add
; the truncated vector to itself and store the sum to %out.
;
; The assertion lines below are autogenerated and must not be edited by hand.
; What they show:
;  * default-prefix runs: eight uzp1 narrows (one per loaded q register), four
;    splices under "ptrue p0.b, vl8", four byte-wise vector adds and two stp
;    stores covering [x1] and [x1, #32].
;  * NONEON-NOSVE run: a 448-byte frame that saves x19-x30 and spills the
;    %out pointer (x1) to [sp, #152]; lanes are loaded with ldrh, many of them
;    spilled to 4-byte stack slots, doubled with scalar add and written back
;    with strb; the %out pointer is finally reloaded into x8 for the two stp
;    stores.
216define void @trunc_v64i16_v64i8(ptr %in, ptr %out) nounwind {
217; CHECK-LABEL: trunc_v64i16_v64i8:
218; CHECK:       // %bb.0:
219; CHECK-NEXT:    ldp q1, q0, [x0, #64]
220; CHECK-NEXT:    ptrue p0.b, vl8
221; CHECK-NEXT:    ldp q2, q3, [x0, #96]
222; CHECK-NEXT:    ldp q4, q5, [x0]
223; CHECK-NEXT:    uzp1 z7.b, z0.b, z0.b
224; CHECK-NEXT:    uzp1 z6.b, z1.b, z1.b
225; CHECK-NEXT:    ldp q1, q0, [x0, #32]
226; CHECK-NEXT:    uzp1 z17.b, z3.b, z3.b
227; CHECK-NEXT:    uzp1 z16.b, z2.b, z2.b
228; CHECK-NEXT:    uzp1 z3.b, z5.b, z5.b
229; CHECK-NEXT:    uzp1 z2.b, z4.b, z4.b
230; CHECK-NEXT:    uzp1 z5.b, z0.b, z0.b
231; CHECK-NEXT:    splice z0.b, p0, { z6.b, z7.b }
232; CHECK-NEXT:    uzp1 z4.b, z1.b, z1.b
233; CHECK-NEXT:    splice z1.b, p0, { z16.b, z17.b }
234; CHECK-NEXT:    splice z2.b, p0, { z2.b, z3.b }
235; CHECK-NEXT:    splice z3.b, p0, { z4.b, z5.b }
236; CHECK-NEXT:    add z0.b, z0.b, z0.b
237; CHECK-NEXT:    add z1.b, z1.b, z1.b
238; CHECK-NEXT:    add z2.b, z2.b, z2.b
239; CHECK-NEXT:    add z3.b, z3.b, z3.b
240; CHECK-NEXT:    stp q0, q1, [x1, #32]
241; CHECK-NEXT:    stp q2, q3, [x1]
242; CHECK-NEXT:    ret
243;
244; NONEON-NOSVE-LABEL: trunc_v64i16_v64i8:
245; NONEON-NOSVE:       // %bb.0:
246; NONEON-NOSVE-NEXT:    sub sp, sp, #448
247; NONEON-NOSVE-NEXT:    ldp q3, q2, [x0, #32]
248; NONEON-NOSVE-NEXT:    stp x22, x21, [sp, #416] // 16-byte Folded Spill
249; NONEON-NOSVE-NEXT:    ldp q5, q4, [x0]
250; NONEON-NOSVE-NEXT:    str x1, [sp, #152] // 8-byte Folded Spill
251; NONEON-NOSVE-NEXT:    stp x20, x19, [sp, #432] // 16-byte Folded Spill
252; NONEON-NOSVE-NEXT:    ldp q1, q0, [x0, #64]
253; NONEON-NOSVE-NEXT:    stp x24, x23, [sp, #400] // 16-byte Folded Spill
254; NONEON-NOSVE-NEXT:    ldp q7, q6, [x0, #96]
255; NONEON-NOSVE-NEXT:    stp q2, q4, [sp, #224]
256; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #238]
257; NONEON-NOSVE-NEXT:    stp q3, q1, [sp, #256]
258; NONEON-NOSVE-NEXT:    ldrh w10, [sp, #232]
259; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #272]
260; NONEON-NOSVE-NEXT:    stp q5, q7, [sp, #160]
261; NONEON-NOSVE-NEXT:    ldrh w11, [sp, #230]
262; NONEON-NOSVE-NEXT:    add w21, w8, w8
263; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #274]
264; NONEON-NOSVE-NEXT:    stp q6, q0, [sp, #192]
265; NONEON-NOSVE-NEXT:    ldrh w12, [sp, #228]
266; NONEON-NOSVE-NEXT:    ldrh w13, [sp, #226]
267; NONEON-NOSVE-NEXT:    ldrh w14, [sp, #224]
268; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #144] // 8-byte Folded Spill
269; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #276]
270; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #278]
271; NONEON-NOSVE-NEXT:    ldrh w15, [sp, #270]
272; NONEON-NOSVE-NEXT:    ldrh w16, [sp, #268]
273; NONEON-NOSVE-NEXT:    ldrh w17, [sp, #266]
274; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #136] // 8-byte Folded Spill
275; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #280]
276; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #282]
277; NONEON-NOSVE-NEXT:    ldrh w18, [sp, #264]
278; NONEON-NOSVE-NEXT:    ldrh w0, [sp, #262]
279; NONEON-NOSVE-NEXT:    ldrh w1, [sp, #260]
280; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #128] // 8-byte Folded Spill
281; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #284]
282; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #286]
283; NONEON-NOSVE-NEXT:    ldrh w2, [sp, #258]
284; NONEON-NOSVE-NEXT:    ldrh w3, [sp, #256]
285; NONEON-NOSVE-NEXT:    ldrh w4, [sp, #254]
286; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #120] // 8-byte Folded Spill
287; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #208]
288; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #210]
289; NONEON-NOSVE-NEXT:    ldrh w5, [sp, #252]
290; NONEON-NOSVE-NEXT:    ldrh w6, [sp, #250]
291; NONEON-NOSVE-NEXT:    ldrh w7, [sp, #248]
292; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #112] // 8-byte Folded Spill
293; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #212]
294; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #214]
295; NONEON-NOSVE-NEXT:    ldrh w19, [sp, #246]
296; NONEON-NOSVE-NEXT:    ldrh w20, [sp, #244]
297; NONEON-NOSVE-NEXT:    ldrh w22, [sp, #242]
298; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #104] // 8-byte Folded Spill
299; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #216]
300; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #218]
301; NONEON-NOSVE-NEXT:    ldrh w23, [sp, #240]
302; NONEON-NOSVE-NEXT:    ldrh w24, [sp, #174]
303; NONEON-NOSVE-NEXT:    stp x26, x25, [sp, #384] // 16-byte Folded Spill
304; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #96] // 8-byte Folded Spill
305; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #220]
306; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #222]
307; NONEON-NOSVE-NEXT:    ldrh w25, [sp, #172]
308; NONEON-NOSVE-NEXT:    ldrh w26, [sp, #170]
309; NONEON-NOSVE-NEXT:    stp x28, x27, [sp, #368] // 16-byte Folded Spill
310; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #88] // 8-byte Folded Spill
311; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #176]
312; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #178]
313; NONEON-NOSVE-NEXT:    ldrh w27, [sp, #168]
314; NONEON-NOSVE-NEXT:    ldrh w28, [sp, #166]
315; NONEON-NOSVE-NEXT:    stp x29, x30, [sp, #352] // 16-byte Folded Spill
316; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #80] // 8-byte Folded Spill
317; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #180]
318; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #182]
319; NONEON-NOSVE-NEXT:    ldrh w29, [sp, #164]
320; NONEON-NOSVE-NEXT:    ldrh w30, [sp, #162]
321; NONEON-NOSVE-NEXT:    strb w21, [sp, #335]
322; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #72] // 8-byte Folded Spill
323; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #184]
324; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #186]
325; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #64] // 8-byte Folded Spill
326; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #188]
327; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #190]
328; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #56] // 8-byte Folded Spill
329; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #192]
330; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #194]
331; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #48] // 8-byte Folded Spill
332; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #196]
333; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #198]
334; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #40] // 8-byte Folded Spill
335; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #200]
336; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #202]
337; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #32] // 8-byte Folded Spill
338; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #204]
339; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #206]
340; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #24] // 8-byte Folded Spill
341; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #160]
342; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #236]
343; NONEON-NOSVE-NEXT:    add w9, w9, w9
344; NONEON-NOSVE-NEXT:    str w8, [sp, #20] // 4-byte Folded Spill
345; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #234]
346; NONEON-NOSVE-NEXT:    strb w9, [sp, #334]
347; NONEON-NOSVE-NEXT:    add w8, w8, w8
348; NONEON-NOSVE-NEXT:    strb w8, [sp, #333]
349; NONEON-NOSVE-NEXT:    add w8, w10, w10
350; NONEON-NOSVE-NEXT:    strb w8, [sp, #332]
351; NONEON-NOSVE-NEXT:    add w8, w11, w11
352; NONEON-NOSVE-NEXT:    strb w8, [sp, #331]
353; NONEON-NOSVE-NEXT:    add w8, w12, w12
354; NONEON-NOSVE-NEXT:    strb w8, [sp, #330]
355; NONEON-NOSVE-NEXT:    add w8, w13, w13
356; NONEON-NOSVE-NEXT:    strb w8, [sp, #329]
357; NONEON-NOSVE-NEXT:    add w8, w14, w14
358; NONEON-NOSVE-NEXT:    strb w8, [sp, #328]
359; NONEON-NOSVE-NEXT:    add w8, w15, w15
360; NONEON-NOSVE-NEXT:    strb w8, [sp, #327]
361; NONEON-NOSVE-NEXT:    add w8, w16, w16
362; NONEON-NOSVE-NEXT:    strb w8, [sp, #326]
363; NONEON-NOSVE-NEXT:    add w8, w17, w17
364; NONEON-NOSVE-NEXT:    strb w8, [sp, #325]
365; NONEON-NOSVE-NEXT:    add w8, w18, w18
366; NONEON-NOSVE-NEXT:    strb w8, [sp, #324]
367; NONEON-NOSVE-NEXT:    add w8, w0, w0
368; NONEON-NOSVE-NEXT:    strb w8, [sp, #323]
369; NONEON-NOSVE-NEXT:    add w8, w1, w1
370; NONEON-NOSVE-NEXT:    strb w8, [sp, #322]
371; NONEON-NOSVE-NEXT:    add w8, w2, w2
372; NONEON-NOSVE-NEXT:    strb w8, [sp, #321]
373; NONEON-NOSVE-NEXT:    add w8, w3, w3
374; NONEON-NOSVE-NEXT:    strb w8, [sp, #320]
375; NONEON-NOSVE-NEXT:    add w8, w4, w4
376; NONEON-NOSVE-NEXT:    strb w8, [sp, #319]
377; NONEON-NOSVE-NEXT:    add w8, w5, w5
378; NONEON-NOSVE-NEXT:    strb w8, [sp, #318]
379; NONEON-NOSVE-NEXT:    add w8, w6, w6
380; NONEON-NOSVE-NEXT:    strb w8, [sp, #317]
381; NONEON-NOSVE-NEXT:    add w8, w7, w7
382; NONEON-NOSVE-NEXT:    strb w8, [sp, #316]
383; NONEON-NOSVE-NEXT:    add w8, w19, w19
384; NONEON-NOSVE-NEXT:    strb w8, [sp, #315]
385; NONEON-NOSVE-NEXT:    add w8, w20, w20
386; NONEON-NOSVE-NEXT:    strb w8, [sp, #314]
387; NONEON-NOSVE-NEXT:    add w8, w22, w22
388; NONEON-NOSVE-NEXT:    strb w8, [sp, #313]
389; NONEON-NOSVE-NEXT:    add w8, w23, w23
390; NONEON-NOSVE-NEXT:    strb w8, [sp, #312]
391; NONEON-NOSVE-NEXT:    add w8, w24, w24
392; NONEON-NOSVE-NEXT:    strb w8, [sp, #311]
393; NONEON-NOSVE-NEXT:    add w8, w25, w25
394; NONEON-NOSVE-NEXT:    strb w8, [sp, #310]
395; NONEON-NOSVE-NEXT:    add w8, w26, w26
396; NONEON-NOSVE-NEXT:    strb w8, [sp, #309]
397; NONEON-NOSVE-NEXT:    add w8, w27, w27
398; NONEON-NOSVE-NEXT:    strb w8, [sp, #308]
399; NONEON-NOSVE-NEXT:    add w8, w28, w28
400; NONEON-NOSVE-NEXT:    strb w8, [sp, #307]
401; NONEON-NOSVE-NEXT:    add w8, w29, w29
402; NONEON-NOSVE-NEXT:    strb w8, [sp, #306]
403; NONEON-NOSVE-NEXT:    add w8, w30, w30
404; NONEON-NOSVE-NEXT:    strb w8, [sp, #305]
405; NONEON-NOSVE-NEXT:    ldr w8, [sp, #20] // 4-byte Folded Reload
406; NONEON-NOSVE-NEXT:    ldp x20, x19, [sp, #432] // 16-byte Folded Reload
407; NONEON-NOSVE-NEXT:    add w8, w8, w8
408; NONEON-NOSVE-NEXT:    ldp x22, x21, [sp, #416] // 16-byte Folded Reload
409; NONEON-NOSVE-NEXT:    strb w8, [sp, #304]
410; NONEON-NOSVE-NEXT:    ldr w8, [sp, #24] // 4-byte Folded Reload
411; NONEON-NOSVE-NEXT:    ldp x24, x23, [sp, #400] // 16-byte Folded Reload
412; NONEON-NOSVE-NEXT:    add w8, w8, w8
413; NONEON-NOSVE-NEXT:    ldp x26, x25, [sp, #384] // 16-byte Folded Reload
414; NONEON-NOSVE-NEXT:    strb w8, [sp, #303]
415; NONEON-NOSVE-NEXT:    ldr w8, [sp, #28] // 4-byte Folded Reload
416; NONEON-NOSVE-NEXT:    ldp x28, x27, [sp, #368] // 16-byte Folded Reload
417; NONEON-NOSVE-NEXT:    add w8, w8, w8
418; NONEON-NOSVE-NEXT:    ldp x29, x30, [sp, #352] // 16-byte Folded Reload
419; NONEON-NOSVE-NEXT:    strb w8, [sp, #302]
420; NONEON-NOSVE-NEXT:    ldr w8, [sp, #32] // 4-byte Folded Reload
421; NONEON-NOSVE-NEXT:    add w8, w8, w8
422; NONEON-NOSVE-NEXT:    strb w8, [sp, #301]
423; NONEON-NOSVE-NEXT:    ldr w8, [sp, #36] // 4-byte Folded Reload
424; NONEON-NOSVE-NEXT:    add w8, w8, w8
425; NONEON-NOSVE-NEXT:    strb w8, [sp, #300]
426; NONEON-NOSVE-NEXT:    ldr w8, [sp, #40] // 4-byte Folded Reload
427; NONEON-NOSVE-NEXT:    add w8, w8, w8
428; NONEON-NOSVE-NEXT:    strb w8, [sp, #299]
429; NONEON-NOSVE-NEXT:    ldr w8, [sp, #44] // 4-byte Folded Reload
430; NONEON-NOSVE-NEXT:    add w8, w8, w8
431; NONEON-NOSVE-NEXT:    strb w8, [sp, #298]
432; NONEON-NOSVE-NEXT:    ldr w8, [sp, #48] // 4-byte Folded Reload
433; NONEON-NOSVE-NEXT:    add w8, w8, w8
434; NONEON-NOSVE-NEXT:    strb w8, [sp, #297]
435; NONEON-NOSVE-NEXT:    ldr w8, [sp, #52] // 4-byte Folded Reload
436; NONEON-NOSVE-NEXT:    add w8, w8, w8
437; NONEON-NOSVE-NEXT:    strb w8, [sp, #296]
438; NONEON-NOSVE-NEXT:    ldr w8, [sp, #56] // 4-byte Folded Reload
439; NONEON-NOSVE-NEXT:    add w8, w8, w8
440; NONEON-NOSVE-NEXT:    strb w8, [sp, #295]
441; NONEON-NOSVE-NEXT:    ldr w8, [sp, #60] // 4-byte Folded Reload
442; NONEON-NOSVE-NEXT:    add w8, w8, w8
443; NONEON-NOSVE-NEXT:    strb w8, [sp, #294]
444; NONEON-NOSVE-NEXT:    ldr w8, [sp, #64] // 4-byte Folded Reload
445; NONEON-NOSVE-NEXT:    add w8, w8, w8
446; NONEON-NOSVE-NEXT:    strb w8, [sp, #293]
447; NONEON-NOSVE-NEXT:    ldr w8, [sp, #68] // 4-byte Folded Reload
448; NONEON-NOSVE-NEXT:    add w8, w8, w8
449; NONEON-NOSVE-NEXT:    strb w8, [sp, #292]
450; NONEON-NOSVE-NEXT:    ldr w8, [sp, #72] // 4-byte Folded Reload
451; NONEON-NOSVE-NEXT:    add w8, w8, w8
452; NONEON-NOSVE-NEXT:    strb w8, [sp, #291]
453; NONEON-NOSVE-NEXT:    ldr w8, [sp, #76] // 4-byte Folded Reload
454; NONEON-NOSVE-NEXT:    add w8, w8, w8
455; NONEON-NOSVE-NEXT:    strb w8, [sp, #290]
456; NONEON-NOSVE-NEXT:    ldr w8, [sp, #80] // 4-byte Folded Reload
457; NONEON-NOSVE-NEXT:    add w8, w8, w8
458; NONEON-NOSVE-NEXT:    strb w8, [sp, #289]
459; NONEON-NOSVE-NEXT:    ldr w8, [sp, #84] // 4-byte Folded Reload
460; NONEON-NOSVE-NEXT:    add w8, w8, w8
461; NONEON-NOSVE-NEXT:    strb w8, [sp, #288]
462; NONEON-NOSVE-NEXT:    ldr w8, [sp, #88] // 4-byte Folded Reload
463; NONEON-NOSVE-NEXT:    ldp q1, q3, [sp, #288]
464; NONEON-NOSVE-NEXT:    add w8, w8, w8
465; NONEON-NOSVE-NEXT:    strb w8, [sp, #351]
466; NONEON-NOSVE-NEXT:    ldr w8, [sp, #92] // 4-byte Folded Reload
467; NONEON-NOSVE-NEXT:    add w8, w8, w8
468; NONEON-NOSVE-NEXT:    strb w8, [sp, #350]
469; NONEON-NOSVE-NEXT:    ldr w8, [sp, #96] // 4-byte Folded Reload
470; NONEON-NOSVE-NEXT:    add w8, w8, w8
471; NONEON-NOSVE-NEXT:    strb w8, [sp, #349]
472; NONEON-NOSVE-NEXT:    ldr w8, [sp, #100] // 4-byte Folded Reload
473; NONEON-NOSVE-NEXT:    add w8, w8, w8
474; NONEON-NOSVE-NEXT:    strb w8, [sp, #348]
475; NONEON-NOSVE-NEXT:    ldr w8, [sp, #104] // 4-byte Folded Reload
476; NONEON-NOSVE-NEXT:    add w8, w8, w8
477; NONEON-NOSVE-NEXT:    strb w8, [sp, #347]
478; NONEON-NOSVE-NEXT:    ldr w8, [sp, #108] // 4-byte Folded Reload
479; NONEON-NOSVE-NEXT:    add w8, w8, w8
480; NONEON-NOSVE-NEXT:    strb w8, [sp, #346]
481; NONEON-NOSVE-NEXT:    ldr w8, [sp, #112] // 4-byte Folded Reload
482; NONEON-NOSVE-NEXT:    add w8, w8, w8
483; NONEON-NOSVE-NEXT:    strb w8, [sp, #345]
484; NONEON-NOSVE-NEXT:    ldr w8, [sp, #116] // 4-byte Folded Reload
485; NONEON-NOSVE-NEXT:    add w8, w8, w8
486; NONEON-NOSVE-NEXT:    strb w8, [sp, #344]
487; NONEON-NOSVE-NEXT:    ldr w8, [sp, #120] // 4-byte Folded Reload
488; NONEON-NOSVE-NEXT:    add w8, w8, w8
489; NONEON-NOSVE-NEXT:    strb w8, [sp, #343]
490; NONEON-NOSVE-NEXT:    ldr w8, [sp, #124] // 4-byte Folded Reload
491; NONEON-NOSVE-NEXT:    add w8, w8, w8
492; NONEON-NOSVE-NEXT:    strb w8, [sp, #342]
493; NONEON-NOSVE-NEXT:    ldr w8, [sp, #128] // 4-byte Folded Reload
494; NONEON-NOSVE-NEXT:    add w8, w8, w8
495; NONEON-NOSVE-NEXT:    strb w8, [sp, #341]
496; NONEON-NOSVE-NEXT:    ldr w8, [sp, #132] // 4-byte Folded Reload
497; NONEON-NOSVE-NEXT:    add w8, w8, w8
498; NONEON-NOSVE-NEXT:    strb w8, [sp, #340]
499; NONEON-NOSVE-NEXT:    ldr w8, [sp, #136] // 4-byte Folded Reload
500; NONEON-NOSVE-NEXT:    add w8, w8, w8
501; NONEON-NOSVE-NEXT:    strb w8, [sp, #339]
502; NONEON-NOSVE-NEXT:    ldr w8, [sp, #140] // 4-byte Folded Reload
503; NONEON-NOSVE-NEXT:    add w8, w8, w8
504; NONEON-NOSVE-NEXT:    strb w8, [sp, #338]
505; NONEON-NOSVE-NEXT:    ldr w8, [sp, #144] // 4-byte Folded Reload
506; NONEON-NOSVE-NEXT:    add w8, w8, w8
507; NONEON-NOSVE-NEXT:    strb w8, [sp, #337]
508; NONEON-NOSVE-NEXT:    ldr w8, [sp, #148] // 4-byte Folded Reload
509; NONEON-NOSVE-NEXT:    add w8, w8, w8
510; NONEON-NOSVE-NEXT:    strb w8, [sp, #336]
511; NONEON-NOSVE-NEXT:    ldr x8, [sp, #152] // 8-byte Folded Reload
512; NONEON-NOSVE-NEXT:    ldp q2, q0, [sp, #320]
513; NONEON-NOSVE-NEXT:    stp q3, q2, [x8]
514; NONEON-NOSVE-NEXT:    stp q0, q1, [x8, #32]
515; NONEON-NOSVE-NEXT:    add sp, sp, #448
516; NONEON-NOSVE-NEXT:    ret
; IR under test: %c doubles the truncated value so that the value reaching the
; store is the add result rather than the raw truncate.
517  %a = load <64 x i16>, ptr %in
518  %b = trunc <64 x i16> %a to <64 x i8>
519  %c = add <64 x i8> %b, %b
520  store <64 x i8> %c, ptr %out
521  ret void
522}
523
524; NOTE: Extra 'add' is to prevent the truncate being combined with the store.
525define void @trunc_v128i16_v128i8(ptr %in, ptr %out) nounwind {
526; CHECK-LABEL: trunc_v128i16_v128i8:
527; CHECK:       // %bb.0:
528; CHECK-NEXT:    ldp q2, q3, [x0, #192]
529; CHECK-NEXT:    ptrue p0.b, vl8
530; CHECK-NEXT:    ldp q4, q5, [x0]
531; CHECK-NEXT:    ldp q6, q7, [x0, #64]
532; CHECK-NEXT:    uzp1 z17.b, z3.b, z3.b
533; CHECK-NEXT:    ldp q3, q18, [x0, #224]
534; CHECK-NEXT:    uzp1 z16.b, z2.b, z2.b
535; CHECK-NEXT:    ldp q2, q19, [x0, #128]
536; CHECK-NEXT:    ldp q0, q1, [x0, #32]
537; CHECK-NEXT:    uzp1 z21.b, z18.b, z18.b
538; CHECK-NEXT:    ldp q18, q22, [x0, #160]
539; CHECK-NEXT:    uzp1 z20.b, z3.b, z3.b
540; CHECK-NEXT:    uzp1 z24.b, z19.b, z19.b
541; CHECK-NEXT:    ldp q3, q19, [x0, #96]
542; CHECK-NEXT:    uzp1 z23.b, z2.b, z2.b
543; CHECK-NEXT:    uzp1 z26.b, z22.b, z22.b
544; CHECK-NEXT:    splice z2.b, p0, { z16.b, z17.b }
545; CHECK-NEXT:    uzp1 z17.b, z7.b, z7.b
546; CHECK-NEXT:    uzp1 z25.b, z18.b, z18.b
547; CHECK-NEXT:    splice z7.b, p0, { z20.b, z21.b }
548; CHECK-NEXT:    uzp1 z21.b, z5.b, z5.b
549; CHECK-NEXT:    uzp1 z19.b, z19.b, z19.b
550; CHECK-NEXT:    uzp1 z20.b, z4.b, z4.b
551; CHECK-NEXT:    uzp1 z5.b, z1.b, z1.b
552; CHECK-NEXT:    uzp1 z16.b, z6.b, z6.b
553; CHECK-NEXT:    splice z6.b, p0, { z23.b, z24.b }
554; CHECK-NEXT:    uzp1 z18.b, z3.b, z3.b
555; CHECK-NEXT:    splice z3.b, p0, { z25.b, z26.b }
556; CHECK-NEXT:    uzp1 z4.b, z0.b, z0.b
557; CHECK-NEXT:    add z0.b, z2.b, z2.b
558; CHECK-NEXT:    add z7.b, z7.b, z7.b
559; CHECK-NEXT:    splice z1.b, p0, { z16.b, z17.b }
560; CHECK-NEXT:    splice z2.b, p0, { z18.b, z19.b }
561; CHECK-NEXT:    splice z16.b, p0, { z20.b, z21.b }
562; CHECK-NEXT:    splice z4.b, p0, { z4.b, z5.b }
563; CHECK-NEXT:    add z6.b, z6.b, z6.b
564; CHECK-NEXT:    add z3.b, z3.b, z3.b
565; CHECK-NEXT:    stp q0, q7, [x1, #96]
566; CHECK-NEXT:    add z0.b, z1.b, z1.b
567; CHECK-NEXT:    add z1.b, z2.b, z2.b
568; CHECK-NEXT:    add z2.b, z16.b, z16.b
569; CHECK-NEXT:    stp q6, q3, [x1, #64]
570; CHECK-NEXT:    add z3.b, z4.b, z4.b
571; CHECK-NEXT:    stp q0, q1, [x1, #32]
572; CHECK-NEXT:    stp q2, q3, [x1]
573; CHECK-NEXT:    ret
574;
575; NONEON-NOSVE-LABEL: trunc_v128i16_v128i8:
576; NONEON-NOSVE:       // %bb.0:
577; NONEON-NOSVE-NEXT:    stp x29, x30, [sp, #-96]! // 16-byte Folded Spill
578; NONEON-NOSVE-NEXT:    stp x28, x27, [sp, #16] // 16-byte Folded Spill
579; NONEON-NOSVE-NEXT:    stp x26, x25, [sp, #32] // 16-byte Folded Spill
580; NONEON-NOSVE-NEXT:    stp x24, x23, [sp, #48] // 16-byte Folded Spill
581; NONEON-NOSVE-NEXT:    stp x22, x21, [sp, #64] // 16-byte Folded Spill
582; NONEON-NOSVE-NEXT:    stp x20, x19, [sp, #80] // 16-byte Folded Spill
583; NONEON-NOSVE-NEXT:    sub sp, sp, #800
584; NONEON-NOSVE-NEXT:    ldp q1, q0, [x0, #32]
585; NONEON-NOSVE-NEXT:    str x1, [sp, #408] // 8-byte Folded Spill
586; NONEON-NOSVE-NEXT:    ldp q3, q2, [x0]
587; NONEON-NOSVE-NEXT:    ldp q5, q4, [x0, #96]
588; NONEON-NOSVE-NEXT:    ldp q7, q6, [x0, #64]
589; NONEON-NOSVE-NEXT:    ldp q17, q16, [x0, #192]
590; NONEON-NOSVE-NEXT:    ldp q19, q18, [x0, #160]
591; NONEON-NOSVE-NEXT:    ldp q21, q20, [x0, #128]
592; NONEON-NOSVE-NEXT:    ldp q23, q22, [x0, #224]
593; NONEON-NOSVE-NEXT:    str q0, [sp, #592]
594; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #606]
595; NONEON-NOSVE-NEXT:    str q19, [sp, #496]
596; NONEON-NOSVE-NEXT:    ldrh w10, [sp, #600]
597; NONEON-NOSVE-NEXT:    stp q18, q20, [sp, #512]
598; NONEON-NOSVE-NEXT:    ldrh w11, [sp, #598]
599; NONEON-NOSVE-NEXT:    ldrh w12, [sp, #596]
600; NONEON-NOSVE-NEXT:    add w8, w8, w8
601; NONEON-NOSVE-NEXT:    stp q17, q23, [sp, #432]
602; NONEON-NOSVE-NEXT:    ldrh w13, [sp, #594]
603; NONEON-NOSVE-NEXT:    str w8, [sp, #64] // 4-byte Folded Spill
604; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #432]
605; NONEON-NOSVE-NEXT:    ldrh w14, [sp, #592]
606; NONEON-NOSVE-NEXT:    stp q22, q16, [sp, #464]
607; NONEON-NOSVE-NEXT:    ldr w30, [sp, #64] // 4-byte Folded Reload
608; NONEON-NOSVE-NEXT:    str w8, [sp, #404] // 4-byte Folded Spill
609; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #434]
610; NONEON-NOSVE-NEXT:    stp q4, q6, [sp, #560]
611; NONEON-NOSVE-NEXT:    str w8, [sp, #400] // 4-byte Folded Spill
612; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #436]
613; NONEON-NOSVE-NEXT:    str q5, [sp, #544]
614; NONEON-NOSVE-NEXT:    str w8, [sp, #396] // 4-byte Folded Spill
615; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #438]
616; NONEON-NOSVE-NEXT:    stp q2, q1, [sp, #608]
617; NONEON-NOSVE-NEXT:    str w8, [sp, #392] // 4-byte Folded Spill
618; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #440]
619; NONEON-NOSVE-NEXT:    ldrh w15, [sp, #638]
620; NONEON-NOSVE-NEXT:    stp q7, q21, [sp, #640]
621; NONEON-NOSVE-NEXT:    ldrh w16, [sp, #636]
622; NONEON-NOSVE-NEXT:    ldrh w17, [sp, #634]
623; NONEON-NOSVE-NEXT:    str w8, [sp, #388] // 4-byte Folded Spill
624; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #442]
625; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #666]
626; NONEON-NOSVE-NEXT:    str q3, [sp, #416]
627; NONEON-NOSVE-NEXT:    ldrh w18, [sp, #632]
628; NONEON-NOSVE-NEXT:    ldrh w0, [sp, #630]
629; NONEON-NOSVE-NEXT:    str w8, [sp, #384] // 4-byte Folded Spill
630; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #444]
631; NONEON-NOSVE-NEXT:    ldrh w1, [sp, #628]
632; NONEON-NOSVE-NEXT:    ldrh w2, [sp, #626]
633; NONEON-NOSVE-NEXT:    ldrh w3, [sp, #624]
634; NONEON-NOSVE-NEXT:    ldrh w4, [sp, #622]
635; NONEON-NOSVE-NEXT:    str w8, [sp, #380] // 4-byte Folded Spill
636; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #446]
637; NONEON-NOSVE-NEXT:    ldrh w5, [sp, #620]
638; NONEON-NOSVE-NEXT:    ldrh w6, [sp, #618]
639; NONEON-NOSVE-NEXT:    ldrh w7, [sp, #616]
640; NONEON-NOSVE-NEXT:    ldrh w19, [sp, #614]
641; NONEON-NOSVE-NEXT:    str w8, [sp, #376] // 4-byte Folded Spill
642; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #480]
643; NONEON-NOSVE-NEXT:    ldrh w20, [sp, #612]
644; NONEON-NOSVE-NEXT:    ldrh w21, [sp, #610]
645; NONEON-NOSVE-NEXT:    ldrh w22, [sp, #608]
646; NONEON-NOSVE-NEXT:    ldrh w23, [sp, #430]
647; NONEON-NOSVE-NEXT:    str w8, [sp, #372] // 4-byte Folded Spill
648; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #482]
649; NONEON-NOSVE-NEXT:    ldrh w24, [sp, #428]
650; NONEON-NOSVE-NEXT:    ldrh w25, [sp, #426]
651; NONEON-NOSVE-NEXT:    ldrh w26, [sp, #424]
652; NONEON-NOSVE-NEXT:    ldrh w27, [sp, #422]
653; NONEON-NOSVE-NEXT:    str w8, [sp, #368] // 4-byte Folded Spill
654; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #484]
655; NONEON-NOSVE-NEXT:    ldrh w28, [sp, #420]
656; NONEON-NOSVE-NEXT:    ldrh w29, [sp, #418]
657; NONEON-NOSVE-NEXT:    strb w30, [sp, #767]
658; NONEON-NOSVE-NEXT:    str w8, [sp, #364] // 4-byte Folded Spill
659; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #486]
660; NONEON-NOSVE-NEXT:    str w8, [sp, #360] // 4-byte Folded Spill
661; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #488]
662; NONEON-NOSVE-NEXT:    str w8, [sp, #356] // 4-byte Folded Spill
663; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #490]
664; NONEON-NOSVE-NEXT:    str w8, [sp, #352] // 4-byte Folded Spill
665; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #492]
666; NONEON-NOSVE-NEXT:    str w8, [sp, #348] // 4-byte Folded Spill
667; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #494]
668; NONEON-NOSVE-NEXT:    str w8, [sp, #344] // 4-byte Folded Spill
669; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #448]
670; NONEON-NOSVE-NEXT:    str w8, [sp, #340] // 4-byte Folded Spill
671; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #450]
672; NONEON-NOSVE-NEXT:    str w8, [sp, #336] // 4-byte Folded Spill
673; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #452]
674; NONEON-NOSVE-NEXT:    str w8, [sp, #332] // 4-byte Folded Spill
675; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #454]
676; NONEON-NOSVE-NEXT:    str w8, [sp, #328] // 4-byte Folded Spill
677; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #456]
678; NONEON-NOSVE-NEXT:    str w8, [sp, #324] // 4-byte Folded Spill
679; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #458]
680; NONEON-NOSVE-NEXT:    str w8, [sp, #320] // 4-byte Folded Spill
681; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #460]
682; NONEON-NOSVE-NEXT:    str w8, [sp, #316] // 4-byte Folded Spill
683; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #462]
684; NONEON-NOSVE-NEXT:    str w8, [sp, #312] // 4-byte Folded Spill
685; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #464]
686; NONEON-NOSVE-NEXT:    str w8, [sp, #308] // 4-byte Folded Spill
687; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #466]
688; NONEON-NOSVE-NEXT:    str w8, [sp, #304] // 4-byte Folded Spill
689; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #468]
690; NONEON-NOSVE-NEXT:    str w8, [sp, #300] // 4-byte Folded Spill
691; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #470]
692; NONEON-NOSVE-NEXT:    str w8, [sp, #296] // 4-byte Folded Spill
693; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #472]
694; NONEON-NOSVE-NEXT:    str w8, [sp, #292] // 4-byte Folded Spill
695; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #474]
696; NONEON-NOSVE-NEXT:    str w8, [sp, #288] // 4-byte Folded Spill
697; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #476]
698; NONEON-NOSVE-NEXT:    str w8, [sp, #284] // 4-byte Folded Spill
699; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #478]
700; NONEON-NOSVE-NEXT:    str w8, [sp, #280] // 4-byte Folded Spill
701; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #656]
702; NONEON-NOSVE-NEXT:    str w8, [sp, #276] // 4-byte Folded Spill
703; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #658]
704; NONEON-NOSVE-NEXT:    str w8, [sp, #272] // 4-byte Folded Spill
705; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #660]
706; NONEON-NOSVE-NEXT:    str w8, [sp, #268] // 4-byte Folded Spill
707; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #662]
708; NONEON-NOSVE-NEXT:    str w8, [sp, #264] // 4-byte Folded Spill
709; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #664]
710; NONEON-NOSVE-NEXT:    str w8, [sp, #260] // 4-byte Folded Spill
711; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #668]
712; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #252] // 8-byte Folded Spill
713; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #670]
714; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #528]
715; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #244] // 8-byte Folded Spill
716; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #530]
717; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #532]
718; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #236] // 8-byte Folded Spill
719; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #534]
720; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #536]
721; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #228] // 8-byte Folded Spill
722; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #538]
723; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #540]
724; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #220] // 8-byte Folded Spill
725; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #542]
726; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #496]
727; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #212] // 8-byte Folded Spill
728; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #498]
729; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #500]
730; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #204] // 8-byte Folded Spill
731; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #502]
732; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #504]
733; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #196] // 8-byte Folded Spill
734; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #506]
735; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #508]
736; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #188] // 8-byte Folded Spill
737; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #510]
738; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #512]
739; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #180] // 8-byte Folded Spill
740; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #514]
741; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #516]
742; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #172] // 8-byte Folded Spill
743; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #518]
744; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #520]
745; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #164] // 8-byte Folded Spill
746; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #522]
747; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #524]
748; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #156] // 8-byte Folded Spill
749; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #526]
750; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #640]
751; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #148] // 8-byte Folded Spill
752; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #642]
753; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #644]
754; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #140] // 8-byte Folded Spill
755; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #646]
756; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #648]
757; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #132] // 8-byte Folded Spill
758; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #650]
759; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #652]
760; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #124] // 8-byte Folded Spill
761; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #654]
762; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #576]
763; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #116] // 8-byte Folded Spill
764; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #578]
765; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #580]
766; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #108] // 8-byte Folded Spill
767; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #582]
768; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #584]
769; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #100] // 8-byte Folded Spill
770; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #586]
771; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #588]
772; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #92] // 8-byte Folded Spill
773; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #590]
774; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #544]
775; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #84] // 8-byte Folded Spill
776; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #546]
777; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #548]
778; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #76] // 8-byte Folded Spill
779; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #550]
780; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #552]
781; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #68] // 8-byte Folded Spill
782; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #554]
783; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #556]
784; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #56] // 8-byte Folded Spill
785; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #558]
786; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #560]
787; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #48] // 8-byte Folded Spill
788; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #562]
789; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #564]
790; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #40] // 8-byte Folded Spill
791; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #566]
792; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #568]
793; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #32] // 8-byte Folded Spill
794; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #570]
795; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #572]
796; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #24] // 8-byte Folded Spill
797; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #574]
798; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #416]
799; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #16] // 8-byte Folded Spill
800; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #602]
801; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #604]
802; NONEON-NOSVE-NEXT:    add w8, w8, w8
803; NONEON-NOSVE-NEXT:    add w9, w9, w9
804; NONEON-NOSVE-NEXT:    strb w8, [sp, #765]
805; NONEON-NOSVE-NEXT:    add w8, w10, w10
806; NONEON-NOSVE-NEXT:    strb w8, [sp, #764]
807; NONEON-NOSVE-NEXT:    add w8, w11, w11
808; NONEON-NOSVE-NEXT:    strb w8, [sp, #763]
809; NONEON-NOSVE-NEXT:    add w8, w12, w12
810; NONEON-NOSVE-NEXT:    strb w8, [sp, #762]
811; NONEON-NOSVE-NEXT:    add w8, w13, w13
812; NONEON-NOSVE-NEXT:    strb w8, [sp, #761]
813; NONEON-NOSVE-NEXT:    add w8, w14, w14
814; NONEON-NOSVE-NEXT:    strb w8, [sp, #760]
815; NONEON-NOSVE-NEXT:    add w8, w15, w15
816; NONEON-NOSVE-NEXT:    strb w8, [sp, #759]
817; NONEON-NOSVE-NEXT:    add w8, w16, w16
818; NONEON-NOSVE-NEXT:    strb w8, [sp, #758]
819; NONEON-NOSVE-NEXT:    add w8, w17, w17
820; NONEON-NOSVE-NEXT:    strb w8, [sp, #757]
821; NONEON-NOSVE-NEXT:    add w8, w18, w18
822; NONEON-NOSVE-NEXT:    strb w8, [sp, #756]
823; NONEON-NOSVE-NEXT:    add w8, w0, w0
824; NONEON-NOSVE-NEXT:    strb w8, [sp, #755]
825; NONEON-NOSVE-NEXT:    add w8, w1, w1
826; NONEON-NOSVE-NEXT:    strb w8, [sp, #754]
827; NONEON-NOSVE-NEXT:    add w8, w2, w2
828; NONEON-NOSVE-NEXT:    strb w8, [sp, #753]
829; NONEON-NOSVE-NEXT:    add w8, w3, w3
830; NONEON-NOSVE-NEXT:    strb w8, [sp, #752]
831; NONEON-NOSVE-NEXT:    add w8, w4, w4
832; NONEON-NOSVE-NEXT:    strb w8, [sp, #751]
833; NONEON-NOSVE-NEXT:    add w8, w5, w5
834; NONEON-NOSVE-NEXT:    strb w8, [sp, #750]
835; NONEON-NOSVE-NEXT:    add w8, w6, w6
836; NONEON-NOSVE-NEXT:    strb w8, [sp, #749]
837; NONEON-NOSVE-NEXT:    add w8, w7, w7
838; NONEON-NOSVE-NEXT:    strb w8, [sp, #748]
839; NONEON-NOSVE-NEXT:    add w8, w19, w19
840; NONEON-NOSVE-NEXT:    strb w8, [sp, #747]
841; NONEON-NOSVE-NEXT:    add w8, w20, w20
842; NONEON-NOSVE-NEXT:    strb w8, [sp, #746]
843; NONEON-NOSVE-NEXT:    add w8, w21, w21
844; NONEON-NOSVE-NEXT:    strb w8, [sp, #745]
845; NONEON-NOSVE-NEXT:    add w8, w22, w22
846; NONEON-NOSVE-NEXT:    strb w8, [sp, #744]
847; NONEON-NOSVE-NEXT:    add w8, w23, w23
848; NONEON-NOSVE-NEXT:    strb w8, [sp, #743]
849; NONEON-NOSVE-NEXT:    add w8, w24, w24
850; NONEON-NOSVE-NEXT:    strb w8, [sp, #742]
851; NONEON-NOSVE-NEXT:    add w8, w25, w25
852; NONEON-NOSVE-NEXT:    strb w8, [sp, #741]
853; NONEON-NOSVE-NEXT:    add w8, w26, w26
854; NONEON-NOSVE-NEXT:    strb w8, [sp, #740]
855; NONEON-NOSVE-NEXT:    add w8, w27, w27
856; NONEON-NOSVE-NEXT:    strb w8, [sp, #739]
857; NONEON-NOSVE-NEXT:    add w8, w28, w28
858; NONEON-NOSVE-NEXT:    strb w8, [sp, #738]
859; NONEON-NOSVE-NEXT:    add w8, w29, w29
860; NONEON-NOSVE-NEXT:    strb w8, [sp, #737]
861; NONEON-NOSVE-NEXT:    ldr w8, [sp, #16] // 4-byte Folded Reload
862; NONEON-NOSVE-NEXT:    strb w9, [sp, #766]
863; NONEON-NOSVE-NEXT:    add w8, w8, w8
864; NONEON-NOSVE-NEXT:    strb w8, [sp, #736]
865; NONEON-NOSVE-NEXT:    ldr w8, [sp, #20] // 4-byte Folded Reload
866; NONEON-NOSVE-NEXT:    ldp q1, q0, [sp, #736]
867; NONEON-NOSVE-NEXT:    add w8, w8, w8
868; NONEON-NOSVE-NEXT:    strb w8, [sp, #735]
869; NONEON-NOSVE-NEXT:    ldr w8, [sp, #24] // 4-byte Folded Reload
870; NONEON-NOSVE-NEXT:    add w8, w8, w8
871; NONEON-NOSVE-NEXT:    strb w8, [sp, #734]
872; NONEON-NOSVE-NEXT:    ldr w8, [sp, #28] // 4-byte Folded Reload
873; NONEON-NOSVE-NEXT:    add w8, w8, w8
874; NONEON-NOSVE-NEXT:    strb w8, [sp, #733]
875; NONEON-NOSVE-NEXT:    ldr w8, [sp, #32] // 4-byte Folded Reload
876; NONEON-NOSVE-NEXT:    add w8, w8, w8
877; NONEON-NOSVE-NEXT:    strb w8, [sp, #732]
878; NONEON-NOSVE-NEXT:    ldr w8, [sp, #36] // 4-byte Folded Reload
879; NONEON-NOSVE-NEXT:    add w8, w8, w8
880; NONEON-NOSVE-NEXT:    strb w8, [sp, #731]
881; NONEON-NOSVE-NEXT:    ldr w8, [sp, #40] // 4-byte Folded Reload
882; NONEON-NOSVE-NEXT:    add w8, w8, w8
883; NONEON-NOSVE-NEXT:    strb w8, [sp, #730]
884; NONEON-NOSVE-NEXT:    ldr w8, [sp, #44] // 4-byte Folded Reload
885; NONEON-NOSVE-NEXT:    add w8, w8, w8
886; NONEON-NOSVE-NEXT:    strb w8, [sp, #729]
887; NONEON-NOSVE-NEXT:    ldr w8, [sp, #48] // 4-byte Folded Reload
888; NONEON-NOSVE-NEXT:    add w8, w8, w8
889; NONEON-NOSVE-NEXT:    strb w8, [sp, #728]
890; NONEON-NOSVE-NEXT:    ldr w8, [sp, #52] // 4-byte Folded Reload
891; NONEON-NOSVE-NEXT:    add w8, w8, w8
892; NONEON-NOSVE-NEXT:    strb w8, [sp, #727]
893; NONEON-NOSVE-NEXT:    ldr w8, [sp, #56] // 4-byte Folded Reload
894; NONEON-NOSVE-NEXT:    add w8, w8, w8
895; NONEON-NOSVE-NEXT:    strb w8, [sp, #726]
896; NONEON-NOSVE-NEXT:    ldr w8, [sp, #60] // 4-byte Folded Reload
897; NONEON-NOSVE-NEXT:    add w8, w8, w8
898; NONEON-NOSVE-NEXT:    strb w8, [sp, #725]
899; NONEON-NOSVE-NEXT:    ldr w8, [sp, #68] // 4-byte Folded Reload
900; NONEON-NOSVE-NEXT:    add w8, w8, w8
901; NONEON-NOSVE-NEXT:    strb w8, [sp, #724]
902; NONEON-NOSVE-NEXT:    ldr w8, [sp, #72] // 4-byte Folded Reload
903; NONEON-NOSVE-NEXT:    add w8, w8, w8
904; NONEON-NOSVE-NEXT:    strb w8, [sp, #723]
905; NONEON-NOSVE-NEXT:    ldr w8, [sp, #76] // 4-byte Folded Reload
906; NONEON-NOSVE-NEXT:    add w8, w8, w8
907; NONEON-NOSVE-NEXT:    strb w8, [sp, #722]
908; NONEON-NOSVE-NEXT:    ldr w8, [sp, #80] // 4-byte Folded Reload
909; NONEON-NOSVE-NEXT:    add w8, w8, w8
910; NONEON-NOSVE-NEXT:    strb w8, [sp, #721]
911; NONEON-NOSVE-NEXT:    ldr w8, [sp, #84] // 4-byte Folded Reload
912; NONEON-NOSVE-NEXT:    add w8, w8, w8
913; NONEON-NOSVE-NEXT:    strb w8, [sp, #720]
914; NONEON-NOSVE-NEXT:    ldr w8, [sp, #88] // 4-byte Folded Reload
915; NONEON-NOSVE-NEXT:    add w8, w8, w8
916; NONEON-NOSVE-NEXT:    strb w8, [sp, #783]
917; NONEON-NOSVE-NEXT:    ldr w8, [sp, #92] // 4-byte Folded Reload
918; NONEON-NOSVE-NEXT:    add w8, w8, w8
919; NONEON-NOSVE-NEXT:    strb w8, [sp, #782]
920; NONEON-NOSVE-NEXT:    ldr w8, [sp, #96] // 4-byte Folded Reload
921; NONEON-NOSVE-NEXT:    add w8, w8, w8
922; NONEON-NOSVE-NEXT:    strb w8, [sp, #781]
923; NONEON-NOSVE-NEXT:    ldr w8, [sp, #100] // 4-byte Folded Reload
924; NONEON-NOSVE-NEXT:    add w8, w8, w8
925; NONEON-NOSVE-NEXT:    strb w8, [sp, #780]
926; NONEON-NOSVE-NEXT:    ldr w8, [sp, #104] // 4-byte Folded Reload
927; NONEON-NOSVE-NEXT:    add w8, w8, w8
928; NONEON-NOSVE-NEXT:    strb w8, [sp, #779]
929; NONEON-NOSVE-NEXT:    ldr w8, [sp, #108] // 4-byte Folded Reload
930; NONEON-NOSVE-NEXT:    add w8, w8, w8
931; NONEON-NOSVE-NEXT:    strb w8, [sp, #778]
932; NONEON-NOSVE-NEXT:    ldr w8, [sp, #112] // 4-byte Folded Reload
933; NONEON-NOSVE-NEXT:    add w8, w8, w8
934; NONEON-NOSVE-NEXT:    strb w8, [sp, #777]
935; NONEON-NOSVE-NEXT:    ldr w8, [sp, #116] // 4-byte Folded Reload
936; NONEON-NOSVE-NEXT:    add w8, w8, w8
937; NONEON-NOSVE-NEXT:    strb w8, [sp, #776]
938; NONEON-NOSVE-NEXT:    ldr w8, [sp, #120] // 4-byte Folded Reload
939; NONEON-NOSVE-NEXT:    add w8, w8, w8
940; NONEON-NOSVE-NEXT:    strb w8, [sp, #775]
941; NONEON-NOSVE-NEXT:    ldr w8, [sp, #124] // 4-byte Folded Reload
942; NONEON-NOSVE-NEXT:    add w8, w8, w8
943; NONEON-NOSVE-NEXT:    strb w8, [sp, #774]
944; NONEON-NOSVE-NEXT:    ldr w8, [sp, #128] // 4-byte Folded Reload
945; NONEON-NOSVE-NEXT:    add w8, w8, w8
946; NONEON-NOSVE-NEXT:    strb w8, [sp, #773]
947; NONEON-NOSVE-NEXT:    ldr w8, [sp, #132] // 4-byte Folded Reload
948; NONEON-NOSVE-NEXT:    add w8, w8, w8
949; NONEON-NOSVE-NEXT:    strb w8, [sp, #772]
950; NONEON-NOSVE-NEXT:    ldr w8, [sp, #136] // 4-byte Folded Reload
951; NONEON-NOSVE-NEXT:    add w8, w8, w8
952; NONEON-NOSVE-NEXT:    strb w8, [sp, #771]
953; NONEON-NOSVE-NEXT:    ldr w8, [sp, #140] // 4-byte Folded Reload
954; NONEON-NOSVE-NEXT:    add w8, w8, w8
955; NONEON-NOSVE-NEXT:    strb w8, [sp, #770]
956; NONEON-NOSVE-NEXT:    ldr w8, [sp, #144] // 4-byte Folded Reload
957; NONEON-NOSVE-NEXT:    add w8, w8, w8
958; NONEON-NOSVE-NEXT:    strb w8, [sp, #769]
959; NONEON-NOSVE-NEXT:    ldr w8, [sp, #148] // 4-byte Folded Reload
960; NONEON-NOSVE-NEXT:    add w8, w8, w8
961; NONEON-NOSVE-NEXT:    strb w8, [sp, #768]
962; NONEON-NOSVE-NEXT:    ldr w8, [sp, #152] // 4-byte Folded Reload
963; NONEON-NOSVE-NEXT:    add w8, w8, w8
964; NONEON-NOSVE-NEXT:    strb w8, [sp, #719]
965; NONEON-NOSVE-NEXT:    ldr w8, [sp, #156] // 4-byte Folded Reload
966; NONEON-NOSVE-NEXT:    add w8, w8, w8
967; NONEON-NOSVE-NEXT:    strb w8, [sp, #718]
968; NONEON-NOSVE-NEXT:    ldr w8, [sp, #160] // 4-byte Folded Reload
969; NONEON-NOSVE-NEXT:    add w8, w8, w8
970; NONEON-NOSVE-NEXT:    strb w8, [sp, #717]
971; NONEON-NOSVE-NEXT:    ldr w8, [sp, #164] // 4-byte Folded Reload
972; NONEON-NOSVE-NEXT:    add w8, w8, w8
973; NONEON-NOSVE-NEXT:    strb w8, [sp, #716]
974; NONEON-NOSVE-NEXT:    ldr w8, [sp, #168] // 4-byte Folded Reload
975; NONEON-NOSVE-NEXT:    add w8, w8, w8
976; NONEON-NOSVE-NEXT:    strb w8, [sp, #715]
977; NONEON-NOSVE-NEXT:    ldr w8, [sp, #172] // 4-byte Folded Reload
978; NONEON-NOSVE-NEXT:    add w8, w8, w8
979; NONEON-NOSVE-NEXT:    strb w8, [sp, #714]
980; NONEON-NOSVE-NEXT:    ldr w8, [sp, #176] // 4-byte Folded Reload
981; NONEON-NOSVE-NEXT:    add w8, w8, w8
982; NONEON-NOSVE-NEXT:    strb w8, [sp, #713]
983; NONEON-NOSVE-NEXT:    ldr w8, [sp, #180] // 4-byte Folded Reload
984; NONEON-NOSVE-NEXT:    add w8, w8, w8
985; NONEON-NOSVE-NEXT:    strb w8, [sp, #712]
986; NONEON-NOSVE-NEXT:    ldr w8, [sp, #184] // 4-byte Folded Reload
987; NONEON-NOSVE-NEXT:    add w8, w8, w8
988; NONEON-NOSVE-NEXT:    strb w8, [sp, #711]
989; NONEON-NOSVE-NEXT:    ldr w8, [sp, #188] // 4-byte Folded Reload
990; NONEON-NOSVE-NEXT:    add w8, w8, w8
991; NONEON-NOSVE-NEXT:    strb w8, [sp, #710]
992; NONEON-NOSVE-NEXT:    ldr w8, [sp, #192] // 4-byte Folded Reload
993; NONEON-NOSVE-NEXT:    add w8, w8, w8
994; NONEON-NOSVE-NEXT:    strb w8, [sp, #709]
995; NONEON-NOSVE-NEXT:    ldr w8, [sp, #196] // 4-byte Folded Reload
996; NONEON-NOSVE-NEXT:    add w8, w8, w8
997; NONEON-NOSVE-NEXT:    strb w8, [sp, #708]
998; NONEON-NOSVE-NEXT:    ldr w8, [sp, #200] // 4-byte Folded Reload
999; NONEON-NOSVE-NEXT:    add w8, w8, w8
1000; NONEON-NOSVE-NEXT:    strb w8, [sp, #707]
1001; NONEON-NOSVE-NEXT:    ldr w8, [sp, #204] // 4-byte Folded Reload
1002; NONEON-NOSVE-NEXT:    add w8, w8, w8
1003; NONEON-NOSVE-NEXT:    strb w8, [sp, #706]
1004; NONEON-NOSVE-NEXT:    ldr w8, [sp, #208] // 4-byte Folded Reload
1005; NONEON-NOSVE-NEXT:    add w8, w8, w8
1006; NONEON-NOSVE-NEXT:    strb w8, [sp, #705]
1007; NONEON-NOSVE-NEXT:    ldr w8, [sp, #212] // 4-byte Folded Reload
1008; NONEON-NOSVE-NEXT:    add w8, w8, w8
1009; NONEON-NOSVE-NEXT:    strb w8, [sp, #704]
1010; NONEON-NOSVE-NEXT:    ldr w8, [sp, #216] // 4-byte Folded Reload
1011; NONEON-NOSVE-NEXT:    ldp q6, q3, [sp, #704]
1012; NONEON-NOSVE-NEXT:    add w8, w8, w8
1013; NONEON-NOSVE-NEXT:    strb w8, [sp, #799]
1014; NONEON-NOSVE-NEXT:    ldr w8, [sp, #220] // 4-byte Folded Reload
1015; NONEON-NOSVE-NEXT:    add w8, w8, w8
1016; NONEON-NOSVE-NEXT:    strb w8, [sp, #798]
1017; NONEON-NOSVE-NEXT:    ldr w8, [sp, #224] // 4-byte Folded Reload
1018; NONEON-NOSVE-NEXT:    add w8, w8, w8
1019; NONEON-NOSVE-NEXT:    strb w8, [sp, #797]
1020; NONEON-NOSVE-NEXT:    ldr w8, [sp, #228] // 4-byte Folded Reload
1021; NONEON-NOSVE-NEXT:    add w8, w8, w8
1022; NONEON-NOSVE-NEXT:    strb w8, [sp, #796]
1023; NONEON-NOSVE-NEXT:    ldr w8, [sp, #232] // 4-byte Folded Reload
1024; NONEON-NOSVE-NEXT:    add w8, w8, w8
1025; NONEON-NOSVE-NEXT:    strb w8, [sp, #795]
1026; NONEON-NOSVE-NEXT:    ldr w8, [sp, #236] // 4-byte Folded Reload
1027; NONEON-NOSVE-NEXT:    add w8, w8, w8
1028; NONEON-NOSVE-NEXT:    strb w8, [sp, #794]
1029; NONEON-NOSVE-NEXT:    ldr w8, [sp, #240] // 4-byte Folded Reload
1030; NONEON-NOSVE-NEXT:    add w8, w8, w8
1031; NONEON-NOSVE-NEXT:    strb w8, [sp, #793]
1032; NONEON-NOSVE-NEXT:    ldr w8, [sp, #244] // 4-byte Folded Reload
1033; NONEON-NOSVE-NEXT:    add w8, w8, w8
1034; NONEON-NOSVE-NEXT:    strb w8, [sp, #792]
1035; NONEON-NOSVE-NEXT:    ldr w8, [sp, #248] // 4-byte Folded Reload
1036; NONEON-NOSVE-NEXT:    add w8, w8, w8
1037; NONEON-NOSVE-NEXT:    strb w8, [sp, #791]
1038; NONEON-NOSVE-NEXT:    ldr w8, [sp, #252] // 4-byte Folded Reload
1039; NONEON-NOSVE-NEXT:    add w8, w8, w8
1040; NONEON-NOSVE-NEXT:    strb w8, [sp, #790]
1041; NONEON-NOSVE-NEXT:    ldr w8, [sp, #256] // 4-byte Folded Reload
1042; NONEON-NOSVE-NEXT:    add w8, w8, w8
1043; NONEON-NOSVE-NEXT:    strb w8, [sp, #789]
1044; NONEON-NOSVE-NEXT:    ldr w8, [sp, #260] // 4-byte Folded Reload
1045; NONEON-NOSVE-NEXT:    add w8, w8, w8
1046; NONEON-NOSVE-NEXT:    strb w8, [sp, #788]
1047; NONEON-NOSVE-NEXT:    ldr w8, [sp, #264] // 4-byte Folded Reload
1048; NONEON-NOSVE-NEXT:    add w8, w8, w8
1049; NONEON-NOSVE-NEXT:    strb w8, [sp, #787]
1050; NONEON-NOSVE-NEXT:    ldr w8, [sp, #268] // 4-byte Folded Reload
1051; NONEON-NOSVE-NEXT:    add w8, w8, w8
1052; NONEON-NOSVE-NEXT:    strb w8, [sp, #786]
1053; NONEON-NOSVE-NEXT:    ldr w8, [sp, #272] // 4-byte Folded Reload
1054; NONEON-NOSVE-NEXT:    add w8, w8, w8
1055; NONEON-NOSVE-NEXT:    strb w8, [sp, #785]
1056; NONEON-NOSVE-NEXT:    ldr w8, [sp, #276] // 4-byte Folded Reload
1057; NONEON-NOSVE-NEXT:    add w8, w8, w8
1058; NONEON-NOSVE-NEXT:    strb w8, [sp, #784]
1059; NONEON-NOSVE-NEXT:    ldr w8, [sp, #280] // 4-byte Folded Reload
1060; NONEON-NOSVE-NEXT:    ldp q4, q7, [sp, #768]
1061; NONEON-NOSVE-NEXT:    add w8, w8, w8
1062; NONEON-NOSVE-NEXT:    strb w8, [sp, #687]
1063; NONEON-NOSVE-NEXT:    ldr w8, [sp, #284] // 4-byte Folded Reload
1064; NONEON-NOSVE-NEXT:    add w8, w8, w8
1065; NONEON-NOSVE-NEXT:    strb w8, [sp, #686]
1066; NONEON-NOSVE-NEXT:    ldr w8, [sp, #288] // 4-byte Folded Reload
1067; NONEON-NOSVE-NEXT:    add w8, w8, w8
1068; NONEON-NOSVE-NEXT:    strb w8, [sp, #685]
1069; NONEON-NOSVE-NEXT:    ldr w8, [sp, #292] // 4-byte Folded Reload
1070; NONEON-NOSVE-NEXT:    add w8, w8, w8
1071; NONEON-NOSVE-NEXT:    strb w8, [sp, #684]
1072; NONEON-NOSVE-NEXT:    ldr w8, [sp, #296] // 4-byte Folded Reload
1073; NONEON-NOSVE-NEXT:    add w8, w8, w8
1074; NONEON-NOSVE-NEXT:    strb w8, [sp, #683]
1075; NONEON-NOSVE-NEXT:    ldr w8, [sp, #300] // 4-byte Folded Reload
1076; NONEON-NOSVE-NEXT:    add w8, w8, w8
1077; NONEON-NOSVE-NEXT:    strb w8, [sp, #682]
1078; NONEON-NOSVE-NEXT:    ldr w8, [sp, #304] // 4-byte Folded Reload
1079; NONEON-NOSVE-NEXT:    add w8, w8, w8
1080; NONEON-NOSVE-NEXT:    strb w8, [sp, #681]
1081; NONEON-NOSVE-NEXT:    ldr w8, [sp, #308] // 4-byte Folded Reload
1082; NONEON-NOSVE-NEXT:    add w8, w8, w8
1083; NONEON-NOSVE-NEXT:    strb w8, [sp, #680]
1084; NONEON-NOSVE-NEXT:    ldr w8, [sp, #312] // 4-byte Folded Reload
1085; NONEON-NOSVE-NEXT:    add w8, w8, w8
1086; NONEON-NOSVE-NEXT:    strb w8, [sp, #679]
1087; NONEON-NOSVE-NEXT:    ldr w8, [sp, #316] // 4-byte Folded Reload
1088; NONEON-NOSVE-NEXT:    add w8, w8, w8
1089; NONEON-NOSVE-NEXT:    strb w8, [sp, #678]
1090; NONEON-NOSVE-NEXT:    ldr w8, [sp, #320] // 4-byte Folded Reload
1091; NONEON-NOSVE-NEXT:    add w8, w8, w8
1092; NONEON-NOSVE-NEXT:    strb w8, [sp, #677]
1093; NONEON-NOSVE-NEXT:    ldr w8, [sp, #324] // 4-byte Folded Reload
1094; NONEON-NOSVE-NEXT:    add w8, w8, w8
1095; NONEON-NOSVE-NEXT:    strb w8, [sp, #676]
1096; NONEON-NOSVE-NEXT:    ldr w8, [sp, #328] // 4-byte Folded Reload
1097; NONEON-NOSVE-NEXT:    add w8, w8, w8
1098; NONEON-NOSVE-NEXT:    strb w8, [sp, #675]
1099; NONEON-NOSVE-NEXT:    ldr w8, [sp, #332] // 4-byte Folded Reload
1100; NONEON-NOSVE-NEXT:    add w8, w8, w8
1101; NONEON-NOSVE-NEXT:    strb w8, [sp, #674]
1102; NONEON-NOSVE-NEXT:    ldr w8, [sp, #336] // 4-byte Folded Reload
1103; NONEON-NOSVE-NEXT:    add w8, w8, w8
1104; NONEON-NOSVE-NEXT:    strb w8, [sp, #673]
1105; NONEON-NOSVE-NEXT:    ldr w8, [sp, #340] // 4-byte Folded Reload
1106; NONEON-NOSVE-NEXT:    add w8, w8, w8
1107; NONEON-NOSVE-NEXT:    strb w8, [sp, #672]
1108; NONEON-NOSVE-NEXT:    ldr w8, [sp, #344] // 4-byte Folded Reload
1109; NONEON-NOSVE-NEXT:    add w8, w8, w8
1110; NONEON-NOSVE-NEXT:    strb w8, [sp, #703]
1111; NONEON-NOSVE-NEXT:    ldr w8, [sp, #348] // 4-byte Folded Reload
1112; NONEON-NOSVE-NEXT:    add w8, w8, w8
1113; NONEON-NOSVE-NEXT:    strb w8, [sp, #702]
1114; NONEON-NOSVE-NEXT:    ldr w8, [sp, #352] // 4-byte Folded Reload
1115; NONEON-NOSVE-NEXT:    add w8, w8, w8
1116; NONEON-NOSVE-NEXT:    strb w8, [sp, #701]
1117; NONEON-NOSVE-NEXT:    ldr w8, [sp, #356] // 4-byte Folded Reload
1118; NONEON-NOSVE-NEXT:    add w8, w8, w8
1119; NONEON-NOSVE-NEXT:    strb w8, [sp, #700]
1120; NONEON-NOSVE-NEXT:    ldr w8, [sp, #360] // 4-byte Folded Reload
1121; NONEON-NOSVE-NEXT:    add w8, w8, w8
1122; NONEON-NOSVE-NEXT:    strb w8, [sp, #699]
1123; NONEON-NOSVE-NEXT:    ldr w8, [sp, #364] // 4-byte Folded Reload
1124; NONEON-NOSVE-NEXT:    add w8, w8, w8
1125; NONEON-NOSVE-NEXT:    strb w8, [sp, #698]
1126; NONEON-NOSVE-NEXT:    ldr w8, [sp, #368] // 4-byte Folded Reload
1127; NONEON-NOSVE-NEXT:    add w8, w8, w8
1128; NONEON-NOSVE-NEXT:    strb w8, [sp, #697]
1129; NONEON-NOSVE-NEXT:    ldr w8, [sp, #372] // 4-byte Folded Reload
1130; NONEON-NOSVE-NEXT:    add w8, w8, w8
1131; NONEON-NOSVE-NEXT:    strb w8, [sp, #696]
1132; NONEON-NOSVE-NEXT:    ldr w8, [sp, #376] // 4-byte Folded Reload
1133; NONEON-NOSVE-NEXT:    add w8, w8, w8
1134; NONEON-NOSVE-NEXT:    strb w8, [sp, #695]
1135; NONEON-NOSVE-NEXT:    ldr w8, [sp, #380] // 4-byte Folded Reload
1136; NONEON-NOSVE-NEXT:    add w8, w8, w8
1137; NONEON-NOSVE-NEXT:    strb w8, [sp, #694]
1138; NONEON-NOSVE-NEXT:    ldr w8, [sp, #384] // 4-byte Folded Reload
1139; NONEON-NOSVE-NEXT:    add w8, w8, w8
1140; NONEON-NOSVE-NEXT:    strb w8, [sp, #693]
1141; NONEON-NOSVE-NEXT:    ldr w8, [sp, #388] // 4-byte Folded Reload
1142; NONEON-NOSVE-NEXT:    add w8, w8, w8
1143; NONEON-NOSVE-NEXT:    strb w8, [sp, #692]
1144; NONEON-NOSVE-NEXT:    ldr w8, [sp, #392] // 4-byte Folded Reload
1145; NONEON-NOSVE-NEXT:    add w8, w8, w8
1146; NONEON-NOSVE-NEXT:    strb w8, [sp, #691]
1147; NONEON-NOSVE-NEXT:    ldr w8, [sp, #396] // 4-byte Folded Reload
1148; NONEON-NOSVE-NEXT:    add w8, w8, w8
1149; NONEON-NOSVE-NEXT:    strb w8, [sp, #690]
1150; NONEON-NOSVE-NEXT:    ldr w8, [sp, #400] // 4-byte Folded Reload
1151; NONEON-NOSVE-NEXT:    add w8, w8, w8
1152; NONEON-NOSVE-NEXT:    strb w8, [sp, #689]
1153; NONEON-NOSVE-NEXT:    ldr w8, [sp, #404] // 4-byte Folded Reload
1154; NONEON-NOSVE-NEXT:    add w8, w8, w8
1155; NONEON-NOSVE-NEXT:    strb w8, [sp, #688]
1156; NONEON-NOSVE-NEXT:    ldr x8, [sp, #408] // 8-byte Folded Reload
1157; NONEON-NOSVE-NEXT:    ldp q5, q2, [sp, #672]
1158; NONEON-NOSVE-NEXT:    stp q1, q0, [x8]
1159; NONEON-NOSVE-NEXT:    stp q4, q3, [x8, #32]
1160; NONEON-NOSVE-NEXT:    stp q7, q6, [x8, #64]
1161; NONEON-NOSVE-NEXT:    stp q2, q5, [x8, #96]
1162; NONEON-NOSVE-NEXT:    add sp, sp, #800
1163; NONEON-NOSVE-NEXT:    ldp x20, x19, [sp, #80] // 16-byte Folded Reload
1164; NONEON-NOSVE-NEXT:    ldp x22, x21, [sp, #64] // 16-byte Folded Reload
1165; NONEON-NOSVE-NEXT:    ldp x24, x23, [sp, #48] // 16-byte Folded Reload
1166; NONEON-NOSVE-NEXT:    ldp x26, x25, [sp, #32] // 16-byte Folded Reload
1167; NONEON-NOSVE-NEXT:    ldp x28, x27, [sp, #16] // 16-byte Folded Reload
1168; NONEON-NOSVE-NEXT:    ldp x29, x30, [sp], #96 // 16-byte Folded Reload
1169; NONEON-NOSVE-NEXT:    ret
1170  %a = load <128 x i16>, ptr %in
1171  %b = trunc <128 x i16> %a to <128 x i8>
1172  %c = add <128 x i8> %b, %b
1173  store <128 x i8> %c, ptr %out
1174  ret void
1175}
1176
1177;
1178; truncate i32 -> i8
1179;
1180
; Test that an <8 x i32> vector loaded from %in is truncated lane by lane to
; <8 x i8> and returned by value.
;
; Expected output for the SVE2 / SME run lines - both 128-bit halves are
; narrowed with uzp1 on .h lanes, joined with splice under a vl4 predicate,
; then narrowed once more with uzp1 on .b lanes. The result is returned in d0.
;
; Expected output for the run line with neither NEON nor SVE - the input is
; written to a 48-byte stack area, the low byte of each 32-bit element is
; stored to sp+40 through sp+47, and those eight bytes are reloaded into d0.
;
; The assertions below are autogenerated by utils/update_llc_test_checks.py,
; so regenerate them instead of editing them by hand.
1181define <8 x i8> @trunc_v8i32_v8i8(ptr %in) nounwind {
1182; CHECK-LABEL: trunc_v8i32_v8i8:
1183; CHECK:       // %bb.0:
1184; CHECK-NEXT:    ldp q1, q0, [x0]
1185; CHECK-NEXT:    ptrue p0.h, vl4
1186; CHECK-NEXT:    uzp1 z3.h, z0.h, z0.h
1187; CHECK-NEXT:    uzp1 z2.h, z1.h, z1.h
1188; CHECK-NEXT:    splice z0.h, p0, { z2.h, z3.h }
1189; CHECK-NEXT:    uzp1 z0.b, z0.b, z0.b
1190; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
1191; CHECK-NEXT:    ret
1192;
1193; NONEON-NOSVE-LABEL: trunc_v8i32_v8i8:
1194; NONEON-NOSVE:       // %bb.0:
1195; NONEON-NOSVE-NEXT:    ldp q1, q0, [x0]
1196; NONEON-NOSVE-NEXT:    stp q1, q0, [sp, #-48]!
1197; NONEON-NOSVE-NEXT:    ldp w8, w9, [sp, #24]
1198; NONEON-NOSVE-NEXT:    strb w9, [sp, #47]
1199; NONEON-NOSVE-NEXT:    strb w8, [sp, #46]
1200; NONEON-NOSVE-NEXT:    ldp w8, w9, [sp, #16]
1201; NONEON-NOSVE-NEXT:    strb w9, [sp, #45]
1202; NONEON-NOSVE-NEXT:    strb w8, [sp, #44]
1203; NONEON-NOSVE-NEXT:    ldp w8, w9, [sp, #8]
1204; NONEON-NOSVE-NEXT:    strb w9, [sp, #43]
1205; NONEON-NOSVE-NEXT:    strb w8, [sp, #42]
1206; NONEON-NOSVE-NEXT:    ldp w8, w9, [sp]
1207; NONEON-NOSVE-NEXT:    strb w9, [sp, #41]
1208; NONEON-NOSVE-NEXT:    strb w8, [sp, #40]
1209; NONEON-NOSVE-NEXT:    ldr d0, [sp, #40]
1210; NONEON-NOSVE-NEXT:    add sp, sp, #48
1211; NONEON-NOSVE-NEXT:    ret
1212  %a = load <8 x i32>, ptr %in
1213  %b = trunc <8 x i32> %a to <8 x i8>
1214  ret <8 x i8> %b
1215}
1216
; Test that a <16 x i32> vector loaded from %in is truncated lane by lane to
; <16 x i8> and returned by value.
;
; Expected output for the SVE2 / SME run lines - the four 128-bit quarters are
; narrowed with uzp1 on .h lanes and joined pairwise with splice under a vl4
; predicate. The two halves are then narrowed with uzp1 on .b lanes and joined
; with splice under a vl8 predicate. The result is returned in q0.
;
; Expected output for the run line with neither NEON nor SVE - 80 bytes of
; stack are reserved, the four input quarters are written to sp+0 through
; sp+63, the low byte of each 32-bit element is stored to sp+64 through sp+79,
; and those sixteen bytes are reloaded into q0.
;
; The assertions below are autogenerated by utils/update_llc_test_checks.py,
; so regenerate them instead of editing them by hand.
1217define <16 x i8> @trunc_v16i32_v16i8(ptr %in) nounwind {
1218; CHECK-LABEL: trunc_v16i32_v16i8:
1219; CHECK:       // %bb.0:
1220; CHECK-NEXT:    ldp q1, q0, [x0, #32]
1221; CHECK-NEXT:    ptrue p0.h, vl4
1222; CHECK-NEXT:    ldp q3, q2, [x0]
1223; CHECK-NEXT:    uzp1 z5.h, z0.h, z0.h
1224; CHECK-NEXT:    uzp1 z4.h, z1.h, z1.h
1225; CHECK-NEXT:    uzp1 z1.h, z2.h, z2.h
1226; CHECK-NEXT:    uzp1 z0.h, z3.h, z3.h
1227; CHECK-NEXT:    splice z2.h, p0, { z4.h, z5.h }
1228; CHECK-NEXT:    splice z0.h, p0, { z0.h, z1.h }
1229; CHECK-NEXT:    ptrue p0.b, vl8
1230; CHECK-NEXT:    uzp1 z2.b, z2.b, z2.b
1231; CHECK-NEXT:    uzp1 z1.b, z0.b, z0.b
1232; CHECK-NEXT:    splice z0.b, p0, { z1.b, z2.b }
1233; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
1234; CHECK-NEXT:    ret
1235;
1236; NONEON-NOSVE-LABEL: trunc_v16i32_v16i8:
1237; NONEON-NOSVE:       // %bb.0:
1238; NONEON-NOSVE-NEXT:    sub sp, sp, #80
1239; NONEON-NOSVE-NEXT:    ldp q1, q0, [x0, #32]
1240; NONEON-NOSVE-NEXT:    ldp q2, q3, [x0]
1241; NONEON-NOSVE-NEXT:    str q1, [sp, #48]
1242; NONEON-NOSVE-NEXT:    stp q0, q3, [sp, #16]
1243; NONEON-NOSVE-NEXT:    ldp w8, w9, [sp, #24]
1244; NONEON-NOSVE-NEXT:    str q2, [sp]
1245; NONEON-NOSVE-NEXT:    strb w9, [sp, #79]
1246; NONEON-NOSVE-NEXT:    strb w8, [sp, #78]
1247; NONEON-NOSVE-NEXT:    ldp w8, w9, [sp, #16]
1248; NONEON-NOSVE-NEXT:    strb w9, [sp, #77]
1249; NONEON-NOSVE-NEXT:    strb w8, [sp, #76]
1250; NONEON-NOSVE-NEXT:    ldp w8, w9, [sp, #56]
1251; NONEON-NOSVE-NEXT:    strb w9, [sp, #75]
1252; NONEON-NOSVE-NEXT:    strb w8, [sp, #74]
1253; NONEON-NOSVE-NEXT:    ldp w8, w9, [sp, #48]
1254; NONEON-NOSVE-NEXT:    strb w9, [sp, #73]
1255; NONEON-NOSVE-NEXT:    strb w8, [sp, #72]
1256; NONEON-NOSVE-NEXT:    ldp w8, w9, [sp, #40]
1257; NONEON-NOSVE-NEXT:    strb w9, [sp, #71]
1258; NONEON-NOSVE-NEXT:    strb w8, [sp, #70]
1259; NONEON-NOSVE-NEXT:    ldp w8, w9, [sp, #32]
1260; NONEON-NOSVE-NEXT:    strb w9, [sp, #69]
1261; NONEON-NOSVE-NEXT:    strb w8, [sp, #68]
1262; NONEON-NOSVE-NEXT:    ldp w8, w9, [sp, #8]
1263; NONEON-NOSVE-NEXT:    strb w9, [sp, #67]
1264; NONEON-NOSVE-NEXT:    strb w8, [sp, #66]
1265; NONEON-NOSVE-NEXT:    ldp w8, w9, [sp]
1266; NONEON-NOSVE-NEXT:    strb w9, [sp, #65]
1267; NONEON-NOSVE-NEXT:    strb w8, [sp, #64]
1268; NONEON-NOSVE-NEXT:    ldr q0, [sp, #64]
1269; NONEON-NOSVE-NEXT:    add sp, sp, #80
1270; NONEON-NOSVE-NEXT:    ret
1271  %a = load <16 x i32>, ptr %in
1272  %b = trunc <16 x i32> %a to <16 x i8>
1273  ret <16 x i8> %b
1274}
1275
; Loads a <32 x i32> vector from %in, truncates every lane to i8, adds the
; truncated vector to itself and stores the <32 x i8> result to %out.
; The assertions below are autogenerated by utils/update_llc_test_checks.py;
; regenerate them with that script rather than editing them by hand.
; NOTE: Extra 'add' is to prevent the truncate being combined with the store.
define void @trunc_v32i32_v32i8(ptr %in, ptr %out) nounwind {
; CHECK-LABEL: trunc_v32i32_v32i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldp q0, q1, [x0, #96]
; CHECK-NEXT:    ptrue p0.h, vl4
; CHECK-NEXT:    ldp q2, q3, [x0, #32]
; CHECK-NEXT:    ldp q4, q5, [x0, #64]
; CHECK-NEXT:    ldp q6, q7, [x0]
; CHECK-NEXT:    uzp1 z17.h, z1.h, z1.h
; CHECK-NEXT:    uzp1 z16.h, z0.h, z0.h
; CHECK-NEXT:    uzp1 z1.h, z3.h, z3.h
; CHECK-NEXT:    uzp1 z19.h, z5.h, z5.h
; CHECK-NEXT:    uzp1 z0.h, z2.h, z2.h
; CHECK-NEXT:    uzp1 z3.h, z7.h, z7.h
; CHECK-NEXT:    uzp1 z18.h, z4.h, z4.h
; CHECK-NEXT:    uzp1 z2.h, z6.h, z6.h
; CHECK-NEXT:    splice z4.h, p0, { z16.h, z17.h }
; CHECK-NEXT:    splice z0.h, p0, { z0.h, z1.h }
; CHECK-NEXT:    splice z5.h, p0, { z18.h, z19.h }
; CHECK-NEXT:    splice z1.h, p0, { z2.h, z3.h }
; CHECK-NEXT:    ptrue p0.b, vl8
; CHECK-NEXT:    uzp1 z3.b, z4.b, z4.b
; CHECK-NEXT:    uzp1 z7.b, z0.b, z0.b
; CHECK-NEXT:    uzp1 z2.b, z5.b, z5.b
; CHECK-NEXT:    uzp1 z6.b, z1.b, z1.b
; CHECK-NEXT:    splice z0.b, p0, { z2.b, z3.b }
; CHECK-NEXT:    splice z1.b, p0, { z6.b, z7.b }
; CHECK-NEXT:    add z0.b, z0.b, z0.b
; CHECK-NEXT:    add z1.b, z1.b, z1.b
; CHECK-NEXT:    stp q1, q0, [x1]
; CHECK-NEXT:    ret
;
; NONEON-NOSVE-LABEL: trunc_v32i32_v32i8:
; NONEON-NOSVE:       // %bb.0:
; NONEON-NOSVE-NEXT:    sub sp, sp, #272
; NONEON-NOSVE-NEXT:    ldp q3, q2, [x0, #32]
; NONEON-NOSVE-NEXT:    stp x28, x27, [sp, #192] // 16-byte Folded Spill
; NONEON-NOSVE-NEXT:    ldp q5, q4, [x0]
; NONEON-NOSVE-NEXT:    stp x26, x25, [sp, #208] // 16-byte Folded Spill
; NONEON-NOSVE-NEXT:    stp x24, x23, [sp, #224] // 16-byte Folded Spill
; NONEON-NOSVE-NEXT:    ldp q1, q0, [x0, #64]
; NONEON-NOSVE-NEXT:    stp x22, x21, [sp, #240] // 16-byte Folded Spill
; NONEON-NOSVE-NEXT:    ldp q7, q6, [x0, #96]
; NONEON-NOSVE-NEXT:    stp q2, q4, [sp, #80]
; NONEON-NOSVE-NEXT:    stp q3, q1, [sp, #112]
; NONEON-NOSVE-NEXT:    stp q5, q7, [sp, #16]
; NONEON-NOSVE-NEXT:    ldp w8, w9, [sp, #88]
; NONEON-NOSVE-NEXT:    ldp w27, w28, [sp, #112]
; NONEON-NOSVE-NEXT:    ldp w25, w26, [sp, #104]
; NONEON-NOSVE-NEXT:    add w6, w8, w8
; NONEON-NOSVE-NEXT:    add w5, w9, w9
; NONEON-NOSVE-NEXT:    stp x20, x19, [sp, #256] // 16-byte Folded Spill
; NONEON-NOSVE-NEXT:    ldp w10, w8, [sp, #128]
; NONEON-NOSVE-NEXT:    ldp w23, w24, [sp, #96]
; NONEON-NOSVE-NEXT:    ldp w21, w22, [sp, #24]
; NONEON-NOSVE-NEXT:    stp w8, w10, [sp, #8] // 8-byte Folded Spill
; NONEON-NOSVE-NEXT:    ldp w9, w8, [sp, #120]
; NONEON-NOSVE-NEXT:    stp q6, q0, [sp, #48]
; NONEON-NOSVE-NEXT:    ldp w19, w20, [sp, #16]
; NONEON-NOSVE-NEXT:    add w8, w8, w8
; NONEON-NOSVE-NEXT:    add w9, w9, w9
; NONEON-NOSVE-NEXT:    stp x29, x30, [sp, #176] // 16-byte Folded Spill
; NONEON-NOSVE-NEXT:    strb w8, [sp, #155]
; NONEON-NOSVE-NEXT:    add w8, w28, w28
; NONEON-NOSVE-NEXT:    strb w9, [sp, #154]
; NONEON-NOSVE-NEXT:    add w9, w27, w27
; NONEON-NOSVE-NEXT:    strb w8, [sp, #153]
; NONEON-NOSVE-NEXT:    add w8, w26, w26
; NONEON-NOSVE-NEXT:    strb w9, [sp, #152]
; NONEON-NOSVE-NEXT:    add w9, w25, w25
; NONEON-NOSVE-NEXT:    ldp w4, w7, [sp, #56]
; NONEON-NOSVE-NEXT:    strb w8, [sp, #151]
; NONEON-NOSVE-NEXT:    add w8, w24, w24
; NONEON-NOSVE-NEXT:    strb w9, [sp, #150]
; NONEON-NOSVE-NEXT:    add w9, w23, w23
; NONEON-NOSVE-NEXT:    ldp w2, w3, [sp, #48]
; NONEON-NOSVE-NEXT:    strb w8, [sp, #149]
; NONEON-NOSVE-NEXT:    add w8, w22, w22
; NONEON-NOSVE-NEXT:    strb w9, [sp, #148]
; NONEON-NOSVE-NEXT:    add w9, w21, w21
; NONEON-NOSVE-NEXT:    ldp w18, w0, [sp, #40]
; NONEON-NOSVE-NEXT:    strb w8, [sp, #147]
; NONEON-NOSVE-NEXT:    add w8, w20, w20
; NONEON-NOSVE-NEXT:    strb w9, [sp, #146]
; NONEON-NOSVE-NEXT:    add w9, w19, w19
; NONEON-NOSVE-NEXT:    ldp w16, w17, [sp, #32]
; NONEON-NOSVE-NEXT:    strb w8, [sp, #145]
; NONEON-NOSVE-NEXT:    add w8, w7, w7
; NONEON-NOSVE-NEXT:    strb w9, [sp, #144]
; NONEON-NOSVE-NEXT:    add w9, w4, w4
; NONEON-NOSVE-NEXT:    ldp w14, w15, [sp, #72]
; NONEON-NOSVE-NEXT:    strb w8, [sp, #175]
; NONEON-NOSVE-NEXT:    add w8, w3, w3
; NONEON-NOSVE-NEXT:    strb w9, [sp, #174]
; NONEON-NOSVE-NEXT:    add w9, w2, w2
; NONEON-NOSVE-NEXT:    ldp w12, w13, [sp, #64]
; NONEON-NOSVE-NEXT:    strb w8, [sp, #173]
; NONEON-NOSVE-NEXT:    add w8, w0, w0
; NONEON-NOSVE-NEXT:    strb w9, [sp, #172]
; NONEON-NOSVE-NEXT:    add w9, w18, w18
; NONEON-NOSVE-NEXT:    ldp w10, w11, [sp, #136]
; NONEON-NOSVE-NEXT:    strb w8, [sp, #171]
; NONEON-NOSVE-NEXT:    add w8, w17, w17
; NONEON-NOSVE-NEXT:    strb w9, [sp, #170]
; NONEON-NOSVE-NEXT:    add w9, w16, w16
; NONEON-NOSVE-NEXT:    strb w8, [sp, #169]
; NONEON-NOSVE-NEXT:    add w8, w15, w15
; NONEON-NOSVE-NEXT:    strb w9, [sp, #168]
; NONEON-NOSVE-NEXT:    add w9, w14, w14
; NONEON-NOSVE-NEXT:    strb w8, [sp, #167]
; NONEON-NOSVE-NEXT:    add w8, w13, w13
; NONEON-NOSVE-NEXT:    strb w9, [sp, #166]
; NONEON-NOSVE-NEXT:    add w9, w12, w12
; NONEON-NOSVE-NEXT:    ldp w29, w30, [sp, #80]
; NONEON-NOSVE-NEXT:    strb w8, [sp, #165]
; NONEON-NOSVE-NEXT:    add w8, w11, w11
; NONEON-NOSVE-NEXT:    strb w9, [sp, #164]
; NONEON-NOSVE-NEXT:    add w9, w10, w10
; NONEON-NOSVE-NEXT:    strb w8, [sp, #163]
; NONEON-NOSVE-NEXT:    ldr w8, [sp, #8] // 4-byte Folded Reload
; NONEON-NOSVE-NEXT:    strb w9, [sp, #162]
; NONEON-NOSVE-NEXT:    ldr w9, [sp, #12] // 4-byte Folded Reload
; NONEON-NOSVE-NEXT:    strb w5, [sp, #159]
; NONEON-NOSVE-NEXT:    add w5, w30, w30
; NONEON-NOSVE-NEXT:    add w8, w8, w8
; NONEON-NOSVE-NEXT:    strb w6, [sp, #158]
; NONEON-NOSVE-NEXT:    add w6, w29, w29
; NONEON-NOSVE-NEXT:    add w9, w9, w9
; NONEON-NOSVE-NEXT:    strb w5, [sp, #157]
; NONEON-NOSVE-NEXT:    ldp x20, x19, [sp, #256] // 16-byte Folded Reload
; NONEON-NOSVE-NEXT:    strb w6, [sp, #156]
; NONEON-NOSVE-NEXT:    ldp x22, x21, [sp, #240] // 16-byte Folded Reload
; NONEON-NOSVE-NEXT:    strb w8, [sp, #161]
; NONEON-NOSVE-NEXT:    ldp x24, x23, [sp, #224] // 16-byte Folded Reload
; NONEON-NOSVE-NEXT:    strb w9, [sp, #160]
; NONEON-NOSVE-NEXT:    ldp x26, x25, [sp, #208] // 16-byte Folded Reload
; NONEON-NOSVE-NEXT:    ldp q1, q0, [sp, #144]
; NONEON-NOSVE-NEXT:    ldp x28, x27, [sp, #192] // 16-byte Folded Reload
; NONEON-NOSVE-NEXT:    ldp x29, x30, [sp, #176] // 16-byte Folded Reload
; NONEON-NOSVE-NEXT:    stp q1, q0, [x1]
; NONEON-NOSVE-NEXT:    add sp, sp, #272
; NONEON-NOSVE-NEXT:    ret
  %a = load <32 x i32>, ptr %in
  %b = trunc <32 x i32> %a to <32 x i8>
  %c = add <32 x i8> %b, %b
  store <32 x i8> %c, ptr %out
  ret void
}
1425
; Loads a <64 x i32> vector from %in, truncates every lane to i8, adds the
; truncated vector to itself and stores the <64 x i8> result to %out.
; The assertions below are autogenerated by utils/update_llc_test_checks.py;
; regenerate them with that script rather than editing them by hand.
; NOTE: Extra 'add' is to prevent the truncate being combined with the store.
define void @trunc_v64i32_v64i8(ptr %in, ptr %out) nounwind {
; CHECK-LABEL: trunc_v64i32_v64i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldp q2, q3, [x0, #160]
; CHECK-NEXT:    ptrue p0.h, vl4
; CHECK-NEXT:    ldp q4, q5, [x0, #96]
; CHECK-NEXT:    ldp q6, q7, [x0]
; CHECK-NEXT:    uzp1 z17.h, z3.h, z3.h
; CHECK-NEXT:    ldp q3, q18, [x0, #128]
; CHECK-NEXT:    uzp1 z16.h, z2.h, z2.h
; CHECK-NEXT:    ldp q2, q19, [x0, #192]
; CHECK-NEXT:    ldp q0, q1, [x0, #64]
; CHECK-NEXT:    uzp1 z21.h, z18.h, z18.h
; CHECK-NEXT:    ldp q18, q22, [x0, #224]
; CHECK-NEXT:    uzp1 z20.h, z3.h, z3.h
; CHECK-NEXT:    ldp q3, q23, [x0, #32]
; CHECK-NEXT:    splice z16.h, p0, { z16.h, z17.h }
; CHECK-NEXT:    uzp1 z27.h, z19.h, z19.h
; CHECK-NEXT:    uzp1 z25.h, z22.h, z22.h
; CHECK-NEXT:    uzp1 z26.h, z2.h, z2.h
; CHECK-NEXT:    uzp1 z24.h, z18.h, z18.h
; CHECK-NEXT:    uzp1 z18.h, z23.h, z23.h
; CHECK-NEXT:    uzp1 z23.h, z5.h, z5.h
; CHECK-NEXT:    uzp1 z17.h, z3.h, z3.h
; CHECK-NEXT:    uzp1 z3.h, z7.h, z7.h
; CHECK-NEXT:    uzp1 z22.h, z4.h, z4.h
; CHECK-NEXT:    uzp1 z2.h, z6.h, z6.h
; CHECK-NEXT:    uzp1 z5.h, z1.h, z1.h
; CHECK-NEXT:    splice z1.h, p0, { z20.h, z21.h }
; CHECK-NEXT:    splice z6.h, p0, { z24.h, z25.h }
; CHECK-NEXT:    uzp1 z4.h, z0.h, z0.h
; CHECK-NEXT:    splice z0.h, p0, { z26.h, z27.h }
; CHECK-NEXT:    splice z7.h, p0, { z17.h, z18.h }
; CHECK-NEXT:    uzp1 z17.b, z16.b, z16.b
; CHECK-NEXT:    splice z2.h, p0, { z2.h, z3.h }
; CHECK-NEXT:    splice z3.h, p0, { z22.h, z23.h }
; CHECK-NEXT:    splice z4.h, p0, { z4.h, z5.h }
; CHECK-NEXT:    uzp1 z16.b, z1.b, z1.b
; CHECK-NEXT:    ptrue p0.b, vl8
; CHECK-NEXT:    uzp1 z6.b, z6.b, z6.b
; CHECK-NEXT:    uzp1 z5.b, z0.b, z0.b
; CHECK-NEXT:    uzp1 z1.b, z7.b, z7.b
; CHECK-NEXT:    uzp1 z0.b, z2.b, z2.b
; CHECK-NEXT:    uzp1 z3.b, z3.b, z3.b
; CHECK-NEXT:    splice z7.b, p0, { z16.b, z17.b }
; CHECK-NEXT:    uzp1 z2.b, z4.b, z4.b
; CHECK-NEXT:    splice z4.b, p0, { z5.b, z6.b }
; CHECK-NEXT:    splice z0.b, p0, { z0.b, z1.b }
; CHECK-NEXT:    splice z1.b, p0, { z2.b, z3.b }
; CHECK-NEXT:    add z2.b, z7.b, z7.b
; CHECK-NEXT:    add z3.b, z4.b, z4.b
; CHECK-NEXT:    add z0.b, z0.b, z0.b
; CHECK-NEXT:    add z1.b, z1.b, z1.b
; CHECK-NEXT:    stp q2, q3, [x1, #32]
; CHECK-NEXT:    stp q0, q1, [x1]
; CHECK-NEXT:    ret
;
; NONEON-NOSVE-LABEL: trunc_v64i32_v64i8:
; NONEON-NOSVE:       // %bb.0:
; NONEON-NOSVE-NEXT:    stp x29, x30, [sp, #-96]! // 16-byte Folded Spill
; NONEON-NOSVE-NEXT:    stp x28, x27, [sp, #16] // 16-byte Folded Spill
; NONEON-NOSVE-NEXT:    stp x26, x25, [sp, #32] // 16-byte Folded Spill
; NONEON-NOSVE-NEXT:    stp x24, x23, [sp, #48] // 16-byte Folded Spill
; NONEON-NOSVE-NEXT:    stp x22, x21, [sp, #64] // 16-byte Folded Spill
; NONEON-NOSVE-NEXT:    stp x20, x19, [sp, #80] // 16-byte Folded Spill
; NONEON-NOSVE-NEXT:    sub sp, sp, #480
; NONEON-NOSVE-NEXT:    ldp q1, q0, [x0, #96]
; NONEON-NOSVE-NEXT:    str x1, [sp, #152] // 8-byte Folded Spill
; NONEON-NOSVE-NEXT:    ldp q3, q2, [x0, #64]
; NONEON-NOSVE-NEXT:    ldp q17, q16, [x0, #128]
; NONEON-NOSVE-NEXT:    ldp q5, q4, [x0, #32]
; NONEON-NOSVE-NEXT:    ldp q7, q6, [x0]
; NONEON-NOSVE-NEXT:    ldp q19, q18, [x0, #224]
; NONEON-NOSVE-NEXT:    ldp q21, q20, [x0, #192]
; NONEON-NOSVE-NEXT:    ldp q23, q22, [x0, #160]
; NONEON-NOSVE-NEXT:    stp q1, q0, [sp, #288]
; NONEON-NOSVE-NEXT:    ldr w8, [sp, #316]
; NONEON-NOSVE-NEXT:    str q18, [sp, #208]
; NONEON-NOSVE-NEXT:    ldr w10, [sp, #304]
; NONEON-NOSVE-NEXT:    stp q21, q19, [sp, #176]
; NONEON-NOSVE-NEXT:    ldr w11, [sp, #296]
; NONEON-NOSVE-NEXT:    ldr w12, [sp, #292]
; NONEON-NOSVE-NEXT:    add w20, w8, w8
; NONEON-NOSVE-NEXT:    stp q20, q23, [sp, #224]
; NONEON-NOSVE-NEXT:    ldr w13, [sp, #288]
; NONEON-NOSVE-NEXT:    stp q22, q16, [sp, #256]
; NONEON-NOSVE-NEXT:    ldr w22, [sp, #312]
; NONEON-NOSVE-NEXT:    stp q3, q17, [sp, #384]
; NONEON-NOSVE-NEXT:    ldr w9, [sp, #400]
; NONEON-NOSVE-NEXT:    ldr w8, [sp, #404]
; NONEON-NOSVE-NEXT:    str q7, [sp, #160]
; NONEON-NOSVE-NEXT:    stp q2, q4, [sp, #320]
; NONEON-NOSVE-NEXT:    ldr w18, [sp, #396]
; NONEON-NOSVE-NEXT:    ldr w0, [sp, #392]
; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #144] // 8-byte Folded Spill
; NONEON-NOSVE-NEXT:    ldr w9, [sp, #408]
; NONEON-NOSVE-NEXT:    ldr w8, [sp, #412]
; NONEON-NOSVE-NEXT:    ldr w14, [sp, #332]
; NONEON-NOSVE-NEXT:    ldr w15, [sp, #328]
; NONEON-NOSVE-NEXT:    ldr w16, [sp, #324]
; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #136] // 8-byte Folded Spill
; NONEON-NOSVE-NEXT:    ldr w9, [sp, #272]
; NONEON-NOSVE-NEXT:    ldr w8, [sp, #276]
; NONEON-NOSVE-NEXT:    ldr w17, [sp, #320]
; NONEON-NOSVE-NEXT:    ldr w1, [sp, #388]
; NONEON-NOSVE-NEXT:    ldr w2, [sp, #384]
; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #128] // 8-byte Folded Spill
; NONEON-NOSVE-NEXT:    ldr w9, [sp, #280]
; NONEON-NOSVE-NEXT:    ldr w8, [sp, #284]
; NONEON-NOSVE-NEXT:    ldr w3, [sp, #348]
; NONEON-NOSVE-NEXT:    ldr w4, [sp, #344]
; NONEON-NOSVE-NEXT:    ldr w5, [sp, #340]
; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #120] // 8-byte Folded Spill
; NONEON-NOSVE-NEXT:    ldr w6, [sp, #336]
; NONEON-NOSVE-NEXT:    stp q6, q5, [sp, #352]
; NONEON-NOSVE-NEXT:    ldr w7, [sp, #380]
; NONEON-NOSVE-NEXT:    ldr w19, [sp, #376]
; NONEON-NOSVE-NEXT:    ldr w21, [sp, #372]
; NONEON-NOSVE-NEXT:    ldr w23, [sp, #368]
; NONEON-NOSVE-NEXT:    ldr w24, [sp, #364]
; NONEON-NOSVE-NEXT:    ldr w25, [sp, #360]
; NONEON-NOSVE-NEXT:    ldr w26, [sp, #356]
; NONEON-NOSVE-NEXT:    ldr w27, [sp, #352]
; NONEON-NOSVE-NEXT:    strb w20, [sp, #463]
; NONEON-NOSVE-NEXT:    add w20, w22, w22
; NONEON-NOSVE-NEXT:    strb w20, [sp, #462]
; NONEON-NOSVE-NEXT:    ldp w9, w8, [sp, #240]
; NONEON-NOSVE-NEXT:    ldp w29, w28, [sp, #168]
; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #112] // 8-byte Folded Spill
; NONEON-NOSVE-NEXT:    ldp w9, w8, [sp, #248]
; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #104] // 8-byte Folded Spill
; NONEON-NOSVE-NEXT:    ldr w9, [sp, #256]
; NONEON-NOSVE-NEXT:    ldr w8, [sp, #260]
; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #96] // 8-byte Folded Spill
; NONEON-NOSVE-NEXT:    ldr w9, [sp, #264]
; NONEON-NOSVE-NEXT:    ldr w8, [sp, #268]
; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #88] // 8-byte Folded Spill
; NONEON-NOSVE-NEXT:    ldp w9, w8, [sp, #176]
; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #80] // 8-byte Folded Spill
; NONEON-NOSVE-NEXT:    ldp w9, w8, [sp, #184]
; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #72] // 8-byte Folded Spill
; NONEON-NOSVE-NEXT:    ldp w9, w8, [sp, #224]
; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #64] // 8-byte Folded Spill
; NONEON-NOSVE-NEXT:    ldp w9, w8, [sp, #232]
; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #56] // 8-byte Folded Spill
; NONEON-NOSVE-NEXT:    ldp w9, w8, [sp, #192]
; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #48] // 8-byte Folded Spill
; NONEON-NOSVE-NEXT:    ldp w9, w8, [sp, #200]
; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #40] // 8-byte Folded Spill
; NONEON-NOSVE-NEXT:    ldp w9, w8, [sp, #208]
; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #32] // 8-byte Folded Spill
; NONEON-NOSVE-NEXT:    ldp w9, w8, [sp, #216]
; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #24] // 8-byte Folded Spill
; NONEON-NOSVE-NEXT:    ldr w9, [sp, #300]
; NONEON-NOSVE-NEXT:    ldp w8, w30, [sp, #160]
; NONEON-NOSVE-NEXT:    str w8, [sp, #20] // 4-byte Folded Spill
; NONEON-NOSVE-NEXT:    ldr w8, [sp, #308]
; NONEON-NOSVE-NEXT:    add w8, w8, w8
; NONEON-NOSVE-NEXT:    strb w8, [sp, #461]
; NONEON-NOSVE-NEXT:    add w8, w10, w10
; NONEON-NOSVE-NEXT:    strb w8, [sp, #460]
; NONEON-NOSVE-NEXT:    add w8, w9, w9
; NONEON-NOSVE-NEXT:    strb w8, [sp, #459]
; NONEON-NOSVE-NEXT:    add w8, w11, w11
; NONEON-NOSVE-NEXT:    strb w8, [sp, #458]
; NONEON-NOSVE-NEXT:    add w8, w12, w12
; NONEON-NOSVE-NEXT:    strb w8, [sp, #457]
; NONEON-NOSVE-NEXT:    add w8, w13, w13
; NONEON-NOSVE-NEXT:    strb w8, [sp, #456]
; NONEON-NOSVE-NEXT:    add w8, w14, w14
; NONEON-NOSVE-NEXT:    strb w8, [sp, #455]
; NONEON-NOSVE-NEXT:    add w8, w15, w15
; NONEON-NOSVE-NEXT:    strb w8, [sp, #454]
; NONEON-NOSVE-NEXT:    add w8, w16, w16
; NONEON-NOSVE-NEXT:    strb w8, [sp, #453]
; NONEON-NOSVE-NEXT:    add w8, w17, w17
; NONEON-NOSVE-NEXT:    strb w8, [sp, #452]
; NONEON-NOSVE-NEXT:    add w8, w18, w18
; NONEON-NOSVE-NEXT:    strb w8, [sp, #451]
; NONEON-NOSVE-NEXT:    add w8, w0, w0
; NONEON-NOSVE-NEXT:    strb w8, [sp, #450]
; NONEON-NOSVE-NEXT:    add w8, w1, w1
; NONEON-NOSVE-NEXT:    strb w8, [sp, #449]
; NONEON-NOSVE-NEXT:    add w8, w2, w2
; NONEON-NOSVE-NEXT:    strb w8, [sp, #448]
; NONEON-NOSVE-NEXT:    add w8, w3, w3
; NONEON-NOSVE-NEXT:    strb w8, [sp, #447]
; NONEON-NOSVE-NEXT:    add w8, w4, w4
; NONEON-NOSVE-NEXT:    strb w8, [sp, #446]
; NONEON-NOSVE-NEXT:    add w8, w5, w5
; NONEON-NOSVE-NEXT:    strb w8, [sp, #445]
; NONEON-NOSVE-NEXT:    add w8, w6, w6
; NONEON-NOSVE-NEXT:    strb w8, [sp, #444]
; NONEON-NOSVE-NEXT:    add w8, w7, w7
; NONEON-NOSVE-NEXT:    strb w8, [sp, #443]
; NONEON-NOSVE-NEXT:    add w8, w19, w19
; NONEON-NOSVE-NEXT:    strb w8, [sp, #442]
; NONEON-NOSVE-NEXT:    add w8, w21, w21
; NONEON-NOSVE-NEXT:    strb w8, [sp, #441]
; NONEON-NOSVE-NEXT:    add w8, w23, w23
; NONEON-NOSVE-NEXT:    strb w8, [sp, #440]
; NONEON-NOSVE-NEXT:    add w8, w24, w24
; NONEON-NOSVE-NEXT:    strb w8, [sp, #439]
; NONEON-NOSVE-NEXT:    add w8, w25, w25
; NONEON-NOSVE-NEXT:    strb w8, [sp, #438]
; NONEON-NOSVE-NEXT:    add w8, w26, w26
; NONEON-NOSVE-NEXT:    strb w8, [sp, #437]
; NONEON-NOSVE-NEXT:    add w8, w27, w27
; NONEON-NOSVE-NEXT:    strb w8, [sp, #436]
; NONEON-NOSVE-NEXT:    add w8, w28, w28
; NONEON-NOSVE-NEXT:    strb w8, [sp, #435]
; NONEON-NOSVE-NEXT:    add w8, w29, w29
; NONEON-NOSVE-NEXT:    strb w8, [sp, #434]
; NONEON-NOSVE-NEXT:    add w8, w30, w30
; NONEON-NOSVE-NEXT:    strb w8, [sp, #433]
; NONEON-NOSVE-NEXT:    ldr w8, [sp, #20] // 4-byte Folded Reload
; NONEON-NOSVE-NEXT:    add w8, w8, w8
; NONEON-NOSVE-NEXT:    strb w8, [sp, #432]
; NONEON-NOSVE-NEXT:    ldr w8, [sp, #24] // 4-byte Folded Reload
; NONEON-NOSVE-NEXT:    add w8, w8, w8
; NONEON-NOSVE-NEXT:    strb w8, [sp, #431]
; NONEON-NOSVE-NEXT:    ldr w8, [sp, #28] // 4-byte Folded Reload
; NONEON-NOSVE-NEXT:    add w8, w8, w8
; NONEON-NOSVE-NEXT:    strb w8, [sp, #430]
; NONEON-NOSVE-NEXT:    ldr w8, [sp, #32] // 4-byte Folded Reload
; NONEON-NOSVE-NEXT:    add w8, w8, w8
; NONEON-NOSVE-NEXT:    strb w8, [sp, #429]
; NONEON-NOSVE-NEXT:    ldr w8, [sp, #36] // 4-byte Folded Reload
; NONEON-NOSVE-NEXT:    add w8, w8, w8
; NONEON-NOSVE-NEXT:    strb w8, [sp, #428]
; NONEON-NOSVE-NEXT:    ldr w8, [sp, #40] // 4-byte Folded Reload
; NONEON-NOSVE-NEXT:    add w8, w8, w8
; NONEON-NOSVE-NEXT:    strb w8, [sp, #427]
; NONEON-NOSVE-NEXT:    ldr w8, [sp, #44] // 4-byte Folded Reload
; NONEON-NOSVE-NEXT:    add w8, w8, w8
; NONEON-NOSVE-NEXT:    strb w8, [sp, #426]
; NONEON-NOSVE-NEXT:    ldr w8, [sp, #48] // 4-byte Folded Reload
; NONEON-NOSVE-NEXT:    add w8, w8, w8
; NONEON-NOSVE-NEXT:    strb w8, [sp, #425]
; NONEON-NOSVE-NEXT:    ldr w8, [sp, #52] // 4-byte Folded Reload
; NONEON-NOSVE-NEXT:    add w8, w8, w8
; NONEON-NOSVE-NEXT:    strb w8, [sp, #424]
; NONEON-NOSVE-NEXT:    ldr w8, [sp, #56] // 4-byte Folded Reload
; NONEON-NOSVE-NEXT:    add w8, w8, w8
; NONEON-NOSVE-NEXT:    strb w8, [sp, #423]
; NONEON-NOSVE-NEXT:    ldr w8, [sp, #60] // 4-byte Folded Reload
; NONEON-NOSVE-NEXT:    add w8, w8, w8
; NONEON-NOSVE-NEXT:    strb w8, [sp, #422]
; NONEON-NOSVE-NEXT:    ldr w8, [sp, #64] // 4-byte Folded Reload
; NONEON-NOSVE-NEXT:    add w8, w8, w8
; NONEON-NOSVE-NEXT:    strb w8, [sp, #421]
; NONEON-NOSVE-NEXT:    ldr w8, [sp, #68] // 4-byte Folded Reload
; NONEON-NOSVE-NEXT:    add w8, w8, w8
; NONEON-NOSVE-NEXT:    strb w8, [sp, #420]
; NONEON-NOSVE-NEXT:    ldr w8, [sp, #72] // 4-byte Folded Reload
; NONEON-NOSVE-NEXT:    add w8, w8, w8
; NONEON-NOSVE-NEXT:    strb w8, [sp, #419]
; NONEON-NOSVE-NEXT:    ldr w8, [sp, #76] // 4-byte Folded Reload
; NONEON-NOSVE-NEXT:    add w8, w8, w8
; NONEON-NOSVE-NEXT:    strb w8, [sp, #418]
; NONEON-NOSVE-NEXT:    ldr w8, [sp, #80] // 4-byte Folded Reload
; NONEON-NOSVE-NEXT:    add w8, w8, w8
; NONEON-NOSVE-NEXT:    strb w8, [sp, #417]
; NONEON-NOSVE-NEXT:    ldr w8, [sp, #84] // 4-byte Folded Reload
; NONEON-NOSVE-NEXT:    add w8, w8, w8
; NONEON-NOSVE-NEXT:    strb w8, [sp, #416]
; NONEON-NOSVE-NEXT:    ldr w8, [sp, #88] // 4-byte Folded Reload
; NONEON-NOSVE-NEXT:    ldp q1, q3, [sp, #416]
; NONEON-NOSVE-NEXT:    add w8, w8, w8
; NONEON-NOSVE-NEXT:    strb w8, [sp, #479]
; NONEON-NOSVE-NEXT:    ldr w8, [sp, #92] // 4-byte Folded Reload
; NONEON-NOSVE-NEXT:    add w8, w8, w8
; NONEON-NOSVE-NEXT:    strb w8, [sp, #478]
; NONEON-NOSVE-NEXT:    ldr w8, [sp, #96] // 4-byte Folded Reload
; NONEON-NOSVE-NEXT:    add w8, w8, w8
; NONEON-NOSVE-NEXT:    strb w8, [sp, #477]
; NONEON-NOSVE-NEXT:    ldr w8, [sp, #100] // 4-byte Folded Reload
; NONEON-NOSVE-NEXT:    add w8, w8, w8
; NONEON-NOSVE-NEXT:    strb w8, [sp, #476]
; NONEON-NOSVE-NEXT:    ldr w8, [sp, #104] // 4-byte Folded Reload
; NONEON-NOSVE-NEXT:    add w8, w8, w8
; NONEON-NOSVE-NEXT:    strb w8, [sp, #475]
; NONEON-NOSVE-NEXT:    ldr w8, [sp, #108] // 4-byte Folded Reload
; NONEON-NOSVE-NEXT:    add w8, w8, w8
; NONEON-NOSVE-NEXT:    strb w8, [sp, #474]
; NONEON-NOSVE-NEXT:    ldr w8, [sp, #112] // 4-byte Folded Reload
; NONEON-NOSVE-NEXT:    add w8, w8, w8
; NONEON-NOSVE-NEXT:    strb w8, [sp, #473]
; NONEON-NOSVE-NEXT:    ldr w8, [sp, #116] // 4-byte Folded Reload
; NONEON-NOSVE-NEXT:    add w8, w8, w8
; NONEON-NOSVE-NEXT:    strb w8, [sp, #472]
; NONEON-NOSVE-NEXT:    ldr w8, [sp, #120] // 4-byte Folded Reload
; NONEON-NOSVE-NEXT:    add w8, w8, w8
; NONEON-NOSVE-NEXT:    strb w8, [sp, #471]
; NONEON-NOSVE-NEXT:    ldr w8, [sp, #124] // 4-byte Folded Reload
; NONEON-NOSVE-NEXT:    add w8, w8, w8
; NONEON-NOSVE-NEXT:    strb w8, [sp, #470]
; NONEON-NOSVE-NEXT:    ldr w8, [sp, #128] // 4-byte Folded Reload
; NONEON-NOSVE-NEXT:    add w8, w8, w8
; NONEON-NOSVE-NEXT:    strb w8, [sp, #469]
; NONEON-NOSVE-NEXT:    ldr w8, [sp, #132] // 4-byte Folded Reload
; NONEON-NOSVE-NEXT:    add w8, w8, w8
; NONEON-NOSVE-NEXT:    strb w8, [sp, #468]
; NONEON-NOSVE-NEXT:    ldr w8, [sp, #136] // 4-byte Folded Reload
; NONEON-NOSVE-NEXT:    add w8, w8, w8
; NONEON-NOSVE-NEXT:    strb w8, [sp, #467]
; NONEON-NOSVE-NEXT:    ldr w8, [sp, #140] // 4-byte Folded Reload
; NONEON-NOSVE-NEXT:    add w8, w8, w8
; NONEON-NOSVE-NEXT:    strb w8, [sp, #466]
; NONEON-NOSVE-NEXT:    ldr w8, [sp, #144] // 4-byte Folded Reload
; NONEON-NOSVE-NEXT:    add w8, w8, w8
; NONEON-NOSVE-NEXT:    strb w8, [sp, #465]
; NONEON-NOSVE-NEXT:    ldr w8, [sp, #148] // 4-byte Folded Reload
; NONEON-NOSVE-NEXT:    add w8, w8, w8
; NONEON-NOSVE-NEXT:    strb w8, [sp, #464]
; NONEON-NOSVE-NEXT:    ldr x8, [sp, #152] // 8-byte Folded Reload
; NONEON-NOSVE-NEXT:    ldp q2, q0, [sp, #448]
; NONEON-NOSVE-NEXT:    stp q3, q2, [x8]
; NONEON-NOSVE-NEXT:    stp q0, q1, [x8, #32]
; NONEON-NOSVE-NEXT:    add sp, sp, #480
; NONEON-NOSVE-NEXT:    ldp x20, x19, [sp, #80] // 16-byte Folded Reload
; NONEON-NOSVE-NEXT:    ldp x22, x21, [sp, #64] // 16-byte Folded Reload
; NONEON-NOSVE-NEXT:    ldp x24, x23, [sp, #48] // 16-byte Folded Reload
; NONEON-NOSVE-NEXT:    ldp x26, x25, [sp, #32] // 16-byte Folded Reload
; NONEON-NOSVE-NEXT:    ldp x28, x27, [sp, #16] // 16-byte Folded Reload
; NONEON-NOSVE-NEXT:    ldp x29, x30, [sp], #96 // 16-byte Folded Reload
; NONEON-NOSVE-NEXT:    ret
  %a = load <64 x i32>, ptr %in
  %b = trunc <64 x i32> %a to <64 x i8>
  %c = add <64 x i8> %b, %b
  store <64 x i8> %c, ptr %out
  ret void
}
1760
;
; truncate i32 -> i16
;
1764
; Loads an <8 x i32> vector from %in, truncates every lane to i16 and returns
; the <8 x i16> result.
; The assertions below are autogenerated by utils/update_llc_test_checks.py;
; regenerate them with that script rather than editing them by hand.
define <8 x i16> @trunc_v8i32_v8i16(ptr %in) nounwind {
; CHECK-LABEL: trunc_v8i32_v8i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldp q1, q0, [x0]
; CHECK-NEXT:    ptrue p0.h, vl4
; CHECK-NEXT:    uzp1 z3.h, z0.h, z0.h
; CHECK-NEXT:    uzp1 z2.h, z1.h, z1.h
; CHECK-NEXT:    splice z0.h, p0, { z2.h, z3.h }
; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT:    ret
;
; NONEON-NOSVE-LABEL: trunc_v8i32_v8i16:
; NONEON-NOSVE:       // %bb.0:
; NONEON-NOSVE-NEXT:    ldp q1, q0, [x0]
; NONEON-NOSVE-NEXT:    stp q1, q0, [sp, #-48]!
; NONEON-NOSVE-NEXT:    ldp w8, w9, [sp, #24]
; NONEON-NOSVE-NEXT:    strh w9, [sp, #46]
; NONEON-NOSVE-NEXT:    strh w8, [sp, #44]
; NONEON-NOSVE-NEXT:    ldp w8, w9, [sp, #16]
; NONEON-NOSVE-NEXT:    strh w9, [sp, #42]
; NONEON-NOSVE-NEXT:    strh w8, [sp, #40]
; NONEON-NOSVE-NEXT:    ldp w8, w9, [sp, #8]
; NONEON-NOSVE-NEXT:    strh w9, [sp, #38]
; NONEON-NOSVE-NEXT:    strh w8, [sp, #36]
; NONEON-NOSVE-NEXT:    ldp w8, w9, [sp]
; NONEON-NOSVE-NEXT:    strh w9, [sp, #34]
; NONEON-NOSVE-NEXT:    strh w8, [sp, #32]
; NONEON-NOSVE-NEXT:    ldr q0, [sp, #32]
; NONEON-NOSVE-NEXT:    add sp, sp, #48
; NONEON-NOSVE-NEXT:    ret
  %a = load <8 x i32>, ptr %in
  %b = trunc <8 x i32> %a to <8 x i16>
  ret <8 x i16> %b
}
1799
; Loads a <16 x i32> vector from %in, truncates every lane to i16, adds the
; truncated vector to itself and stores the <16 x i16> result to %out.
; The assertions below are autogenerated by utils/update_llc_test_checks.py;
; regenerate them with that script rather than editing them by hand.
; NOTE: Extra 'add' is to prevent the truncate being combined with the store.
define void @trunc_v16i32_v16i16(ptr %in, ptr %out) nounwind {
; CHECK-LABEL: trunc_v16i32_v16i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldp q1, q0, [x0, #32]
; CHECK-NEXT:    ptrue p0.h, vl4
; CHECK-NEXT:    ldp q3, q2, [x0]
; CHECK-NEXT:    uzp1 z5.h, z0.h, z0.h
; CHECK-NEXT:    uzp1 z4.h, z1.h, z1.h
; CHECK-NEXT:    uzp1 z1.h, z2.h, z2.h
; CHECK-NEXT:    uzp1 z0.h, z3.h, z3.h
; CHECK-NEXT:    splice z2.h, p0, { z4.h, z5.h }
; CHECK-NEXT:    splice z0.h, p0, { z0.h, z1.h }
; CHECK-NEXT:    add z1.h, z2.h, z2.h
; CHECK-NEXT:    add z0.h, z0.h, z0.h
; CHECK-NEXT:    stp q0, q1, [x1]
; CHECK-NEXT:    ret
;
; NONEON-NOSVE-LABEL: trunc_v16i32_v16i16:
; NONEON-NOSVE:       // %bb.0:
; NONEON-NOSVE-NEXT:    sub sp, sp, #96
; NONEON-NOSVE-NEXT:    ldp q0, q1, [x0, #32]
; NONEON-NOSVE-NEXT:    ldp q3, q2, [x0]
; NONEON-NOSVE-NEXT:    stp q3, q1, [sp]
; NONEON-NOSVE-NEXT:    stp q2, q0, [sp, #32]
; NONEON-NOSVE-NEXT:    ldp w9, w8, [sp, #40]
; NONEON-NOSVE-NEXT:    ldp w2, w3, [sp, #32]
; NONEON-NOSVE-NEXT:    ldp w4, w5, [sp, #8]
; NONEON-NOSVE-NEXT:    add w8, w8, w8
; NONEON-NOSVE-NEXT:    add w9, w9, w9
; NONEON-NOSVE-NEXT:    ldp w18, w0, [sp]
; NONEON-NOSVE-NEXT:    ldp w16, w17, [sp, #24]
; NONEON-NOSVE-NEXT:    strh w8, [sp, #78]
; NONEON-NOSVE-NEXT:    add w8, w3, w3
; NONEON-NOSVE-NEXT:    strh w9, [sp, #76]
; NONEON-NOSVE-NEXT:    add w9, w2, w2
; NONEON-NOSVE-NEXT:    ldp w14, w15, [sp, #16]
; NONEON-NOSVE-NEXT:    strh w8, [sp, #74]
; NONEON-NOSVE-NEXT:    add w8, w5, w5
; NONEON-NOSVE-NEXT:    strh w9, [sp, #72]
; NONEON-NOSVE-NEXT:    add w9, w4, w4
; NONEON-NOSVE-NEXT:    ldp w12, w13, [sp, #56]
; NONEON-NOSVE-NEXT:    strh w8, [sp, #70]
; NONEON-NOSVE-NEXT:    add w8, w0, w0
; NONEON-NOSVE-NEXT:    strh w9, [sp, #68]
; NONEON-NOSVE-NEXT:    add w9, w18, w18
; NONEON-NOSVE-NEXT:    ldp w10, w11, [sp, #48]
; NONEON-NOSVE-NEXT:    strh w8, [sp, #66]
; NONEON-NOSVE-NEXT:    add w8, w17, w17
; NONEON-NOSVE-NEXT:    strh w9, [sp, #64]
; NONEON-NOSVE-NEXT:    add w9, w16, w16
; NONEON-NOSVE-NEXT:    strh w8, [sp, #94]
; NONEON-NOSVE-NEXT:    add w8, w15, w15
; NONEON-NOSVE-NEXT:    strh w9, [sp, #92]
; NONEON-NOSVE-NEXT:    add w9, w14, w14
; NONEON-NOSVE-NEXT:    strh w8, [sp, #90]
; NONEON-NOSVE-NEXT:    add w8, w13, w13
; NONEON-NOSVE-NEXT:    strh w9, [sp, #88]
; NONEON-NOSVE-NEXT:    add w9, w12, w12
; NONEON-NOSVE-NEXT:    strh w8, [sp, #86]
; NONEON-NOSVE-NEXT:    add w8, w11, w11
; NONEON-NOSVE-NEXT:    strh w9, [sp, #84]
; NONEON-NOSVE-NEXT:    add w9, w10, w10
; NONEON-NOSVE-NEXT:    strh w8, [sp, #82]
; NONEON-NOSVE-NEXT:    strh w9, [sp, #80]
; NONEON-NOSVE-NEXT:    ldp q1, q0, [sp, #64]
; NONEON-NOSVE-NEXT:    stp q1, q0, [x1]
; NONEON-NOSVE-NEXT:    add sp, sp, #96
; NONEON-NOSVE-NEXT:    ret
  %a = load <16 x i32>, ptr %in
  %b = trunc <16 x i32> %a to <16 x i16>
  %c = add <16 x i16> %b, %b
  store <16 x i16> %c, ptr %out
  ret void
}
1875
1876; NOTE: Extra 'add' is to prevent the truncate being combined with the store.
1877define void @trunc_v32i32_v32i16(ptr %in, ptr %out) nounwind {
1878; CHECK-LABEL: trunc_v32i32_v32i16:
1879; CHECK:       // %bb.0:
1880; CHECK-NEXT:    ldp q1, q0, [x0, #64]
1881; CHECK-NEXT:    ptrue p0.h, vl4
1882; CHECK-NEXT:    ldp q2, q3, [x0, #96]
1883; CHECK-NEXT:    ldp q4, q5, [x0]
1884; CHECK-NEXT:    uzp1 z7.h, z0.h, z0.h
1885; CHECK-NEXT:    uzp1 z6.h, z1.h, z1.h
1886; CHECK-NEXT:    ldp q1, q0, [x0, #32]
1887; CHECK-NEXT:    uzp1 z17.h, z3.h, z3.h
1888; CHECK-NEXT:    uzp1 z16.h, z2.h, z2.h
1889; CHECK-NEXT:    uzp1 z3.h, z5.h, z5.h
1890; CHECK-NEXT:    uzp1 z2.h, z4.h, z4.h
1891; CHECK-NEXT:    uzp1 z5.h, z0.h, z0.h
1892; CHECK-NEXT:    splice z0.h, p0, { z6.h, z7.h }
1893; CHECK-NEXT:    uzp1 z4.h, z1.h, z1.h
1894; CHECK-NEXT:    splice z1.h, p0, { z16.h, z17.h }
1895; CHECK-NEXT:    splice z2.h, p0, { z2.h, z3.h }
1896; CHECK-NEXT:    splice z3.h, p0, { z4.h, z5.h }
1897; CHECK-NEXT:    add z0.h, z0.h, z0.h
1898; CHECK-NEXT:    add z1.h, z1.h, z1.h
1899; CHECK-NEXT:    add z2.h, z2.h, z2.h
1900; CHECK-NEXT:    add z3.h, z3.h, z3.h
1901; CHECK-NEXT:    stp q0, q1, [x1, #32]
1902; CHECK-NEXT:    stp q2, q3, [x1]
1903; CHECK-NEXT:    ret
1904;
1905; NONEON-NOSVE-LABEL: trunc_v32i32_v32i16:
1906; NONEON-NOSVE:       // %bb.0:
1907; NONEON-NOSVE-NEXT:    sub sp, sp, #304
1908; NONEON-NOSVE-NEXT:    ldp q3, q2, [x0, #32]
1909; NONEON-NOSVE-NEXT:    stp x28, x27, [sp, #224] // 16-byte Folded Spill
1910; NONEON-NOSVE-NEXT:    ldp q5, q4, [x0]
1911; NONEON-NOSVE-NEXT:    stp x26, x25, [sp, #240] // 16-byte Folded Spill
1912; NONEON-NOSVE-NEXT:    stp x24, x23, [sp, #256] // 16-byte Folded Spill
1913; NONEON-NOSVE-NEXT:    ldp q1, q0, [x0, #64]
1914; NONEON-NOSVE-NEXT:    stp x22, x21, [sp, #272] // 16-byte Folded Spill
1915; NONEON-NOSVE-NEXT:    ldp q7, q6, [x0, #96]
1916; NONEON-NOSVE-NEXT:    stp q2, q4, [sp, #80]
1917; NONEON-NOSVE-NEXT:    stp q3, q1, [sp, #112]
1918; NONEON-NOSVE-NEXT:    stp q5, q7, [sp, #16]
1919; NONEON-NOSVE-NEXT:    ldp w8, w9, [sp, #88]
1920; NONEON-NOSVE-NEXT:    ldp w27, w28, [sp, #112]
1921; NONEON-NOSVE-NEXT:    ldp w25, w26, [sp, #104]
1922; NONEON-NOSVE-NEXT:    add w6, w8, w8
1923; NONEON-NOSVE-NEXT:    add w5, w9, w9
1924; NONEON-NOSVE-NEXT:    stp x20, x19, [sp, #288] // 16-byte Folded Spill
1925; NONEON-NOSVE-NEXT:    ldp w10, w8, [sp, #128]
1926; NONEON-NOSVE-NEXT:    ldp w23, w24, [sp, #96]
1927; NONEON-NOSVE-NEXT:    ldp w21, w22, [sp, #24]
1928; NONEON-NOSVE-NEXT:    stp w8, w10, [sp, #8] // 8-byte Folded Spill
1929; NONEON-NOSVE-NEXT:    ldp w9, w8, [sp, #120]
1930; NONEON-NOSVE-NEXT:    stp q6, q0, [sp, #48]
1931; NONEON-NOSVE-NEXT:    ldp w19, w20, [sp, #16]
1932; NONEON-NOSVE-NEXT:    add w8, w8, w8
1933; NONEON-NOSVE-NEXT:    add w9, w9, w9
1934; NONEON-NOSVE-NEXT:    stp x29, x30, [sp, #208] // 16-byte Folded Spill
1935; NONEON-NOSVE-NEXT:    strh w8, [sp, #182]
1936; NONEON-NOSVE-NEXT:    add w8, w28, w28
1937; NONEON-NOSVE-NEXT:    strh w9, [sp, #180]
1938; NONEON-NOSVE-NEXT:    add w9, w27, w27
1939; NONEON-NOSVE-NEXT:    strh w8, [sp, #178]
1940; NONEON-NOSVE-NEXT:    add w8, w26, w26
1941; NONEON-NOSVE-NEXT:    strh w9, [sp, #176]
1942; NONEON-NOSVE-NEXT:    add w9, w25, w25
1943; NONEON-NOSVE-NEXT:    ldp w4, w7, [sp, #56]
1944; NONEON-NOSVE-NEXT:    strh w8, [sp, #174]
1945; NONEON-NOSVE-NEXT:    add w8, w24, w24
1946; NONEON-NOSVE-NEXT:    strh w9, [sp, #172]
1947; NONEON-NOSVE-NEXT:    add w9, w23, w23
1948; NONEON-NOSVE-NEXT:    ldp w2, w3, [sp, #48]
1949; NONEON-NOSVE-NEXT:    strh w8, [sp, #170]
1950; NONEON-NOSVE-NEXT:    add w8, w22, w22
1951; NONEON-NOSVE-NEXT:    strh w9, [sp, #168]
1952; NONEON-NOSVE-NEXT:    add w9, w21, w21
1953; NONEON-NOSVE-NEXT:    ldp w18, w0, [sp, #40]
1954; NONEON-NOSVE-NEXT:    strh w8, [sp, #166]
1955; NONEON-NOSVE-NEXT:    add w8, w20, w20
1956; NONEON-NOSVE-NEXT:    strh w9, [sp, #164]
1957; NONEON-NOSVE-NEXT:    add w9, w19, w19
1958; NONEON-NOSVE-NEXT:    ldp w16, w17, [sp, #32]
1959; NONEON-NOSVE-NEXT:    strh w8, [sp, #162]
1960; NONEON-NOSVE-NEXT:    add w8, w7, w7
1961; NONEON-NOSVE-NEXT:    strh w9, [sp, #160]
1962; NONEON-NOSVE-NEXT:    add w9, w4, w4
1963; NONEON-NOSVE-NEXT:    ldp w14, w15, [sp, #72]
1964; NONEON-NOSVE-NEXT:    strh w8, [sp, #158]
1965; NONEON-NOSVE-NEXT:    add w8, w3, w3
1966; NONEON-NOSVE-NEXT:    strh w9, [sp, #156]
1967; NONEON-NOSVE-NEXT:    add w9, w2, w2
1968; NONEON-NOSVE-NEXT:    ldp w12, w13, [sp, #64]
1969; NONEON-NOSVE-NEXT:    strh w8, [sp, #154]
1970; NONEON-NOSVE-NEXT:    add w8, w0, w0
1971; NONEON-NOSVE-NEXT:    strh w9, [sp, #152]
1972; NONEON-NOSVE-NEXT:    add w9, w18, w18
1973; NONEON-NOSVE-NEXT:    ldp w10, w11, [sp, #136]
1974; NONEON-NOSVE-NEXT:    strh w8, [sp, #150]
1975; NONEON-NOSVE-NEXT:    add w8, w17, w17
1976; NONEON-NOSVE-NEXT:    strh w9, [sp, #148]
1977; NONEON-NOSVE-NEXT:    add w9, w16, w16
1978; NONEON-NOSVE-NEXT:    strh w8, [sp, #146]
1979; NONEON-NOSVE-NEXT:    add w8, w15, w15
1980; NONEON-NOSVE-NEXT:    strh w9, [sp, #144]
1981; NONEON-NOSVE-NEXT:    add w9, w14, w14
1982; NONEON-NOSVE-NEXT:    strh w8, [sp, #206]
1983; NONEON-NOSVE-NEXT:    add w8, w13, w13
1984; NONEON-NOSVE-NEXT:    strh w9, [sp, #204]
1985; NONEON-NOSVE-NEXT:    add w9, w12, w12
1986; NONEON-NOSVE-NEXT:    ldp w29, w30, [sp, #80]
1987; NONEON-NOSVE-NEXT:    strh w8, [sp, #202]
1988; NONEON-NOSVE-NEXT:    add w8, w11, w11
1989; NONEON-NOSVE-NEXT:    strh w9, [sp, #200]
1990; NONEON-NOSVE-NEXT:    add w9, w10, w10
1991; NONEON-NOSVE-NEXT:    strh w8, [sp, #198]
1992; NONEON-NOSVE-NEXT:    ldr w8, [sp, #8] // 4-byte Folded Reload
1993; NONEON-NOSVE-NEXT:    strh w9, [sp, #196]
1994; NONEON-NOSVE-NEXT:    ldr w9, [sp, #12] // 4-byte Folded Reload
1995; NONEON-NOSVE-NEXT:    strh w5, [sp, #190]
1996; NONEON-NOSVE-NEXT:    add w5, w30, w30
1997; NONEON-NOSVE-NEXT:    add w8, w8, w8
1998; NONEON-NOSVE-NEXT:    strh w6, [sp, #188]
1999; NONEON-NOSVE-NEXT:    add w6, w29, w29
2000; NONEON-NOSVE-NEXT:    add w9, w9, w9
2001; NONEON-NOSVE-NEXT:    strh w5, [sp, #186]
2002; NONEON-NOSVE-NEXT:    ldp q1, q3, [sp, #144]
2003; NONEON-NOSVE-NEXT:    strh w6, [sp, #184]
2004; NONEON-NOSVE-NEXT:    ldp x20, x19, [sp, #288] // 16-byte Folded Reload
2005; NONEON-NOSVE-NEXT:    strh w8, [sp, #194]
2006; NONEON-NOSVE-NEXT:    ldp x22, x21, [sp, #272] // 16-byte Folded Reload
2007; NONEON-NOSVE-NEXT:    strh w9, [sp, #192]
2008; NONEON-NOSVE-NEXT:    ldp x24, x23, [sp, #256] // 16-byte Folded Reload
2009; NONEON-NOSVE-NEXT:    ldp q2, q0, [sp, #176]
2010; NONEON-NOSVE-NEXT:    ldp x26, x25, [sp, #240] // 16-byte Folded Reload
2011; NONEON-NOSVE-NEXT:    ldp x28, x27, [sp, #224] // 16-byte Folded Reload
2012; NONEON-NOSVE-NEXT:    ldp x29, x30, [sp, #208] // 16-byte Folded Reload
2013; NONEON-NOSVE-NEXT:    stp q3, q2, [x1]
2014; NONEON-NOSVE-NEXT:    stp q0, q1, [x1, #32]
2015; NONEON-NOSVE-NEXT:    add sp, sp, #304
2016; NONEON-NOSVE-NEXT:    ret
2017  %a = load <32 x i32>, ptr %in
2018  %b = trunc <32 x i32> %a to <32 x i16>
2019  %c = add <32 x i16> %b, %b
2020  store <32 x i16> %c, ptr %out
2021  ret void
2022}
2023
2024; NOTE: Extra 'add' is to prevent the truncate being combined with the store.
; Truncate <64 x i32> to <64 x i16>: load the vector from %in, narrow every
; element, add the narrowed vector to itself and store the result to %out.
; With SVE/SME the expected code narrows each q-sized piece with "uzp1 .h",
; joins pairs with "splice" under "ptrue p0.h, vl4", then doubles with
; "add .h" before the stores. Without SVE the expected code is scalar:
; elements are copied through the stack and each one gets its own
; "add w, w, w" followed by "strh".
2025define void @trunc_v64i32_v64i16(ptr %in, ptr %out) nounwind {
2026; CHECK-LABEL: trunc_v64i32_v64i16:
2027; CHECK:       // %bb.0:
2028; CHECK-NEXT:    ldp q2, q3, [x0, #192]
2029; CHECK-NEXT:    ptrue p0.h, vl4
2030; CHECK-NEXT:    ldp q4, q5, [x0]
2031; CHECK-NEXT:    ldp q6, q7, [x0, #64]
2032; CHECK-NEXT:    uzp1 z17.h, z3.h, z3.h
2033; CHECK-NEXT:    ldp q3, q18, [x0, #224]
2034; CHECK-NEXT:    uzp1 z16.h, z2.h, z2.h
2035; CHECK-NEXT:    ldp q2, q19, [x0, #128]
2036; CHECK-NEXT:    ldp q0, q1, [x0, #32]
2037; CHECK-NEXT:    uzp1 z21.h, z18.h, z18.h
2038; CHECK-NEXT:    ldp q18, q22, [x0, #160]
2039; CHECK-NEXT:    uzp1 z20.h, z3.h, z3.h
2040; CHECK-NEXT:    uzp1 z24.h, z19.h, z19.h
2041; CHECK-NEXT:    ldp q3, q19, [x0, #96]
2042; CHECK-NEXT:    uzp1 z23.h, z2.h, z2.h
2043; CHECK-NEXT:    uzp1 z26.h, z22.h, z22.h
2044; CHECK-NEXT:    splice z2.h, p0, { z16.h, z17.h }
2045; CHECK-NEXT:    uzp1 z17.h, z7.h, z7.h
2046; CHECK-NEXT:    uzp1 z25.h, z18.h, z18.h
2047; CHECK-NEXT:    splice z7.h, p0, { z20.h, z21.h }
2048; CHECK-NEXT:    uzp1 z21.h, z5.h, z5.h
2049; CHECK-NEXT:    uzp1 z19.h, z19.h, z19.h
2050; CHECK-NEXT:    uzp1 z20.h, z4.h, z4.h
2051; CHECK-NEXT:    uzp1 z5.h, z1.h, z1.h
2052; CHECK-NEXT:    uzp1 z16.h, z6.h, z6.h
2053; CHECK-NEXT:    splice z6.h, p0, { z23.h, z24.h }
2054; CHECK-NEXT:    uzp1 z18.h, z3.h, z3.h
2055; CHECK-NEXT:    splice z3.h, p0, { z25.h, z26.h }
2056; CHECK-NEXT:    uzp1 z4.h, z0.h, z0.h
2057; CHECK-NEXT:    add z0.h, z2.h, z2.h
2058; CHECK-NEXT:    add z7.h, z7.h, z7.h
2059; CHECK-NEXT:    splice z1.h, p0, { z16.h, z17.h }
2060; CHECK-NEXT:    splice z2.h, p0, { z18.h, z19.h }
2061; CHECK-NEXT:    splice z16.h, p0, { z20.h, z21.h }
2062; CHECK-NEXT:    splice z4.h, p0, { z4.h, z5.h }
2063; CHECK-NEXT:    add z6.h, z6.h, z6.h
2064; CHECK-NEXT:    add z3.h, z3.h, z3.h
2065; CHECK-NEXT:    stp q0, q7, [x1, #96]
2066; CHECK-NEXT:    add z0.h, z1.h, z1.h
2067; CHECK-NEXT:    add z1.h, z2.h, z2.h
2068; CHECK-NEXT:    add z2.h, z16.h, z16.h
2069; CHECK-NEXT:    stp q6, q3, [x1, #64]
2070; CHECK-NEXT:    add z3.h, z4.h, z4.h
2071; CHECK-NEXT:    stp q0, q1, [x1, #32]
2072; CHECK-NEXT:    stp q2, q3, [x1]
2073; CHECK-NEXT:    ret
2074;
2075; NONEON-NOSVE-LABEL: trunc_v64i32_v64i16:
2076; NONEON-NOSVE:       // %bb.0:
2077; NONEON-NOSVE-NEXT:    stp x29, x30, [sp, #-96]! // 16-byte Folded Spill
2078; NONEON-NOSVE-NEXT:    stp x28, x27, [sp, #16] // 16-byte Folded Spill
2079; NONEON-NOSVE-NEXT:    stp x26, x25, [sp, #32] // 16-byte Folded Spill
2080; NONEON-NOSVE-NEXT:    stp x24, x23, [sp, #48] // 16-byte Folded Spill
2081; NONEON-NOSVE-NEXT:    stp x22, x21, [sp, #64] // 16-byte Folded Spill
2082; NONEON-NOSVE-NEXT:    stp x20, x19, [sp, #80] // 16-byte Folded Spill
2083; NONEON-NOSVE-NEXT:    sub sp, sp, #528
2084; NONEON-NOSVE-NEXT:    ldp q1, q0, [x0, #32]
2085; NONEON-NOSVE-NEXT:    mov x5, x1
2086; NONEON-NOSVE-NEXT:    ldp q17, q16, [x0, #192]
2087; NONEON-NOSVE-NEXT:    ldp q23, q22, [x0, #224]
2088; NONEON-NOSVE-NEXT:    ldp q3, q2, [x0]
2089; NONEON-NOSVE-NEXT:    ldp q5, q4, [x0, #96]
2090; NONEON-NOSVE-NEXT:    ldp q7, q6, [x0, #64]
2091; NONEON-NOSVE-NEXT:    ldp q19, q18, [x0, #160]
2092; NONEON-NOSVE-NEXT:    ldp q21, q20, [x0, #128]
2093; NONEON-NOSVE-NEXT:    str q0, [sp, #320]
2094; NONEON-NOSVE-NEXT:    ldr w8, [sp, #332]
2095; NONEON-NOSVE-NEXT:    stp q17, q23, [sp, #160]
2096; NONEON-NOSVE-NEXT:    ldr w10, [sp, #320]
2097; NONEON-NOSVE-NEXT:    stp q22, q16, [sp, #192]
2098; NONEON-NOSVE-NEXT:    ldr w23, [sp, #328]
2099; NONEON-NOSVE-NEXT:    add w21, w8, w8
2100; NONEON-NOSVE-NEXT:    stp q18, q20, [sp, #240]
2101; NONEON-NOSVE-NEXT:    ldp w9, w8, [sp, #160]
2102; NONEON-NOSVE-NEXT:    stp q7, q21, [sp, #368]
2103; NONEON-NOSVE-NEXT:    str q19, [sp, #224]
2104; NONEON-NOSVE-NEXT:    ldr w29, [sp, #380]
2105; NONEON-NOSVE-NEXT:    ldr w30, [sp, #376]
2106; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #136] // 8-byte Folded Spill
2107; NONEON-NOSVE-NEXT:    ldp w9, w8, [sp, #168]
2108; NONEON-NOSVE-NEXT:    stp q4, q6, [sp, #288]
2109; NONEON-NOSVE-NEXT:    stp q2, q1, [sp, #336]
2110; NONEON-NOSVE-NEXT:    ldr w3, [sp, #300]
2111; NONEON-NOSVE-NEXT:    ldr w4, [sp, #296]
2112; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #128] // 8-byte Folded Spill
2113; NONEON-NOSVE-NEXT:    ldr w11, [sp, #360]
2114; NONEON-NOSVE-NEXT:    ldr w12, [sp, #356]
2115; NONEON-NOSVE-NEXT:    ldp w9, w8, [sp, #208]
2116; NONEON-NOSVE-NEXT:    ldr w13, [sp, #352]
2117; NONEON-NOSVE-NEXT:    ldr w14, [sp, #348]
2118; NONEON-NOSVE-NEXT:    ldr w15, [sp, #344]
2119; NONEON-NOSVE-NEXT:    str q3, [sp, #144]
2120; NONEON-NOSVE-NEXT:    ldr w16, [sp, #340]
2121; NONEON-NOSVE-NEXT:    ldr w17, [sp, #336]
2122; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #120] // 8-byte Folded Spill
2123; NONEON-NOSVE-NEXT:    ldr w6, [sp, #292]
2124; NONEON-NOSVE-NEXT:    ldr w7, [sp, #288]
2125; NONEON-NOSVE-NEXT:    str q5, [sp, #272]
2126; NONEON-NOSVE-NEXT:    ldr w25, [sp, #316]
2127; NONEON-NOSVE-NEXT:    ldr w26, [sp, #312]
2128; NONEON-NOSVE-NEXT:    ldr w19, [sp, #284]
2129; NONEON-NOSVE-NEXT:    ldr w20, [sp, #280]
2130; NONEON-NOSVE-NEXT:    ldr w22, [sp, #276]
2131; NONEON-NOSVE-NEXT:    ldr w24, [sp, #272]
2132; NONEON-NOSVE-NEXT:    ldr w27, [sp, #308]
2133; NONEON-NOSVE-NEXT:    ldr w28, [sp, #304]
2134; NONEON-NOSVE-NEXT:    strh w21, [sp, #494]
2135; NONEON-NOSVE-NEXT:    add w21, w23, w23
2136; NONEON-NOSVE-NEXT:    strh w21, [sp, #492]
2137; NONEON-NOSVE-NEXT:    ldp w9, w8, [sp, #216]
2138; NONEON-NOSVE-NEXT:    ldp w0, w18, [sp, #152]
2139; NONEON-NOSVE-NEXT:    ldp w2, w1, [sp, #144]
2140; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #112] // 8-byte Folded Spill
2141; NONEON-NOSVE-NEXT:    ldp w9, w8, [sp, #176]
2142; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #104] // 8-byte Folded Spill
2143; NONEON-NOSVE-NEXT:    ldp w9, w8, [sp, #184]
2144; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #96] // 8-byte Folded Spill
2145; NONEON-NOSVE-NEXT:    ldp w9, w8, [sp, #192]
2146; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #88] // 8-byte Folded Spill
2147; NONEON-NOSVE-NEXT:    ldp w9, w8, [sp, #200]
2148; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #80] // 8-byte Folded Spill
2149; NONEON-NOSVE-NEXT:    ldr w9, [sp, #384]
2150; NONEON-NOSVE-NEXT:    ldr w8, [sp, #388]
2151; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #72] // 8-byte Folded Spill
2152; NONEON-NOSVE-NEXT:    ldr w9, [sp, #392]
2153; NONEON-NOSVE-NEXT:    ldr w8, [sp, #396]
2154; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #64] // 8-byte Folded Spill
2155; NONEON-NOSVE-NEXT:    ldr w9, [sp, #256]
2156; NONEON-NOSVE-NEXT:    ldr w8, [sp, #260]
2157; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #56] // 8-byte Folded Spill
2158; NONEON-NOSVE-NEXT:    ldr w9, [sp, #264]
2159; NONEON-NOSVE-NEXT:    ldr w8, [sp, #268]
2160; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #48] // 8-byte Folded Spill
2161; NONEON-NOSVE-NEXT:    ldp w9, w8, [sp, #224]
2162; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #40] // 8-byte Folded Spill
2163; NONEON-NOSVE-NEXT:    ldp w9, w8, [sp, #232]
2164; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #32] // 8-byte Folded Spill
2165; NONEON-NOSVE-NEXT:    ldp w9, w8, [sp, #240]
2166; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #24] // 8-byte Folded Spill
2167; NONEON-NOSVE-NEXT:    ldp w9, w8, [sp, #248]
2168; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #16] // 8-byte Folded Spill
2169; NONEON-NOSVE-NEXT:    ldr w9, [sp, #368]
2170; NONEON-NOSVE-NEXT:    ldr w8, [sp, #372]
2171; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #8] // 8-byte Folded Spill
2172; NONEON-NOSVE-NEXT:    ldr w8, [sp, #324]
2173; NONEON-NOSVE-NEXT:    ldr w9, [sp, #364]
2174; NONEON-NOSVE-NEXT:    add w8, w8, w8
2175; NONEON-NOSVE-NEXT:    strh w8, [sp, #490]
2176; NONEON-NOSVE-NEXT:    add w8, w10, w10
2177; NONEON-NOSVE-NEXT:    strh w8, [sp, #488]
2178; NONEON-NOSVE-NEXT:    add w8, w9, w9
2179; NONEON-NOSVE-NEXT:    strh w8, [sp, #486]
2180; NONEON-NOSVE-NEXT:    add w8, w11, w11
2181; NONEON-NOSVE-NEXT:    strh w8, [sp, #484]
2182; NONEON-NOSVE-NEXT:    add w8, w12, w12
2183; NONEON-NOSVE-NEXT:    strh w8, [sp, #482]
2184; NONEON-NOSVE-NEXT:    add w8, w13, w13
2185; NONEON-NOSVE-NEXT:    strh w8, [sp, #480]
2186; NONEON-NOSVE-NEXT:    add w8, w14, w14
2187; NONEON-NOSVE-NEXT:    strh w8, [sp, #478]
2188; NONEON-NOSVE-NEXT:    add w8, w15, w15
2189; NONEON-NOSVE-NEXT:    strh w8, [sp, #476]
2190; NONEON-NOSVE-NEXT:    add w8, w16, w16
2191; NONEON-NOSVE-NEXT:    strh w8, [sp, #474]
2192; NONEON-NOSVE-NEXT:    add w8, w17, w17
2193; NONEON-NOSVE-NEXT:    strh w8, [sp, #472]
2194; NONEON-NOSVE-NEXT:    add w8, w18, w18
2195; NONEON-NOSVE-NEXT:    strh w8, [sp, #470]
2196; NONEON-NOSVE-NEXT:    add w8, w0, w0
2197; NONEON-NOSVE-NEXT:    strh w8, [sp, #468]
2198; NONEON-NOSVE-NEXT:    add w8, w1, w1
2199; NONEON-NOSVE-NEXT:    strh w8, [sp, #466]
2200; NONEON-NOSVE-NEXT:    add w8, w2, w2
2201; NONEON-NOSVE-NEXT:    strh w8, [sp, #464]
2202; NONEON-NOSVE-NEXT:    add w8, w3, w3
2203; NONEON-NOSVE-NEXT:    strh w8, [sp, #462]
2204; NONEON-NOSVE-NEXT:    add w8, w4, w4
2205; NONEON-NOSVE-NEXT:    strh w8, [sp, #460]
2206; NONEON-NOSVE-NEXT:    add w8, w6, w6
2207; NONEON-NOSVE-NEXT:    strh w8, [sp, #458]
2208; NONEON-NOSVE-NEXT:    add w8, w7, w7
2209; NONEON-NOSVE-NEXT:    strh w8, [sp, #456]
2210; NONEON-NOSVE-NEXT:    add w8, w19, w19
2211; NONEON-NOSVE-NEXT:    strh w8, [sp, #454]
2212; NONEON-NOSVE-NEXT:    add w8, w20, w20
2213; NONEON-NOSVE-NEXT:    strh w8, [sp, #452]
2214; NONEON-NOSVE-NEXT:    add w8, w22, w22
2215; NONEON-NOSVE-NEXT:    strh w8, [sp, #450]
2216; NONEON-NOSVE-NEXT:    add w8, w24, w24
2217; NONEON-NOSVE-NEXT:    strh w8, [sp, #448]
2218; NONEON-NOSVE-NEXT:    add w8, w25, w25
2219; NONEON-NOSVE-NEXT:    strh w8, [sp, #510]
2220; NONEON-NOSVE-NEXT:    add w8, w26, w26
2221; NONEON-NOSVE-NEXT:    strh w8, [sp, #508]
2222; NONEON-NOSVE-NEXT:    add w8, w27, w27
2223; NONEON-NOSVE-NEXT:    strh w8, [sp, #506]
2224; NONEON-NOSVE-NEXT:    add w8, w28, w28
2225; NONEON-NOSVE-NEXT:    strh w8, [sp, #504]
2226; NONEON-NOSVE-NEXT:    add w8, w29, w29
2227; NONEON-NOSVE-NEXT:    strh w8, [sp, #502]
2228; NONEON-NOSVE-NEXT:    add w8, w30, w30
2229; NONEON-NOSVE-NEXT:    strh w8, [sp, #500]
2230; NONEON-NOSVE-NEXT:    ldr w8, [sp, #8] // 4-byte Folded Reload
2231; NONEON-NOSVE-NEXT:    ldp q1, q0, [sp, #464]
2232; NONEON-NOSVE-NEXT:    add w8, w8, w8
2233; NONEON-NOSVE-NEXT:    strh w8, [sp, #498]
2234; NONEON-NOSVE-NEXT:    ldr w8, [sp, #12] // 4-byte Folded Reload
2235; NONEON-NOSVE-NEXT:    add w8, w8, w8
2236; NONEON-NOSVE-NEXT:    strh w8, [sp, #496]
2237; NONEON-NOSVE-NEXT:    ldr w8, [sp, #16] // 4-byte Folded Reload
2238; NONEON-NOSVE-NEXT:    add w8, w8, w8
2239; NONEON-NOSVE-NEXT:    strh w8, [sp, #446]
2240; NONEON-NOSVE-NEXT:    ldr w8, [sp, #20] // 4-byte Folded Reload
2241; NONEON-NOSVE-NEXT:    add w8, w8, w8
2242; NONEON-NOSVE-NEXT:    strh w8, [sp, #444]
2243; NONEON-NOSVE-NEXT:    ldr w8, [sp, #24] // 4-byte Folded Reload
2244; NONEON-NOSVE-NEXT:    add w8, w8, w8
2245; NONEON-NOSVE-NEXT:    strh w8, [sp, #442]
2246; NONEON-NOSVE-NEXT:    ldr w8, [sp, #28] // 4-byte Folded Reload
2247; NONEON-NOSVE-NEXT:    add w8, w8, w8
2248; NONEON-NOSVE-NEXT:    strh w8, [sp, #440]
2249; NONEON-NOSVE-NEXT:    ldr w8, [sp, #32] // 4-byte Folded Reload
2250; NONEON-NOSVE-NEXT:    add w8, w8, w8
2251; NONEON-NOSVE-NEXT:    strh w8, [sp, #438]
2252; NONEON-NOSVE-NEXT:    ldr w8, [sp, #36] // 4-byte Folded Reload
2253; NONEON-NOSVE-NEXT:    add w8, w8, w8
2254; NONEON-NOSVE-NEXT:    strh w8, [sp, #436]
2255; NONEON-NOSVE-NEXT:    ldr w8, [sp, #40] // 4-byte Folded Reload
2256; NONEON-NOSVE-NEXT:    add w8, w8, w8
2257; NONEON-NOSVE-NEXT:    strh w8, [sp, #434]
2258; NONEON-NOSVE-NEXT:    ldr w8, [sp, #44] // 4-byte Folded Reload
2259; NONEON-NOSVE-NEXT:    add w8, w8, w8
2260; NONEON-NOSVE-NEXT:    strh w8, [sp, #432]
2261; NONEON-NOSVE-NEXT:    ldr w8, [sp, #48] // 4-byte Folded Reload
2262; NONEON-NOSVE-NEXT:    ldp q6, q3, [sp, #432]
2263; NONEON-NOSVE-NEXT:    add w8, w8, w8
2264; NONEON-NOSVE-NEXT:    strh w8, [sp, #526]
2265; NONEON-NOSVE-NEXT:    ldr w8, [sp, #52] // 4-byte Folded Reload
2266; NONEON-NOSVE-NEXT:    add w8, w8, w8
2267; NONEON-NOSVE-NEXT:    strh w8, [sp, #524]
2268; NONEON-NOSVE-NEXT:    ldr w8, [sp, #56] // 4-byte Folded Reload
2269; NONEON-NOSVE-NEXT:    add w8, w8, w8
2270; NONEON-NOSVE-NEXT:    strh w8, [sp, #522]
2271; NONEON-NOSVE-NEXT:    ldr w8, [sp, #60] // 4-byte Folded Reload
2272; NONEON-NOSVE-NEXT:    add w8, w8, w8
2273; NONEON-NOSVE-NEXT:    strh w8, [sp, #520]
2274; NONEON-NOSVE-NEXT:    ldr w8, [sp, #64] // 4-byte Folded Reload
2275; NONEON-NOSVE-NEXT:    add w8, w8, w8
2276; NONEON-NOSVE-NEXT:    strh w8, [sp, #518]
2277; NONEON-NOSVE-NEXT:    ldr w8, [sp, #68] // 4-byte Folded Reload
2278; NONEON-NOSVE-NEXT:    add w8, w8, w8
2279; NONEON-NOSVE-NEXT:    strh w8, [sp, #516]
2280; NONEON-NOSVE-NEXT:    ldr w8, [sp, #72] // 4-byte Folded Reload
2281; NONEON-NOSVE-NEXT:    add w8, w8, w8
2282; NONEON-NOSVE-NEXT:    strh w8, [sp, #514]
2283; NONEON-NOSVE-NEXT:    ldr w8, [sp, #76] // 4-byte Folded Reload
2284; NONEON-NOSVE-NEXT:    add w8, w8, w8
2285; NONEON-NOSVE-NEXT:    strh w8, [sp, #512]
2286; NONEON-NOSVE-NEXT:    ldr w8, [sp, #80] // 4-byte Folded Reload
2287; NONEON-NOSVE-NEXT:    ldp q4, q7, [sp, #496]
2288; NONEON-NOSVE-NEXT:    add w8, w8, w8
2289; NONEON-NOSVE-NEXT:    strh w8, [sp, #414]
2290; NONEON-NOSVE-NEXT:    ldr w8, [sp, #84] // 4-byte Folded Reload
2291; NONEON-NOSVE-NEXT:    add w8, w8, w8
2292; NONEON-NOSVE-NEXT:    strh w8, [sp, #412]
2293; NONEON-NOSVE-NEXT:    ldr w8, [sp, #88] // 4-byte Folded Reload
2294; NONEON-NOSVE-NEXT:    add w8, w8, w8
2295; NONEON-NOSVE-NEXT:    strh w8, [sp, #410]
2296; NONEON-NOSVE-NEXT:    ldr w8, [sp, #92] // 4-byte Folded Reload
2297; NONEON-NOSVE-NEXT:    add w8, w8, w8
2298; NONEON-NOSVE-NEXT:    strh w8, [sp, #408]
2299; NONEON-NOSVE-NEXT:    ldr w8, [sp, #96] // 4-byte Folded Reload
2300; NONEON-NOSVE-NEXT:    add w8, w8, w8
2301; NONEON-NOSVE-NEXT:    strh w8, [sp, #406]
2302; NONEON-NOSVE-NEXT:    ldr w8, [sp, #100] // 4-byte Folded Reload
2303; NONEON-NOSVE-NEXT:    add w8, w8, w8
2304; NONEON-NOSVE-NEXT:    strh w8, [sp, #404]
2305; NONEON-NOSVE-NEXT:    ldr w8, [sp, #104] // 4-byte Folded Reload
2306; NONEON-NOSVE-NEXT:    add w8, w8, w8
2307; NONEON-NOSVE-NEXT:    strh w8, [sp, #402]
2308; NONEON-NOSVE-NEXT:    ldr w8, [sp, #108] // 4-byte Folded Reload
2309; NONEON-NOSVE-NEXT:    add w8, w8, w8
2310; NONEON-NOSVE-NEXT:    strh w8, [sp, #400]
2311; NONEON-NOSVE-NEXT:    ldr w8, [sp, #112] // 4-byte Folded Reload
2312; NONEON-NOSVE-NEXT:    add w8, w8, w8
2313; NONEON-NOSVE-NEXT:    strh w8, [sp, #430]
2314; NONEON-NOSVE-NEXT:    ldr w8, [sp, #116] // 4-byte Folded Reload
2315; NONEON-NOSVE-NEXT:    add w8, w8, w8
2316; NONEON-NOSVE-NEXT:    strh w8, [sp, #428]
2317; NONEON-NOSVE-NEXT:    ldr w8, [sp, #120] // 4-byte Folded Reload
2318; NONEON-NOSVE-NEXT:    add w8, w8, w8
2319; NONEON-NOSVE-NEXT:    strh w8, [sp, #426]
2320; NONEON-NOSVE-NEXT:    ldr w8, [sp, #124] // 4-byte Folded Reload
2321; NONEON-NOSVE-NEXT:    add w8, w8, w8
2322; NONEON-NOSVE-NEXT:    strh w8, [sp, #424]
2323; NONEON-NOSVE-NEXT:    ldr w8, [sp, #128] // 4-byte Folded Reload
2324; NONEON-NOSVE-NEXT:    add w8, w8, w8
2325; NONEON-NOSVE-NEXT:    strh w8, [sp, #422]
2326; NONEON-NOSVE-NEXT:    ldr w8, [sp, #132] // 4-byte Folded Reload
2327; NONEON-NOSVE-NEXT:    add w8, w8, w8
2328; NONEON-NOSVE-NEXT:    strh w8, [sp, #420]
2329; NONEON-NOSVE-NEXT:    ldr w8, [sp, #136] // 4-byte Folded Reload
2330; NONEON-NOSVE-NEXT:    add w8, w8, w8
2331; NONEON-NOSVE-NEXT:    strh w8, [sp, #418]
2332; NONEON-NOSVE-NEXT:    ldr w8, [sp, #140] // 4-byte Folded Reload
2333; NONEON-NOSVE-NEXT:    add w8, w8, w8
2334; NONEON-NOSVE-NEXT:    strh w8, [sp, #416]
2335; NONEON-NOSVE-NEXT:    ldp q5, q2, [sp, #400]
2336; NONEON-NOSVE-NEXT:    stp q1, q0, [x5]
2337; NONEON-NOSVE-NEXT:    stp q4, q3, [x5, #32]
2338; NONEON-NOSVE-NEXT:    stp q7, q6, [x5, #64]
2339; NONEON-NOSVE-NEXT:    stp q2, q5, [x5, #96]
2340; NONEON-NOSVE-NEXT:    add sp, sp, #528
2341; NONEON-NOSVE-NEXT:    ldp x20, x19, [sp, #80] // 16-byte Folded Reload
2342; NONEON-NOSVE-NEXT:    ldp x22, x21, [sp, #64] // 16-byte Folded Reload
2343; NONEON-NOSVE-NEXT:    ldp x24, x23, [sp, #48] // 16-byte Folded Reload
2344; NONEON-NOSVE-NEXT:    ldp x26, x25, [sp, #32] // 16-byte Folded Reload
2345; NONEON-NOSVE-NEXT:    ldp x28, x27, [sp, #16] // 16-byte Folded Reload
2346; NONEON-NOSVE-NEXT:    ldp x29, x30, [sp], #96 // 16-byte Folded Reload
2347; NONEON-NOSVE-NEXT:    ret
; IR under test: %b is the truncate being exercised; %c adds %b to itself so
; the value reaching the store is not the bare truncate.
2348  %a = load <64 x i32>, ptr %in
2349  %b = trunc <64 x i32> %a to <64 x i16>
2350  %c = add <64 x i16> %b, %b
2351  store <64 x i16> %c, ptr %out
2352  ret void
2353}
2354
2355;
2356; truncate i64 -> i8
2357;
2358
2359; NOTE: v4i8 is not legal so result i8 elements are held within i16 containers.
; Truncate <4 x i64> loaded from %in to <4 x i8> and return it.
; With SVE/SME the expected code narrows in two steps: "uzp1 .s" on each
; q-sized half, a "splice" under "ptrue p0.s, vl2" to join them, then one
; "uzp1 .h"; the result is returned in d0.
; Without SVE the expected code reloads the four 64-bit elements from the
; stack as x registers and writes each with "strh" (halfword slots) before
; loading d0.
2360define <4 x i8> @trunc_v4i64_v4i8(ptr %in) nounwind {
2361; CHECK-LABEL: trunc_v4i64_v4i8:
2362; CHECK:       // %bb.0:
2363; CHECK-NEXT:    ldp q1, q0, [x0]
2364; CHECK-NEXT:    ptrue p0.s, vl2
2365; CHECK-NEXT:    uzp1 z3.s, z0.s, z0.s
2366; CHECK-NEXT:    uzp1 z2.s, z1.s, z1.s
2367; CHECK-NEXT:    splice z0.s, p0, { z2.s, z3.s }
2368; CHECK-NEXT:    uzp1 z0.h, z0.h, z0.h
2369; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
2370; CHECK-NEXT:    ret
2371;
2372; NONEON-NOSVE-LABEL: trunc_v4i64_v4i8:
2373; NONEON-NOSVE:       // %bb.0:
2374; NONEON-NOSVE-NEXT:    ldp q1, q0, [x0]
2375; NONEON-NOSVE-NEXT:    stp q1, q0, [sp, #-48]!
2376; NONEON-NOSVE-NEXT:    ldp x8, x9, [sp, #16]
2377; NONEON-NOSVE-NEXT:    strh w8, [sp, #44]
2378; NONEON-NOSVE-NEXT:    ldp x8, x10, [sp]
2379; NONEON-NOSVE-NEXT:    strh w9, [sp, #46]
2380; NONEON-NOSVE-NEXT:    strh w10, [sp, #42]
2381; NONEON-NOSVE-NEXT:    strh w8, [sp, #40]
2382; NONEON-NOSVE-NEXT:    ldr d0, [sp, #40]
2383; NONEON-NOSVE-NEXT:    add sp, sp, #48
2384; NONEON-NOSVE-NEXT:    ret
; IR under test: the truncate result is returned directly, not stored.
2385  %a = load <4 x i64>, ptr %in
2386  %b = trunc <4 x i64> %a to <4 x i8>
2387  ret <4 x i8> %b
2388}
2389
; Truncate <8 x i64> loaded from %in to <8 x i8> and return it.
; With SVE/SME the expected code narrows in three steps: "uzp1 .s" plus
; "splice" under "ptrue p0.s, vl2", then "uzp1 .h" plus "splice" under
; "ptrue p0.h, vl4", then a final "uzp1 .b"; the result is returned in d0.
; Without SVE the expected code spills the input to the stack, reloads each
; 64-bit element into an x register and writes its low byte with "strb"
; before loading d0.
2390define <8 x i8> @trunc_v8i64_v8i8(ptr %in) nounwind {
2391; CHECK-LABEL: trunc_v8i64_v8i8:
2392; CHECK:       // %bb.0:
2393; CHECK-NEXT:    ldp q1, q0, [x0, #32]
2394; CHECK-NEXT:    ptrue p0.s, vl2
2395; CHECK-NEXT:    ldp q3, q2, [x0]
2396; CHECK-NEXT:    uzp1 z5.s, z0.s, z0.s
2397; CHECK-NEXT:    uzp1 z4.s, z1.s, z1.s
2398; CHECK-NEXT:    uzp1 z1.s, z2.s, z2.s
2399; CHECK-NEXT:    uzp1 z0.s, z3.s, z3.s
2400; CHECK-NEXT:    splice z2.s, p0, { z4.s, z5.s }
2401; CHECK-NEXT:    splice z0.s, p0, { z0.s, z1.s }
2402; CHECK-NEXT:    ptrue p0.h, vl4
2403; CHECK-NEXT:    uzp1 z2.h, z2.h, z2.h
2404; CHECK-NEXT:    uzp1 z1.h, z0.h, z0.h
2405; CHECK-NEXT:    splice z0.h, p0, { z1.h, z2.h }
2406; CHECK-NEXT:    uzp1 z0.b, z0.b, z0.b
2407; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
2408; CHECK-NEXT:    ret
2409;
2410; NONEON-NOSVE-LABEL: trunc_v8i64_v8i8:
2411; NONEON-NOSVE:       // %bb.0:
2412; NONEON-NOSVE-NEXT:    sub sp, sp, #80
2413; NONEON-NOSVE-NEXT:    ldp q1, q0, [x0, #32]
2414; NONEON-NOSVE-NEXT:    ldp q2, q3, [x0]
2415; NONEON-NOSVE-NEXT:    str q1, [sp, #48]
2416; NONEON-NOSVE-NEXT:    stp q0, q3, [sp, #16]
2417; NONEON-NOSVE-NEXT:    ldp x8, x9, [sp, #16]
2418; NONEON-NOSVE-NEXT:    str q2, [sp]
2419; NONEON-NOSVE-NEXT:    strb w8, [sp, #78]
2420; NONEON-NOSVE-NEXT:    ldp x8, x10, [sp, #48]
2421; NONEON-NOSVE-NEXT:    strb w9, [sp, #79]
2422; NONEON-NOSVE-NEXT:    strb w8, [sp, #76]
2423; NONEON-NOSVE-NEXT:    ldp x8, x11, [sp, #32]
2424; NONEON-NOSVE-NEXT:    strb w10, [sp, #77]
2425; NONEON-NOSVE-NEXT:    strb w8, [sp, #74]
2426; NONEON-NOSVE-NEXT:    ldr x8, [sp, #8]
2427; NONEON-NOSVE-NEXT:    strb w11, [sp, #75]
2428; NONEON-NOSVE-NEXT:    strb w8, [sp, #73]
2429; NONEON-NOSVE-NEXT:    ldr x8, [sp]
2430; NONEON-NOSVE-NEXT:    strb w8, [sp, #72]
2431; NONEON-NOSVE-NEXT:    ldr d0, [sp, #72]
2432; NONEON-NOSVE-NEXT:    add sp, sp, #80
2433; NONEON-NOSVE-NEXT:    ret
; IR under test: the truncate result is returned directly, not stored.
2434  %a = load <8 x i64>, ptr %in
2435  %b = trunc <8 x i64> %a to <8 x i8>
2436  ret <8 x i8> %b
2437}
2438
; Truncate <16 x i64> loaded from %in to <16 x i8> and return it.
; With SVE/SME the expected code narrows in three steps, each a round of
; "uzp1" followed by "splice": .s elements under "ptrue p0.s, vl2", .h
; elements under "ptrue p0.h, vl4", and .b elements under "ptrue p0.b, vl8";
; the result is returned in q0.
; Without SVE the expected code spills the input to the stack, reloads each
; 64-bit element into an x register and writes its low byte with "strb"
; before loading q0.
2439define <16 x i8> @trunc_v16i64_v16i8(ptr %in) nounwind {
2440; CHECK-LABEL: trunc_v16i64_v16i8:
2441; CHECK:       // %bb.0:
2442; CHECK-NEXT:    ldp q0, q1, [x0, #96]
2443; CHECK-NEXT:    ptrue p0.s, vl2
2444; CHECK-NEXT:    ldp q2, q3, [x0, #32]
2445; CHECK-NEXT:    ldp q4, q5, [x0, #64]
2446; CHECK-NEXT:    ldp q6, q7, [x0]
2447; CHECK-NEXT:    uzp1 z17.s, z1.s, z1.s
2448; CHECK-NEXT:    uzp1 z16.s, z0.s, z0.s
2449; CHECK-NEXT:    uzp1 z19.s, z3.s, z3.s
2450; CHECK-NEXT:    uzp1 z1.s, z5.s, z5.s
2451; CHECK-NEXT:    uzp1 z18.s, z2.s, z2.s
2452; CHECK-NEXT:    uzp1 z0.s, z4.s, z4.s
2453; CHECK-NEXT:    uzp1 z3.s, z7.s, z7.s
2454; CHECK-NEXT:    uzp1 z2.s, z6.s, z6.s
2455; CHECK-NEXT:    splice z4.s, p0, { z16.s, z17.s }
2456; CHECK-NEXT:    splice z0.s, p0, { z0.s, z1.s }
2457; CHECK-NEXT:    splice z1.s, p0, { z18.s, z19.s }
2458; CHECK-NEXT:    splice z2.s, p0, { z2.s, z3.s }
2459; CHECK-NEXT:    ptrue p0.h, vl4
2460; CHECK-NEXT:    uzp1 z4.h, z4.h, z4.h
2461; CHECK-NEXT:    uzp1 z3.h, z0.h, z0.h
2462; CHECK-NEXT:    uzp1 z1.h, z1.h, z1.h
2463; CHECK-NEXT:    uzp1 z0.h, z2.h, z2.h
2464; CHECK-NEXT:    splice z2.h, p0, { z3.h, z4.h }
2465; CHECK-NEXT:    splice z0.h, p0, { z0.h, z1.h }
2466; CHECK-NEXT:    ptrue p0.b, vl8
2467; CHECK-NEXT:    uzp1 z2.b, z2.b, z2.b
2468; CHECK-NEXT:    uzp1 z1.b, z0.b, z0.b
2469; CHECK-NEXT:    splice z0.b, p0, { z1.b, z2.b }
2470; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
2471; CHECK-NEXT:    ret
2472;
2473; NONEON-NOSVE-LABEL: trunc_v16i64_v16i8:
2474; NONEON-NOSVE:       // %bb.0:
2475; NONEON-NOSVE-NEXT:    sub sp, sp, #144
2476; NONEON-NOSVE-NEXT:    ldp q0, q1, [x0, #96]
2477; NONEON-NOSVE-NEXT:    ldp q2, q3, [x0]
2478; NONEON-NOSVE-NEXT:    ldp q4, q5, [x0, #32]
2479; NONEON-NOSVE-NEXT:    ldp q6, q7, [x0, #64]
2480; NONEON-NOSVE-NEXT:    stp q0, q1, [sp, #16]
2481; NONEON-NOSVE-NEXT:    ldp x8, x9, [sp, #32]
2482; NONEON-NOSVE-NEXT:    str q3, [sp, #80]
2483; NONEON-NOSVE-NEXT:    str q2, [sp]
2484; NONEON-NOSVE-NEXT:    stp q7, q5, [sp, #48]
2485; NONEON-NOSVE-NEXT:    strb w8, [sp, #142]
2486; NONEON-NOSVE-NEXT:    ldp x8, x10, [sp, #16]
2487; NONEON-NOSVE-NEXT:    stp q4, q6, [sp, #96]
2488; NONEON-NOSVE-NEXT:    strb w9, [sp, #143]
2489; NONEON-NOSVE-NEXT:    strb w8, [sp, #140]
2490; NONEON-NOSVE-NEXT:    ldp x8, x11, [sp, #48]
2491; NONEON-NOSVE-NEXT:    strb w10, [sp, #141]
2492; NONEON-NOSVE-NEXT:    strb w8, [sp, #138]
2493; NONEON-NOSVE-NEXT:    ldr x8, [sp, #120]
2494; NONEON-NOSVE-NEXT:    strb w11, [sp, #139]
2495; NONEON-NOSVE-NEXT:    strb w8, [sp, #137]
2496; NONEON-NOSVE-NEXT:    ldr x8, [sp, #112]
2497; NONEON-NOSVE-NEXT:    strb w8, [sp, #136]
2498; NONEON-NOSVE-NEXT:    ldr x8, [sp, #72]
2499; NONEON-NOSVE-NEXT:    strb w8, [sp, #135]
2500; NONEON-NOSVE-NEXT:    ldr x8, [sp, #64]
2501; NONEON-NOSVE-NEXT:    strb w8, [sp, #134]
2502; NONEON-NOSVE-NEXT:    ldr x8, [sp, #104]
2503; NONEON-NOSVE-NEXT:    strb w8, [sp, #133]
2504; NONEON-NOSVE-NEXT:    ldr x8, [sp, #96]
2505; NONEON-NOSVE-NEXT:    strb w8, [sp, #132]
2506; NONEON-NOSVE-NEXT:    ldr x8, [sp, #88]
2507; NONEON-NOSVE-NEXT:    strb w8, [sp, #131]
2508; NONEON-NOSVE-NEXT:    ldr x8, [sp, #80]
2509; NONEON-NOSVE-NEXT:    strb w8, [sp, #130]
2510; NONEON-NOSVE-NEXT:    ldr x8, [sp, #8]
2511; NONEON-NOSVE-NEXT:    strb w8, [sp, #129]
2512; NONEON-NOSVE-NEXT:    ldr x8, [sp]
2513; NONEON-NOSVE-NEXT:    strb w8, [sp, #128]
2514; NONEON-NOSVE-NEXT:    ldr q0, [sp, #128]
2515; NONEON-NOSVE-NEXT:    add sp, sp, #144
2516; NONEON-NOSVE-NEXT:    ret
; IR under test: the truncate result is returned directly, not stored.
2517  %a = load <16 x i64>, ptr %in
2518  %b = trunc <16 x i64> %a to <16 x i8>
2519  ret <16 x i8> %b
2520}
2521
2522; NOTE: Extra 'add' is to prevent the truncate being combined with the store.
2523define void @trunc_v32i64_v32i8(ptr %in, ptr %out) nounwind {
2524; CHECK-LABEL: trunc_v32i64_v32i8:
2525; CHECK:       // %bb.0:
2526; CHECK-NEXT:    ldp q5, q6, [x0, #224]
2527; CHECK-NEXT:    ptrue p0.s, vl2
2528; CHECK-NEXT:    ldp q2, q3, [x0, #32]
2529; CHECK-NEXT:    ldp q4, q7, [x0, #64]
2530; CHECK-NEXT:    uzp1 z17.s, z6.s, z6.s
2531; CHECK-NEXT:    ldp q6, q18, [x0, #192]
2532; CHECK-NEXT:    uzp1 z16.s, z5.s, z5.s
2533; CHECK-NEXT:    ldp q5, q19, [x0, #128]
2534; CHECK-NEXT:    ldp q0, q1, [x0]
2535; CHECK-NEXT:    uzp1 z21.s, z18.s, z18.s
2536; CHECK-NEXT:    ldp q18, q22, [x0, #160]
2537; CHECK-NEXT:    uzp1 z20.s, z6.s, z6.s
2538; CHECK-NEXT:    ldp q6, q23, [x0, #96]
2539; CHECK-NEXT:    splice z16.s, p0, { z16.s, z17.s }
2540; CHECK-NEXT:    uzp1 z27.s, z19.s, z19.s
2541; CHECK-NEXT:    uzp1 z25.s, z22.s, z22.s
2542; CHECK-NEXT:    uzp1 z26.s, z5.s, z5.s
2543; CHECK-NEXT:    uzp1 z24.s, z18.s, z18.s
2544; CHECK-NEXT:    uzp1 z18.s, z23.s, z23.s
2545; CHECK-NEXT:    uzp1 z23.s, z3.s, z3.s
2546; CHECK-NEXT:    uzp1 z17.s, z6.s, z6.s
2547; CHECK-NEXT:    uzp1 z6.s, z7.s, z7.s
2548; CHECK-NEXT:    uzp1 z22.s, z2.s, z2.s
2549; CHECK-NEXT:    uzp1 z5.s, z4.s, z4.s
2550; CHECK-NEXT:    uzp1 z2.s, z1.s, z1.s
2551; CHECK-NEXT:    splice z3.s, p0, { z20.s, z21.s }
2552; CHECK-NEXT:    uzp1 z1.s, z0.s, z0.s
2553; CHECK-NEXT:    splice z0.s, p0, { z24.s, z25.s }
2554; CHECK-NEXT:    splice z7.s, p0, { z26.s, z27.s }
2555; CHECK-NEXT:    splice z4.s, p0, { z17.s, z18.s }
2556; CHECK-NEXT:    uzp1 z17.h, z16.h, z16.h
2557; CHECK-NEXT:    splice z5.s, p0, { z5.s, z6.s }
2558; CHECK-NEXT:    splice z6.s, p0, { z22.s, z23.s }
2559; CHECK-NEXT:    splice z1.s, p0, { z1.s, z2.s }
2560; CHECK-NEXT:    uzp1 z16.h, z3.h, z3.h
2561; CHECK-NEXT:    ptrue p0.h, vl4
2562; CHECK-NEXT:    uzp1 z3.h, z0.h, z0.h
2563; CHECK-NEXT:    uzp1 z19.h, z4.h, z4.h
2564; CHECK-NEXT:    uzp1 z2.h, z7.h, z7.h
2565; CHECK-NEXT:    uzp1 z18.h, z5.h, z5.h
2566; CHECK-NEXT:    uzp1 z5.h, z6.h, z6.h
2567; CHECK-NEXT:    splice z0.h, p0, { z16.h, z17.h }
2568; CHECK-NEXT:    uzp1 z4.h, z1.h, z1.h
2569; CHECK-NEXT:    splice z1.h, p0, { z2.h, z3.h }
2570; CHECK-NEXT:    splice z2.h, p0, { z18.h, z19.h }
2571; CHECK-NEXT:    splice z3.h, p0, { z4.h, z5.h }
2572; CHECK-NEXT:    uzp1 z5.b, z0.b, z0.b
2573; CHECK-NEXT:    ptrue p0.b, vl8
2574; CHECK-NEXT:    uzp1 z4.b, z1.b, z1.b
2575; CHECK-NEXT:    uzp1 z7.b, z2.b, z2.b
2576; CHECK-NEXT:    uzp1 z6.b, z3.b, z3.b
2577; CHECK-NEXT:    splice z0.b, p0, { z4.b, z5.b }
2578; CHECK-NEXT:    splice z1.b, p0, { z6.b, z7.b }
2579; CHECK-NEXT:    add z0.b, z0.b, z0.b
2580; CHECK-NEXT:    add z1.b, z1.b, z1.b
2581; CHECK-NEXT:    stp q1, q0, [x1]
2582; CHECK-NEXT:    ret
2583;
2584; NONEON-NOSVE-LABEL: trunc_v32i64_v32i8:
2585; NONEON-NOSVE:       // %bb.0:
2586; NONEON-NOSVE-NEXT:    sub sp, sp, #416
2587; NONEON-NOSVE-NEXT:    ldp q1, q0, [x0, #96]
2588; NONEON-NOSVE-NEXT:    stp x28, x27, [sp, #336] // 16-byte Folded Spill
2589; NONEON-NOSVE-NEXT:    ldp q3, q2, [x0, #64]
2590; NONEON-NOSVE-NEXT:    stp x26, x25, [sp, #352] // 16-byte Folded Spill
2591; NONEON-NOSVE-NEXT:    stp x24, x23, [sp, #368] // 16-byte Folded Spill
2592; NONEON-NOSVE-NEXT:    ldp q5, q4, [x0, #32]
2593; NONEON-NOSVE-NEXT:    stp x22, x21, [sp, #384] // 16-byte Folded Spill
2594; NONEON-NOSVE-NEXT:    ldp q17, q16, [x0, #128]
2595; NONEON-NOSVE-NEXT:    stp x20, x19, [sp, #400] // 16-byte Folded Spill
2596; NONEON-NOSVE-NEXT:    ldp q7, q6, [x0]
2597; NONEON-NOSVE-NEXT:    stp x29, x30, [sp, #320] // 16-byte Folded Spill
2598; NONEON-NOSVE-NEXT:    ldp q19, q18, [x0, #224]
2599; NONEON-NOSVE-NEXT:    str x1, [sp, #24] // 8-byte Folded Spill
2600; NONEON-NOSVE-NEXT:    ldp q21, q20, [x0, #192]
2601; NONEON-NOSVE-NEXT:    ldp q23, q22, [x0, #160]
2602; NONEON-NOSVE-NEXT:    stp q1, q0, [sp, #160]
2603; NONEON-NOSVE-NEXT:    ldr w8, [sp, #176]
2604; NONEON-NOSVE-NEXT:    ldr w9, [sp, #184]
2605; NONEON-NOSVE-NEXT:    stp q2, q4, [sp, #192]
2606; NONEON-NOSVE-NEXT:    stp q21, q19, [sp, #48]
2607; NONEON-NOSVE-NEXT:    ldr w25, [sp, #208]
2608; NONEON-NOSVE-NEXT:    ldr w26, [sp, #216]
2609; NONEON-NOSVE-NEXT:    add w5, w9, w9
2610; NONEON-NOSVE-NEXT:    add w6, w8, w8
2611; NONEON-NOSVE-NEXT:    ldr w9, [sp, #192]
2612; NONEON-NOSVE-NEXT:    stp q20, q23, [sp, #96]
2613; NONEON-NOSVE-NEXT:    ldr w2, [sp, #64]
2614; NONEON-NOSVE-NEXT:    ldr w16, [sp, #48]
2615; NONEON-NOSVE-NEXT:    add w9, w9, w9
2616; NONEON-NOSVE-NEXT:    ldr w18, [sp, #96]
2617; NONEON-NOSVE-NEXT:    stp q22, q16, [sp, #128]
2618; NONEON-NOSVE-NEXT:    stp q6, q5, [sp, #224]
2619; NONEON-NOSVE-NEXT:    ldr w3, [sp, #72]
2620; NONEON-NOSVE-NEXT:    ldr w14, [sp, #128]
2621; NONEON-NOSVE-NEXT:    stp q3, q17, [sp, #256]
2622; NONEON-NOSVE-NEXT:    ldr w23, [sp, #240]
2623; NONEON-NOSVE-NEXT:    ldr w21, [sp, #224]
2624; NONEON-NOSVE-NEXT:    ldr w8, [sp, #272]
2625; NONEON-NOSVE-NEXT:    ldr w27, [sp, #256]
2626; NONEON-NOSVE-NEXT:    ldr w28, [sp, #264]
2627; NONEON-NOSVE-NEXT:    strb w9, [sp, #298]
2628; NONEON-NOSVE-NEXT:    ldr w24, [sp, #248]
2629; NONEON-NOSVE-NEXT:    ldr w22, [sp, #232]
2630; NONEON-NOSVE-NEXT:    add w9, w27, w27
2631; NONEON-NOSVE-NEXT:    str w8, [sp, #20] // 4-byte Folded Spill
2632; NONEON-NOSVE-NEXT:    ldr w8, [sp, #200]
2633; NONEON-NOSVE-NEXT:    str q7, [sp, #32]
2634; NONEON-NOSVE-NEXT:    ldr w0, [sp, #104]
2635; NONEON-NOSVE-NEXT:    ldr w12, [sp, #112]
2636; NONEON-NOSVE-NEXT:    add w8, w8, w8
2637; NONEON-NOSVE-NEXT:    strb w9, [sp, #296]
2638; NONEON-NOSVE-NEXT:    add w9, w25, w25
2639; NONEON-NOSVE-NEXT:    str q18, [sp, #80]
2640; NONEON-NOSVE-NEXT:    ldr w19, [sp, #32]
2641; NONEON-NOSVE-NEXT:    ldr w20, [sp, #40]
2642; NONEON-NOSVE-NEXT:    strb w8, [sp, #299]
2643; NONEON-NOSVE-NEXT:    add w8, w28, w28
2644; NONEON-NOSVE-NEXT:    ldr w4, [sp, #80]
2645; NONEON-NOSVE-NEXT:    strb w9, [sp, #294]
2646; NONEON-NOSVE-NEXT:    add w9, w23, w23
2647; NONEON-NOSVE-NEXT:    ldr w7, [sp, #88]
2648; NONEON-NOSVE-NEXT:    strb w8, [sp, #297]
2649; NONEON-NOSVE-NEXT:    add w8, w26, w26
2650; NONEON-NOSVE-NEXT:    ldr w17, [sp, #56]
2651; NONEON-NOSVE-NEXT:    strb w9, [sp, #292]
2652; NONEON-NOSVE-NEXT:    add w9, w21, w21
2653; NONEON-NOSVE-NEXT:    ldr w10, [sp, #144]
2654; NONEON-NOSVE-NEXT:    strb w8, [sp, #295]
2655; NONEON-NOSVE-NEXT:    add w8, w24, w24
2656; NONEON-NOSVE-NEXT:    ldr w15, [sp, #136]
2657; NONEON-NOSVE-NEXT:    strb w9, [sp, #290]
2658; NONEON-NOSVE-NEXT:    add w9, w19, w19
2659; NONEON-NOSVE-NEXT:    ldr w13, [sp, #120]
2660; NONEON-NOSVE-NEXT:    strb w8, [sp, #293]
2661; NONEON-NOSVE-NEXT:    add w8, w22, w22
2662; NONEON-NOSVE-NEXT:    ldr w11, [sp, #152]
2663; NONEON-NOSVE-NEXT:    strb w9, [sp, #288]
2664; NONEON-NOSVE-NEXT:    add w9, w4, w4
2665; NONEON-NOSVE-NEXT:    ldr w1, [sp, #280]
2666; NONEON-NOSVE-NEXT:    strb w8, [sp, #291]
2667; NONEON-NOSVE-NEXT:    add w8, w20, w20
2668; NONEON-NOSVE-NEXT:    ldr w29, [sp, #160]
2669; NONEON-NOSVE-NEXT:    strb w9, [sp, #318]
2670; NONEON-NOSVE-NEXT:    add w9, w2, w2
2671; NONEON-NOSVE-NEXT:    ldr w30, [sp, #168]
2672; NONEON-NOSVE-NEXT:    strb w8, [sp, #289]
2673; NONEON-NOSVE-NEXT:    add w8, w7, w7
2674; NONEON-NOSVE-NEXT:    strb w9, [sp, #316]
2675; NONEON-NOSVE-NEXT:    add w9, w18, w18
2676; NONEON-NOSVE-NEXT:    strb w8, [sp, #319]
2677; NONEON-NOSVE-NEXT:    add w8, w3, w3
2678; NONEON-NOSVE-NEXT:    strb w9, [sp, #314]
2679; NONEON-NOSVE-NEXT:    add w9, w16, w16
2680; NONEON-NOSVE-NEXT:    strb w8, [sp, #317]
2681; NONEON-NOSVE-NEXT:    add w8, w0, w0
2682; NONEON-NOSVE-NEXT:    strb w9, [sp, #312]
2683; NONEON-NOSVE-NEXT:    add w9, w14, w14
2684; NONEON-NOSVE-NEXT:    strb w8, [sp, #315]
2685; NONEON-NOSVE-NEXT:    add w8, w17, w17
2686; NONEON-NOSVE-NEXT:    strb w9, [sp, #310]
2687; NONEON-NOSVE-NEXT:    add w9, w12, w12
2688; NONEON-NOSVE-NEXT:    strb w8, [sp, #313]
2689; NONEON-NOSVE-NEXT:    add w8, w15, w15
2690; NONEON-NOSVE-NEXT:    strb w9, [sp, #308]
2691; NONEON-NOSVE-NEXT:    add w9, w10, w10
2692; NONEON-NOSVE-NEXT:    strb w8, [sp, #311]
2693; NONEON-NOSVE-NEXT:    add w8, w13, w13
2694; NONEON-NOSVE-NEXT:    strb w9, [sp, #306]
2695; NONEON-NOSVE-NEXT:    ldr w9, [sp, #20] // 4-byte Folded Reload
2696; NONEON-NOSVE-NEXT:    strb w8, [sp, #309]
2697; NONEON-NOSVE-NEXT:    add w8, w11, w11
2698; NONEON-NOSVE-NEXT:    add w9, w9, w9
2699; NONEON-NOSVE-NEXT:    strb w5, [sp, #303]
2700; NONEON-NOSVE-NEXT:    add w5, w30, w30
2701; NONEON-NOSVE-NEXT:    strb w6, [sp, #302]
2702; NONEON-NOSVE-NEXT:    add w6, w29, w29
2703; NONEON-NOSVE-NEXT:    strb w8, [sp, #307]
2704; NONEON-NOSVE-NEXT:    add w8, w1, w1
2705; NONEON-NOSVE-NEXT:    strb w5, [sp, #301]
2706; NONEON-NOSVE-NEXT:    ldp x20, x19, [sp, #400] // 16-byte Folded Reload
2707; NONEON-NOSVE-NEXT:    strb w6, [sp, #300]
2708; NONEON-NOSVE-NEXT:    ldp x22, x21, [sp, #384] // 16-byte Folded Reload
2709; NONEON-NOSVE-NEXT:    strb w8, [sp, #305]
2710; NONEON-NOSVE-NEXT:    ldr x8, [sp, #24] // 8-byte Folded Reload
2711; NONEON-NOSVE-NEXT:    strb w9, [sp, #304]
2712; NONEON-NOSVE-NEXT:    ldp x24, x23, [sp, #368] // 16-byte Folded Reload
2713; NONEON-NOSVE-NEXT:    ldp q1, q0, [sp, #288]
2714; NONEON-NOSVE-NEXT:    ldp x26, x25, [sp, #352] // 16-byte Folded Reload
2715; NONEON-NOSVE-NEXT:    ldp x28, x27, [sp, #336] // 16-byte Folded Reload
2716; NONEON-NOSVE-NEXT:    stp q1, q0, [x8]
2717; NONEON-NOSVE-NEXT:    ldp x29, x30, [sp, #320] // 16-byte Folded Reload
2718; NONEON-NOSVE-NEXT:    add sp, sp, #416
2719; NONEON-NOSVE-NEXT:    ret
2720  %a = load <32 x i64>, ptr %in
2721  %b = trunc <32 x i64> %a to <32 x i8>
2722  %c = add <32 x i8> %b, %b
2723  store <32 x i8> %c, ptr %out
2724  ret void
2725}
2726
2727;
2728; truncate i64 -> i16
2729;
2730
; Loads a <4 x i64> from %in, truncates every lane to i16 and returns the
; resulting <4 x i16>.
; Expected lowering with SVE/SME: uzp1 on .s elements, a splice of the two
; halves, then a second uzp1 on .h elements.
; Expected lowering without SVE: both q registers are stored to the stack and
; the low halfword of each 64-bit lane is written back with strh.
2731define <4 x i16> @trunc_v4i64_v4i16(ptr %in) nounwind {
2732; CHECK-LABEL: trunc_v4i64_v4i16:
2733; CHECK:       // %bb.0:
2734; CHECK-NEXT:    ldp q1, q0, [x0]
2735; CHECK-NEXT:    ptrue p0.s, vl2
2736; CHECK-NEXT:    uzp1 z3.s, z0.s, z0.s
2737; CHECK-NEXT:    uzp1 z2.s, z1.s, z1.s
2738; CHECK-NEXT:    splice z0.s, p0, { z2.s, z3.s }
2739; CHECK-NEXT:    uzp1 z0.h, z0.h, z0.h
2740; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
2741; CHECK-NEXT:    ret
2742;
2743; NONEON-NOSVE-LABEL: trunc_v4i64_v4i16:
2744; NONEON-NOSVE:       // %bb.0:
2745; NONEON-NOSVE-NEXT:    ldp q1, q0, [x0]
2746; NONEON-NOSVE-NEXT:    stp q1, q0, [sp, #-48]!
2747; NONEON-NOSVE-NEXT:    ldp x8, x9, [sp, #16]
2748; NONEON-NOSVE-NEXT:    strh w8, [sp, #44]
2749; NONEON-NOSVE-NEXT:    ldp x8, x10, [sp]
2750; NONEON-NOSVE-NEXT:    strh w9, [sp, #46]
2751; NONEON-NOSVE-NEXT:    strh w10, [sp, #42]
2752; NONEON-NOSVE-NEXT:    strh w8, [sp, #40]
2753; NONEON-NOSVE-NEXT:    ldr d0, [sp, #40]
2754; NONEON-NOSVE-NEXT:    add sp, sp, #48
2755; NONEON-NOSVE-NEXT:    ret
2756  %a = load <4 x i64>, ptr %in
2757  %b = trunc <4 x i64> %a to <4 x i16>
2758  ret <4 x i16> %b
2759}
2760
; Loads an <8 x i64> from %in, truncates every lane to i16 and returns the
; resulting <8 x i16>.
; Expected lowering with SVE/SME: two narrowing rounds, each built from uzp1
; and splice (first on .s elements, then on .h elements).
; Expected lowering without SVE: the four q registers are stored into an
; 80-byte stack frame and each lane's low halfword is written back with strh
; before the result is reloaded into q0.
2761define <8 x i16> @trunc_v8i64_v8i16(ptr %in) nounwind {
2762; CHECK-LABEL: trunc_v8i64_v8i16:
2763; CHECK:       // %bb.0:
2764; CHECK-NEXT:    ldp q1, q0, [x0, #32]
2765; CHECK-NEXT:    ptrue p0.s, vl2
2766; CHECK-NEXT:    ldp q3, q2, [x0]
2767; CHECK-NEXT:    uzp1 z5.s, z0.s, z0.s
2768; CHECK-NEXT:    uzp1 z4.s, z1.s, z1.s
2769; CHECK-NEXT:    uzp1 z1.s, z2.s, z2.s
2770; CHECK-NEXT:    uzp1 z0.s, z3.s, z3.s
2771; CHECK-NEXT:    splice z2.s, p0, { z4.s, z5.s }
2772; CHECK-NEXT:    splice z0.s, p0, { z0.s, z1.s }
2773; CHECK-NEXT:    ptrue p0.h, vl4
2774; CHECK-NEXT:    uzp1 z2.h, z2.h, z2.h
2775; CHECK-NEXT:    uzp1 z1.h, z0.h, z0.h
2776; CHECK-NEXT:    splice z0.h, p0, { z1.h, z2.h }
2777; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
2778; CHECK-NEXT:    ret
2779;
2780; NONEON-NOSVE-LABEL: trunc_v8i64_v8i16:
2781; NONEON-NOSVE:       // %bb.0:
2782; NONEON-NOSVE-NEXT:    sub sp, sp, #80
2783; NONEON-NOSVE-NEXT:    ldp q1, q0, [x0, #32]
2784; NONEON-NOSVE-NEXT:    ldp q2, q3, [x0]
2785; NONEON-NOSVE-NEXT:    str q1, [sp, #48]
2786; NONEON-NOSVE-NEXT:    stp q0, q3, [sp, #16]
2787; NONEON-NOSVE-NEXT:    ldp x8, x9, [sp, #16]
2788; NONEON-NOSVE-NEXT:    str q2, [sp]
2789; NONEON-NOSVE-NEXT:    strh w8, [sp, #76]
2790; NONEON-NOSVE-NEXT:    ldp x8, x10, [sp, #48]
2791; NONEON-NOSVE-NEXT:    strh w9, [sp, #78]
2792; NONEON-NOSVE-NEXT:    strh w8, [sp, #72]
2793; NONEON-NOSVE-NEXT:    ldp x8, x11, [sp, #32]
2794; NONEON-NOSVE-NEXT:    strh w10, [sp, #74]
2795; NONEON-NOSVE-NEXT:    strh w8, [sp, #68]
2796; NONEON-NOSVE-NEXT:    ldr x8, [sp, #8]
2797; NONEON-NOSVE-NEXT:    strh w11, [sp, #70]
2798; NONEON-NOSVE-NEXT:    strh w8, [sp, #66]
2799; NONEON-NOSVE-NEXT:    ldr x8, [sp]
2800; NONEON-NOSVE-NEXT:    strh w8, [sp, #64]
2801; NONEON-NOSVE-NEXT:    ldr q0, [sp, #64]
2802; NONEON-NOSVE-NEXT:    add sp, sp, #80
2803; NONEON-NOSVE-NEXT:    ret
2804  %a = load <8 x i64>, ptr %in
2805  %b = trunc <8 x i64> %a to <8 x i16>
2806  ret <8 x i16> %b
2807}
2808
2809; NOTE: Extra 'add' is to prevent the truncate being combined with the store.
; Loads a <16 x i64> from %in, truncates every lane to i16, adds the truncated
; vector to itself and stores the <16 x i16> result to %out.
; Expected lowering with SVE/SME: uzp1 + splice on .s elements, uzp1 + splice
; on .h elements, two vector adds on .h elements and one stp to [x1].
; Expected lowering without SVE: the source is stored into a 160-byte stack
; frame and every lane is handled with a scalar ldr w / add w / strh sequence
; before the two result q registers are reloaded and stored to [x1].
2810define void @trunc_v16i64_v16i16(ptr %in, ptr %out) nounwind {
2811; CHECK-LABEL: trunc_v16i64_v16i16:
2812; CHECK:       // %bb.0:
2813; CHECK-NEXT:    ldp q0, q1, [x0, #96]
2814; CHECK-NEXT:    ptrue p0.s, vl2
2815; CHECK-NEXT:    ldp q2, q3, [x0, #32]
2816; CHECK-NEXT:    ldp q4, q5, [x0, #64]
2817; CHECK-NEXT:    ldp q6, q7, [x0]
2818; CHECK-NEXT:    uzp1 z17.s, z1.s, z1.s
2819; CHECK-NEXT:    uzp1 z16.s, z0.s, z0.s
2820; CHECK-NEXT:    uzp1 z1.s, z3.s, z3.s
2821; CHECK-NEXT:    uzp1 z19.s, z5.s, z5.s
2822; CHECK-NEXT:    uzp1 z0.s, z2.s, z2.s
2823; CHECK-NEXT:    uzp1 z3.s, z7.s, z7.s
2824; CHECK-NEXT:    uzp1 z18.s, z4.s, z4.s
2825; CHECK-NEXT:    uzp1 z2.s, z6.s, z6.s
2826; CHECK-NEXT:    splice z4.s, p0, { z16.s, z17.s }
2827; CHECK-NEXT:    splice z0.s, p0, { z0.s, z1.s }
2828; CHECK-NEXT:    splice z5.s, p0, { z18.s, z19.s }
2829; CHECK-NEXT:    splice z1.s, p0, { z2.s, z3.s }
2830; CHECK-NEXT:    ptrue p0.h, vl4
2831; CHECK-NEXT:    uzp1 z3.h, z4.h, z4.h
2832; CHECK-NEXT:    uzp1 z7.h, z0.h, z0.h
2833; CHECK-NEXT:    uzp1 z2.h, z5.h, z5.h
2834; CHECK-NEXT:    uzp1 z6.h, z1.h, z1.h
2835; CHECK-NEXT:    splice z0.h, p0, { z2.h, z3.h }
2836; CHECK-NEXT:    splice z1.h, p0, { z6.h, z7.h }
2837; CHECK-NEXT:    add z0.h, z0.h, z0.h
2838; CHECK-NEXT:    add z1.h, z1.h, z1.h
2839; CHECK-NEXT:    stp q1, q0, [x1]
2840; CHECK-NEXT:    ret
2841;
2842; NONEON-NOSVE-LABEL: trunc_v16i64_v16i16:
2843; NONEON-NOSVE:       // %bb.0:
2844; NONEON-NOSVE-NEXT:    sub sp, sp, #160
2845; NONEON-NOSVE-NEXT:    ldp q3, q2, [x0, #32]
2846; NONEON-NOSVE-NEXT:    ldp q5, q4, [x0]
2847; NONEON-NOSVE-NEXT:    ldp q1, q0, [x0, #64]
2848; NONEON-NOSVE-NEXT:    ldp q7, q6, [x0, #96]
2849; NONEON-NOSVE-NEXT:    stp q2, q4, [sp, #64]
2850; NONEON-NOSVE-NEXT:    stp q3, q1, [sp, #96]
2851; NONEON-NOSVE-NEXT:    ldr w8, [sp, #64]
2852; NONEON-NOSVE-NEXT:    ldr w9, [sp, #72]
2853; NONEON-NOSVE-NEXT:    ldr w2, [sp, #96]
2854; NONEON-NOSVE-NEXT:    ldr w3, [sp, #104]
2855; NONEON-NOSVE-NEXT:    stp q5, q7, [sp]
2856; NONEON-NOSVE-NEXT:    add w9, w9, w9
2857; NONEON-NOSVE-NEXT:    add w8, w8, w8
2858; NONEON-NOSVE-NEXT:    ldr w4, [sp, #80]
2859; NONEON-NOSVE-NEXT:    ldr w5, [sp, #88]
2860; NONEON-NOSVE-NEXT:    stp q6, q0, [sp, #32]
2861; NONEON-NOSVE-NEXT:    ldr w18, [sp]
2862; NONEON-NOSVE-NEXT:    ldr w0, [sp, #8]
2863; NONEON-NOSVE-NEXT:    strh w9, [sp, #142]
2864; NONEON-NOSVE-NEXT:    add w9, w3, w3
2865; NONEON-NOSVE-NEXT:    strh w8, [sp, #140]
2866; NONEON-NOSVE-NEXT:    add w8, w2, w2
2867; NONEON-NOSVE-NEXT:    ldr w16, [sp, #32]
2868; NONEON-NOSVE-NEXT:    ldr w17, [sp, #40]
2869; NONEON-NOSVE-NEXT:    strh w9, [sp, #138]
2870; NONEON-NOSVE-NEXT:    add w9, w5, w5
2871; NONEON-NOSVE-NEXT:    strh w8, [sp, #136]
2872; NONEON-NOSVE-NEXT:    add w8, w4, w4
2873; NONEON-NOSVE-NEXT:    ldr w14, [sp, #16]
2874; NONEON-NOSVE-NEXT:    ldr w15, [sp, #24]
2875; NONEON-NOSVE-NEXT:    strh w9, [sp, #134]
2876; NONEON-NOSVE-NEXT:    add w9, w0, w0
2877; NONEON-NOSVE-NEXT:    strh w8, [sp, #132]
2878; NONEON-NOSVE-NEXT:    add w8, w18, w18
2879; NONEON-NOSVE-NEXT:    ldr w12, [sp, #48]
2880; NONEON-NOSVE-NEXT:    ldr w13, [sp, #56]
2881; NONEON-NOSVE-NEXT:    strh w9, [sp, #130]
2882; NONEON-NOSVE-NEXT:    add w9, w17, w17
2883; NONEON-NOSVE-NEXT:    strh w8, [sp, #128]
2884; NONEON-NOSVE-NEXT:    add w8, w16, w16
2885; NONEON-NOSVE-NEXT:    ldr w10, [sp, #112]
2886; NONEON-NOSVE-NEXT:    ldr w11, [sp, #120]
2887; NONEON-NOSVE-NEXT:    strh w9, [sp, #158]
2888; NONEON-NOSVE-NEXT:    add w9, w15, w15
2889; NONEON-NOSVE-NEXT:    strh w8, [sp, #156]
2890; NONEON-NOSVE-NEXT:    add w8, w14, w14
2891; NONEON-NOSVE-NEXT:    strh w9, [sp, #154]
2892; NONEON-NOSVE-NEXT:    add w9, w13, w13
2893; NONEON-NOSVE-NEXT:    strh w8, [sp, #152]
2894; NONEON-NOSVE-NEXT:    add w8, w12, w12
2895; NONEON-NOSVE-NEXT:    strh w9, [sp, #150]
2896; NONEON-NOSVE-NEXT:    add w9, w11, w11
2897; NONEON-NOSVE-NEXT:    strh w8, [sp, #148]
2898; NONEON-NOSVE-NEXT:    add w8, w10, w10
2899; NONEON-NOSVE-NEXT:    strh w9, [sp, #146]
2900; NONEON-NOSVE-NEXT:    strh w8, [sp, #144]
2901; NONEON-NOSVE-NEXT:    ldp q1, q0, [sp, #128]
2902; NONEON-NOSVE-NEXT:    stp q1, q0, [x1]
2903; NONEON-NOSVE-NEXT:    add sp, sp, #160
2904; NONEON-NOSVE-NEXT:    ret
2905  %a = load <16 x i64>, ptr %in
2906  %b = trunc <16 x i64> %a to <16 x i16>
  ; Adding %b to itself keeps the truncate from being combined with the store.
2907  %c = add <16 x i16> %b, %b
2908  store <16 x i16> %c, ptr %out
2909  ret void
2910}
2911
2912; NOTE: Extra 'add' is to prevent the truncate being combined with the store.
; Loads a <32 x i64> from %in, truncates every lane to i16, adds the truncated
; vector to itself and stores the <32 x i16> result to %out.
; Expected lowering with SVE/SME: uzp1 + splice on .s elements, uzp1 + splice
; on .h elements, four vector adds on .h elements and two stp stores to x1.
; Expected lowering without SVE: a 432-byte stack frame in which x19-x30 are
; spilled and reloaded, with every lane handled by a scalar ldr w / add w /
; strh sequence before the four result q registers are stored to x1.
2913define void @trunc_v32i64_v32i16(ptr %in, ptr %out) nounwind {
2914; CHECK-LABEL: trunc_v32i64_v32i16:
2915; CHECK:       // %bb.0:
2916; CHECK-NEXT:    ldp q2, q3, [x0, #160]
2917; CHECK-NEXT:    ptrue p0.s, vl2
2918; CHECK-NEXT:    ldp q4, q5, [x0, #96]
2919; CHECK-NEXT:    ldp q6, q7, [x0]
2920; CHECK-NEXT:    uzp1 z17.s, z3.s, z3.s
2921; CHECK-NEXT:    ldp q3, q18, [x0, #128]
2922; CHECK-NEXT:    uzp1 z16.s, z2.s, z2.s
2923; CHECK-NEXT:    ldp q2, q19, [x0, #192]
2924; CHECK-NEXT:    ldp q0, q1, [x0, #64]
2925; CHECK-NEXT:    uzp1 z21.s, z18.s, z18.s
2926; CHECK-NEXT:    ldp q18, q22, [x0, #224]
2927; CHECK-NEXT:    uzp1 z20.s, z3.s, z3.s
2928; CHECK-NEXT:    ldp q3, q23, [x0, #32]
2929; CHECK-NEXT:    splice z16.s, p0, { z16.s, z17.s }
2930; CHECK-NEXT:    uzp1 z27.s, z19.s, z19.s
2931; CHECK-NEXT:    uzp1 z25.s, z22.s, z22.s
2932; CHECK-NEXT:    uzp1 z26.s, z2.s, z2.s
2933; CHECK-NEXT:    uzp1 z24.s, z18.s, z18.s
2934; CHECK-NEXT:    uzp1 z18.s, z23.s, z23.s
2935; CHECK-NEXT:    uzp1 z23.s, z5.s, z5.s
2936; CHECK-NEXT:    uzp1 z17.s, z3.s, z3.s
2937; CHECK-NEXT:    uzp1 z3.s, z7.s, z7.s
2938; CHECK-NEXT:    uzp1 z22.s, z4.s, z4.s
2939; CHECK-NEXT:    uzp1 z2.s, z6.s, z6.s
2940; CHECK-NEXT:    uzp1 z5.s, z1.s, z1.s
2941; CHECK-NEXT:    splice z1.s, p0, { z20.s, z21.s }
2942; CHECK-NEXT:    splice z6.s, p0, { z24.s, z25.s }
2943; CHECK-NEXT:    uzp1 z4.s, z0.s, z0.s
2944; CHECK-NEXT:    splice z0.s, p0, { z26.s, z27.s }
2945; CHECK-NEXT:    splice z7.s, p0, { z17.s, z18.s }
2946; CHECK-NEXT:    uzp1 z17.h, z16.h, z16.h
2947; CHECK-NEXT:    splice z2.s, p0, { z2.s, z3.s }
2948; CHECK-NEXT:    splice z3.s, p0, { z22.s, z23.s }
2949; CHECK-NEXT:    splice z4.s, p0, { z4.s, z5.s }
2950; CHECK-NEXT:    uzp1 z16.h, z1.h, z1.h
2951; CHECK-NEXT:    ptrue p0.h, vl4
2952; CHECK-NEXT:    uzp1 z6.h, z6.h, z6.h
2953; CHECK-NEXT:    uzp1 z5.h, z0.h, z0.h
2954; CHECK-NEXT:    uzp1 z1.h, z7.h, z7.h
2955; CHECK-NEXT:    uzp1 z0.h, z2.h, z2.h
2956; CHECK-NEXT:    uzp1 z3.h, z3.h, z3.h
2957; CHECK-NEXT:    splice z7.h, p0, { z16.h, z17.h }
2958; CHECK-NEXT:    uzp1 z2.h, z4.h, z4.h
2959; CHECK-NEXT:    splice z4.h, p0, { z5.h, z6.h }
2960; CHECK-NEXT:    splice z0.h, p0, { z0.h, z1.h }
2961; CHECK-NEXT:    splice z1.h, p0, { z2.h, z3.h }
2962; CHECK-NEXT:    add z2.h, z7.h, z7.h
2963; CHECK-NEXT:    add z3.h, z4.h, z4.h
2964; CHECK-NEXT:    add z0.h, z0.h, z0.h
2965; CHECK-NEXT:    add z1.h, z1.h, z1.h
2966; CHECK-NEXT:    stp q2, q3, [x1, #32]
2967; CHECK-NEXT:    stp q0, q1, [x1]
2968; CHECK-NEXT:    ret
2969;
2970; NONEON-NOSVE-LABEL: trunc_v32i64_v32i16:
2971; NONEON-NOSVE:       // %bb.0:
2972; NONEON-NOSVE-NEXT:    sub sp, sp, #432
2973; NONEON-NOSVE-NEXT:    ldp q1, q0, [x0, #96]
2974; NONEON-NOSVE-NEXT:    stp x28, x27, [sp, #352] // 16-byte Folded Spill
2975; NONEON-NOSVE-NEXT:    ldp q3, q2, [x0, #64]
2976; NONEON-NOSVE-NEXT:    stp x26, x25, [sp, #368] // 16-byte Folded Spill
2977; NONEON-NOSVE-NEXT:    stp x24, x23, [sp, #384] // 16-byte Folded Spill
2978; NONEON-NOSVE-NEXT:    ldp q17, q16, [x0, #128]
2979; NONEON-NOSVE-NEXT:    stp x22, x21, [sp, #400] // 16-byte Folded Spill
2980; NONEON-NOSVE-NEXT:    ldp q5, q4, [x0, #32]
2981; NONEON-NOSVE-NEXT:    stp x20, x19, [sp, #416] // 16-byte Folded Spill
2982; NONEON-NOSVE-NEXT:    ldp q7, q6, [x0]
2983; NONEON-NOSVE-NEXT:    stp x29, x30, [sp, #336] // 16-byte Folded Spill
2984; NONEON-NOSVE-NEXT:    ldp q19, q18, [x0, #224]
2985; NONEON-NOSVE-NEXT:    ldp q21, q20, [x0, #192]
2986; NONEON-NOSVE-NEXT:    ldp q23, q22, [x0, #160]
2987; NONEON-NOSVE-NEXT:    stp q1, q0, [sp, #144]
2988; NONEON-NOSVE-NEXT:    ldr w8, [sp, #160]
2989; NONEON-NOSVE-NEXT:    ldr w9, [sp, #168]
2990; NONEON-NOSVE-NEXT:    stp q2, q4, [sp, #176]
2991; NONEON-NOSVE-NEXT:    stp q21, q19, [sp, #32]
2992; NONEON-NOSVE-NEXT:    ldr w25, [sp, #192]
2993; NONEON-NOSVE-NEXT:    ldr w26, [sp, #200]
2994; NONEON-NOSVE-NEXT:    add w6, w8, w8
2995; NONEON-NOSVE-NEXT:    add w5, w9, w9
2996; NONEON-NOSVE-NEXT:    ldr w9, [sp, #176]
2997; NONEON-NOSVE-NEXT:    stp q20, q23, [sp, #80]
2998; NONEON-NOSVE-NEXT:    ldr w2, [sp, #48]
2999; NONEON-NOSVE-NEXT:    ldr w3, [sp, #56]
3000; NONEON-NOSVE-NEXT:    add w9, w9, w9
3001; NONEON-NOSVE-NEXT:    ldr w18, [sp, #80]
3002; NONEON-NOSVE-NEXT:    stp q22, q16, [sp, #112]
3003; NONEON-NOSVE-NEXT:    stp q6, q5, [sp, #208]
3004; NONEON-NOSVE-NEXT:    ldr w0, [sp, #88]
3005; NONEON-NOSVE-NEXT:    ldr w16, [sp, #32]
3006; NONEON-NOSVE-NEXT:    stp q3, q17, [sp, #240]
3007; NONEON-NOSVE-NEXT:    ldr w23, [sp, #224]
3008; NONEON-NOSVE-NEXT:    ldr w24, [sp, #232]
3009; NONEON-NOSVE-NEXT:    ldr w10, [sp, #256]
3010; NONEON-NOSVE-NEXT:    ldr w8, [sp, #264]
3011; NONEON-NOSVE-NEXT:    ldr w27, [sp, #240]
3012; NONEON-NOSVE-NEXT:    ldr w28, [sp, #248]
3013; NONEON-NOSVE-NEXT:    strh w9, [sp, #308]
3014; NONEON-NOSVE-NEXT:    ldr w21, [sp, #208]
3015; NONEON-NOSVE-NEXT:    add w9, w27, w27
3016; NONEON-NOSVE-NEXT:    stp w8, w10, [sp, #8] // 8-byte Folded Spill
3017; NONEON-NOSVE-NEXT:    ldr w8, [sp, #184]
3018; NONEON-NOSVE-NEXT:    str q7, [sp, #16]
3019; NONEON-NOSVE-NEXT:    ldr w22, [sp, #216]
3020; NONEON-NOSVE-NEXT:    ldr w17, [sp, #40]
3021; NONEON-NOSVE-NEXT:    add w8, w8, w8
3022; NONEON-NOSVE-NEXT:    strh w9, [sp, #304]
3023; NONEON-NOSVE-NEXT:    add w9, w25, w25
3024; NONEON-NOSVE-NEXT:    strh w8, [sp, #310]
3025; NONEON-NOSVE-NEXT:    add w8, w28, w28
3026; NONEON-NOSVE-NEXT:    ldr w19, [sp, #16]
3027; NONEON-NOSVE-NEXT:    strh w8, [sp, #306]
3028; NONEON-NOSVE-NEXT:    add w8, w26, w26
3029; NONEON-NOSVE-NEXT:    ldr w20, [sp, #24]
3030; NONEON-NOSVE-NEXT:    str q18, [sp, #64]
3031; NONEON-NOSVE-NEXT:    ldr w14, [sp, #112]
3032; NONEON-NOSVE-NEXT:    ldr w15, [sp, #120]
3033; NONEON-NOSVE-NEXT:    strh w8, [sp, #302]
3034; NONEON-NOSVE-NEXT:    add w8, w24, w24
3035; NONEON-NOSVE-NEXT:    ldr w4, [sp, #64]
3036; NONEON-NOSVE-NEXT:    strh w9, [sp, #300]
3037; NONEON-NOSVE-NEXT:    add w9, w23, w23
3038; NONEON-NOSVE-NEXT:    ldr w7, [sp, #72]
3039; NONEON-NOSVE-NEXT:    strh w8, [sp, #298]
3040; NONEON-NOSVE-NEXT:    add w8, w22, w22
3041; NONEON-NOSVE-NEXT:    ldr w12, [sp, #96]
3042; NONEON-NOSVE-NEXT:    strh w9, [sp, #296]
3043; NONEON-NOSVE-NEXT:    add w9, w21, w21
3044; NONEON-NOSVE-NEXT:    ldr w13, [sp, #104]
3045; NONEON-NOSVE-NEXT:    strh w8, [sp, #294]
3046; NONEON-NOSVE-NEXT:    add w8, w20, w20
3047; NONEON-NOSVE-NEXT:    ldr w10, [sp, #128]
3048; NONEON-NOSVE-NEXT:    strh w9, [sp, #292]
3049; NONEON-NOSVE-NEXT:    add w9, w19, w19
3050; NONEON-NOSVE-NEXT:    ldr w11, [sp, #136]
3051; NONEON-NOSVE-NEXT:    strh w8, [sp, #290]
3052; NONEON-NOSVE-NEXT:    add w8, w7, w7
3053; NONEON-NOSVE-NEXT:    ldr w29, [sp, #144]
3054; NONEON-NOSVE-NEXT:    strh w9, [sp, #288]
3055; NONEON-NOSVE-NEXT:    add w9, w4, w4
3056; NONEON-NOSVE-NEXT:    ldr w30, [sp, #152]
3057; NONEON-NOSVE-NEXT:    strh w8, [sp, #286]
3058; NONEON-NOSVE-NEXT:    add w8, w3, w3
3059; NONEON-NOSVE-NEXT:    strh w9, [sp, #284]
3060; NONEON-NOSVE-NEXT:    add w9, w2, w2
3061; NONEON-NOSVE-NEXT:    strh w8, [sp, #282]
3062; NONEON-NOSVE-NEXT:    add w8, w0, w0
3063; NONEON-NOSVE-NEXT:    strh w9, [sp, #280]
3064; NONEON-NOSVE-NEXT:    add w9, w18, w18
3065; NONEON-NOSVE-NEXT:    strh w8, [sp, #278]
3066; NONEON-NOSVE-NEXT:    add w8, w17, w17
3067; NONEON-NOSVE-NEXT:    strh w9, [sp, #276]
3068; NONEON-NOSVE-NEXT:    add w9, w16, w16
3069; NONEON-NOSVE-NEXT:    strh w8, [sp, #274]
3070; NONEON-NOSVE-NEXT:    add w8, w15, w15
3071; NONEON-NOSVE-NEXT:    strh w9, [sp, #272]
3072; NONEON-NOSVE-NEXT:    add w9, w14, w14
3073; NONEON-NOSVE-NEXT:    strh w8, [sp, #334]
3074; NONEON-NOSVE-NEXT:    add w8, w13, w13
3075; NONEON-NOSVE-NEXT:    strh w9, [sp, #332]
3076; NONEON-NOSVE-NEXT:    add w9, w12, w12
3077; NONEON-NOSVE-NEXT:    strh w8, [sp, #330]
3078; NONEON-NOSVE-NEXT:    add w8, w11, w11
3079; NONEON-NOSVE-NEXT:    strh w9, [sp, #328]
3080; NONEON-NOSVE-NEXT:    add w9, w10, w10
3081; NONEON-NOSVE-NEXT:    strh w8, [sp, #326]
3082; NONEON-NOSVE-NEXT:    ldr w8, [sp, #8] // 4-byte Folded Reload
3083; NONEON-NOSVE-NEXT:    strh w9, [sp, #324]
3084; NONEON-NOSVE-NEXT:    ldr w9, [sp, #12] // 4-byte Folded Reload
3085; NONEON-NOSVE-NEXT:    add w8, w8, w8
3086; NONEON-NOSVE-NEXT:    strh w5, [sp, #318]
3087; NONEON-NOSVE-NEXT:    add w5, w30, w30
3088; NONEON-NOSVE-NEXT:    strh w6, [sp, #316]
3089; NONEON-NOSVE-NEXT:    add w6, w29, w29
3090; NONEON-NOSVE-NEXT:    add w9, w9, w9
3091; NONEON-NOSVE-NEXT:    strh w5, [sp, #314]
3092; NONEON-NOSVE-NEXT:    ldp q1, q3, [sp, #272]
3093; NONEON-NOSVE-NEXT:    strh w6, [sp, #312]
3094; NONEON-NOSVE-NEXT:    ldp x20, x19, [sp, #416] // 16-byte Folded Reload
3095; NONEON-NOSVE-NEXT:    strh w8, [sp, #322]
3096; NONEON-NOSVE-NEXT:    ldp x22, x21, [sp, #400] // 16-byte Folded Reload
3097; NONEON-NOSVE-NEXT:    strh w9, [sp, #320]
3098; NONEON-NOSVE-NEXT:    ldp x24, x23, [sp, #384] // 16-byte Folded Reload
3099; NONEON-NOSVE-NEXT:    ldp q2, q0, [sp, #304]
3100; NONEON-NOSVE-NEXT:    ldp x26, x25, [sp, #368] // 16-byte Folded Reload
3101; NONEON-NOSVE-NEXT:    ldp x28, x27, [sp, #352] // 16-byte Folded Reload
3102; NONEON-NOSVE-NEXT:    stp q3, q2, [x1]
3103; NONEON-NOSVE-NEXT:    ldp x29, x30, [sp, #336] // 16-byte Folded Reload
3104; NONEON-NOSVE-NEXT:    stp q0, q1, [x1, #32]
3105; NONEON-NOSVE-NEXT:    add sp, sp, #432
3106; NONEON-NOSVE-NEXT:    ret
3107  %a = load <32 x i64>, ptr %in
3108  %b = trunc <32 x i64> %a to <32 x i16>
  ; Adding %b to itself keeps the truncate from being combined with the store.
3109  %c = add <32 x i16> %b, %b
3110  store <32 x i16> %c, ptr %out
3111  ret void
3112}
3113
3114;
3115; truncate i64 -> i32
3116;
3117
; Loads a <4 x i64> from %in, truncates every lane to i32 and returns the
; resulting <4 x i32>.
; Expected lowering with SVE/SME: a single narrowing round, uzp1 on .s
; elements followed by a splice of the two halves.
; Expected lowering without SVE: both q registers are stored to the stack and
; the low word of each 64-bit lane is written back in pairs with stp w.
3118define <4 x i32> @trunc_v4i64_v4i32(ptr %in) nounwind {
3119; CHECK-LABEL: trunc_v4i64_v4i32:
3120; CHECK:       // %bb.0:
3121; CHECK-NEXT:    ldp q1, q0, [x0]
3122; CHECK-NEXT:    ptrue p0.s, vl2
3123; CHECK-NEXT:    uzp1 z3.s, z0.s, z0.s
3124; CHECK-NEXT:    uzp1 z2.s, z1.s, z1.s
3125; CHECK-NEXT:    splice z0.s, p0, { z2.s, z3.s }
3126; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
3127; CHECK-NEXT:    ret
3128;
3129; NONEON-NOSVE-LABEL: trunc_v4i64_v4i32:
3130; NONEON-NOSVE:       // %bb.0:
3131; NONEON-NOSVE-NEXT:    ldp q1, q0, [x0]
3132; NONEON-NOSVE-NEXT:    stp q1, q0, [sp, #-48]!
3133; NONEON-NOSVE-NEXT:    ldp x8, x9, [sp, #16]
3134; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #40]
3135; NONEON-NOSVE-NEXT:    ldp x8, x10, [sp]
3136; NONEON-NOSVE-NEXT:    stp w8, w10, [sp, #32]
3137; NONEON-NOSVE-NEXT:    ldr q0, [sp, #32]
3138; NONEON-NOSVE-NEXT:    add sp, sp, #48
3139; NONEON-NOSVE-NEXT:    ret
3140  %a = load <4 x i64>, ptr %in
3141  %b = trunc <4 x i64> %a to <4 x i32>
3142  ret <4 x i32> %b
3143}
3144
3145; NOTE: Extra 'add' is to prevent the truncate being combined with the store.
; Loads an <8 x i64> from %in, truncates every lane to i32, adds the truncated
; vector to itself and stores the <8 x i32> result to %out.
; Expected lowering with SVE/SME: uzp1 + splice on .s elements, two vector
; adds on .s elements and one stp to [x1].
; Expected lowering without SVE: the source is stored into a 96-byte stack
; frame and every lane is handled with scalar ldr w / add w, the results being
; written in pairs with stp w before the two q registers are stored to [x1].
3146define void @trunc_v8i64_v8i32(ptr %in, ptr %out) nounwind {
3147; CHECK-LABEL: trunc_v8i64_v8i32:
3148; CHECK:       // %bb.0:
3149; CHECK-NEXT:    ldp q1, q0, [x0, #32]
3150; CHECK-NEXT:    ptrue p0.s, vl2
3151; CHECK-NEXT:    ldp q3, q2, [x0]
3152; CHECK-NEXT:    uzp1 z5.s, z0.s, z0.s
3153; CHECK-NEXT:    uzp1 z4.s, z1.s, z1.s
3154; CHECK-NEXT:    uzp1 z1.s, z2.s, z2.s
3155; CHECK-NEXT:    uzp1 z0.s, z3.s, z3.s
3156; CHECK-NEXT:    splice z2.s, p0, { z4.s, z5.s }
3157; CHECK-NEXT:    splice z0.s, p0, { z0.s, z1.s }
3158; CHECK-NEXT:    add z1.s, z2.s, z2.s
3159; CHECK-NEXT:    add z0.s, z0.s, z0.s
3160; CHECK-NEXT:    stp q0, q1, [x1]
3161; CHECK-NEXT:    ret
3162;
3163; NONEON-NOSVE-LABEL: trunc_v8i64_v8i32:
3164; NONEON-NOSVE:       // %bb.0:
3165; NONEON-NOSVE-NEXT:    sub sp, sp, #96
3166; NONEON-NOSVE-NEXT:    ldp q0, q1, [x0, #32]
3167; NONEON-NOSVE-NEXT:    ldp q3, q2, [x0]
3168; NONEON-NOSVE-NEXT:    stp q3, q1, [sp]
3169; NONEON-NOSVE-NEXT:    stp q2, q0, [sp, #32]
3170; NONEON-NOSVE-NEXT:    ldr w12, [sp]
3171; NONEON-NOSVE-NEXT:    ldr w13, [sp, #8]
3172; NONEON-NOSVE-NEXT:    ldr w8, [sp, #32]
3173; NONEON-NOSVE-NEXT:    ldr w9, [sp, #40]
3174; NONEON-NOSVE-NEXT:    ldr w14, [sp, #16]
3175; NONEON-NOSVE-NEXT:    ldr w15, [sp, #24]
3176; NONEON-NOSVE-NEXT:    ldr w10, [sp, #48]
3177; NONEON-NOSVE-NEXT:    ldr w11, [sp, #56]
3178; NONEON-NOSVE-NEXT:    add w9, w9, w9
3179; NONEON-NOSVE-NEXT:    add w8, w8, w8
3180; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #72]
3181; NONEON-NOSVE-NEXT:    add w9, w13, w13
3182; NONEON-NOSVE-NEXT:    add w8, w12, w12
3183; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #64]
3184; NONEON-NOSVE-NEXT:    add w9, w15, w15
3185; NONEON-NOSVE-NEXT:    add w8, w14, w14
3186; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #88]
3187; NONEON-NOSVE-NEXT:    add w9, w11, w11
3188; NONEON-NOSVE-NEXT:    add w8, w10, w10
3189; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #80]
3190; NONEON-NOSVE-NEXT:    ldp q1, q0, [sp, #64]
3191; NONEON-NOSVE-NEXT:    stp q1, q0, [x1]
3192; NONEON-NOSVE-NEXT:    add sp, sp, #96
3193; NONEON-NOSVE-NEXT:    ret
3194  %a = load <8 x i64>, ptr %in
3195  %b = trunc <8 x i64> %a to <8 x i32>
  ; Adding %b to itself keeps the truncate from being combined with the store.
3196  %c = add <8 x i32> %b, %b
3197  store <8 x i32> %c, ptr %out
3198  ret void
3199}
3200
3201; NOTE: Extra 'add' is to prevent the truncate being combined with the store.
; Loads a <16 x i64> from %in, truncates every lane to i32, adds the truncated
; vector to itself and stores the <16 x i32> result to %out.
; Expected lowering with SVE/SME: uzp1 + splice on .s elements, four vector
; adds on .s elements and two stp stores to x1.
; Expected lowering without SVE: the source is stored into a 192-byte stack
; frame and every lane is handled with scalar ldr w / add w, the results being
; written in pairs with stp w before the four q registers are stored to x1.
3202define void @trunc_v16i64_v16i32(ptr %in, ptr %out) nounwind {
3203; CHECK-LABEL: trunc_v16i64_v16i32:
3204; CHECK:       // %bb.0:
3205; CHECK-NEXT:    ldp q1, q0, [x0, #64]
3206; CHECK-NEXT:    ptrue p0.s, vl2
3207; CHECK-NEXT:    ldp q2, q3, [x0, #96]
3208; CHECK-NEXT:    ldp q4, q5, [x0]
3209; CHECK-NEXT:    uzp1 z7.s, z0.s, z0.s
3210; CHECK-NEXT:    uzp1 z6.s, z1.s, z1.s
3211; CHECK-NEXT:    ldp q1, q0, [x0, #32]
3212; CHECK-NEXT:    uzp1 z17.s, z3.s, z3.s
3213; CHECK-NEXT:    uzp1 z16.s, z2.s, z2.s
3214; CHECK-NEXT:    uzp1 z3.s, z5.s, z5.s
3215; CHECK-NEXT:    uzp1 z2.s, z4.s, z4.s
3216; CHECK-NEXT:    uzp1 z5.s, z0.s, z0.s
3217; CHECK-NEXT:    splice z0.s, p0, { z6.s, z7.s }
3218; CHECK-NEXT:    uzp1 z4.s, z1.s, z1.s
3219; CHECK-NEXT:    splice z1.s, p0, { z16.s, z17.s }
3220; CHECK-NEXT:    splice z2.s, p0, { z2.s, z3.s }
3221; CHECK-NEXT:    splice z3.s, p0, { z4.s, z5.s }
3222; CHECK-NEXT:    add z0.s, z0.s, z0.s
3223; CHECK-NEXT:    add z1.s, z1.s, z1.s
3224; CHECK-NEXT:    add z2.s, z2.s, z2.s
3225; CHECK-NEXT:    add z3.s, z3.s, z3.s
3226; CHECK-NEXT:    stp q0, q1, [x1, #32]
3227; CHECK-NEXT:    stp q2, q3, [x1]
3228; CHECK-NEXT:    ret
3229;
3230; NONEON-NOSVE-LABEL: trunc_v16i64_v16i32:
3231; NONEON-NOSVE:       // %bb.0:
3232; NONEON-NOSVE-NEXT:    sub sp, sp, #192
3233; NONEON-NOSVE-NEXT:    ldp q3, q2, [x0, #32]
3234; NONEON-NOSVE-NEXT:    ldp q5, q4, [x0]
3235; NONEON-NOSVE-NEXT:    ldp q1, q0, [x0, #64]
3236; NONEON-NOSVE-NEXT:    ldp q7, q6, [x0, #96]
3237; NONEON-NOSVE-NEXT:    stp q2, q4, [sp, #64]
3238; NONEON-NOSVE-NEXT:    stp q3, q1, [sp, #96]
3239; NONEON-NOSVE-NEXT:    ldr w8, [sp, #64]
3240; NONEON-NOSVE-NEXT:    ldr w9, [sp, #72]
3241; NONEON-NOSVE-NEXT:    ldr w2, [sp, #96]
3242; NONEON-NOSVE-NEXT:    ldr w3, [sp, #104]
3243; NONEON-NOSVE-NEXT:    stp q5, q7, [sp]
3244; NONEON-NOSVE-NEXT:    add w9, w9, w9
3245; NONEON-NOSVE-NEXT:    add w8, w8, w8
3246; NONEON-NOSVE-NEXT:    ldr w4, [sp, #80]
3247; NONEON-NOSVE-NEXT:    ldr w5, [sp, #88]
3248; NONEON-NOSVE-NEXT:    stp q6, q0, [sp, #32]
3249; NONEON-NOSVE-NEXT:    ldr w18, [sp]
3250; NONEON-NOSVE-NEXT:    ldr w0, [sp, #8]
3251; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #168]
3252; NONEON-NOSVE-NEXT:    add w9, w3, w3
3253; NONEON-NOSVE-NEXT:    add w8, w2, w2
3254; NONEON-NOSVE-NEXT:    ldr w16, [sp, #32]
3255; NONEON-NOSVE-NEXT:    ldr w17, [sp, #40]
3256; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #160]
3257; NONEON-NOSVE-NEXT:    add w9, w5, w5
3258; NONEON-NOSVE-NEXT:    add w8, w4, w4
3259; NONEON-NOSVE-NEXT:    ldr w14, [sp, #16]
3260; NONEON-NOSVE-NEXT:    ldr w15, [sp, #24]
3261; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #152]
3262; NONEON-NOSVE-NEXT:    add w9, w0, w0
3263; NONEON-NOSVE-NEXT:    add w8, w18, w18
3264; NONEON-NOSVE-NEXT:    ldr w12, [sp, #48]
3265; NONEON-NOSVE-NEXT:    ldr w13, [sp, #56]
3266; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #144]
3267; NONEON-NOSVE-NEXT:    add w9, w17, w17
3268; NONEON-NOSVE-NEXT:    add w8, w16, w16
3269; NONEON-NOSVE-NEXT:    ldr w10, [sp, #112]
3270; NONEON-NOSVE-NEXT:    ldr w11, [sp, #120]
3271; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #136]
3272; NONEON-NOSVE-NEXT:    add w9, w15, w15
3273; NONEON-NOSVE-NEXT:    add w8, w14, w14
3274; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #128]
3275; NONEON-NOSVE-NEXT:    add w9, w13, w13
3276; NONEON-NOSVE-NEXT:    add w8, w12, w12
3277; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #184]
3278; NONEON-NOSVE-NEXT:    add w9, w11, w11
3279; NONEON-NOSVE-NEXT:    add w8, w10, w10
3280; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #176]
3281; NONEON-NOSVE-NEXT:    ldp q1, q3, [sp, #128]
3282; NONEON-NOSVE-NEXT:    ldp q2, q0, [sp, #160]
3283; NONEON-NOSVE-NEXT:    stp q3, q2, [x1]
3284; NONEON-NOSVE-NEXT:    stp q0, q1, [x1, #32]
3285; NONEON-NOSVE-NEXT:    add sp, sp, #192
3286; NONEON-NOSVE-NEXT:    ret
3287  %a = load <16 x i64>, ptr %in
3288  %b = trunc <16 x i64> %a to <16 x i32>
  ; Adding %b to itself keeps the truncate from being combined with the store.
3289  %c = add <16 x i32> %b, %b
3290  store <16 x i32> %c, ptr %out
3291  ret void
3292}
3293
3294; NOTE: Extra 'add' is to prevent the truncate being combined with the store.
3295define void @trunc_v32i64_v32i32(ptr %in, ptr %out) nounwind {
; Purpose: load <32 x i64> from %in, truncate it to <32 x i32>, add the result
; to itself, and store the <32 x i32> sum to %out (the IR is at the end of this
; function, after the expected-output blocks).
;
; First expected-output block (the llc invocations at the top of the file that
; pass -mattr=+sve2 or -mattr=+sme): each loaded q register goes through uzp1
; on .s elements, pairs of results are joined with splice under the
; "ptrue p0.s, vl2" predicate, and the sums are written to x1 with stp.
;
; Second expected-output block (the llc invocation that passes no -mattr): the
; loaded q registers are written to the stack, read back with 32-bit loads at
; 8-byte spacing, doubled with scalar adds, written with 32-bit stores at
; 4-byte spacing, and finally reloaded as q registers and stored to x1.
3296; CHECK-LABEL: trunc_v32i64_v32i32:
3297; CHECK:       // %bb.0:
3298; CHECK-NEXT:    ldp q2, q3, [x0, #192]
3299; CHECK-NEXT:    ptrue p0.s, vl2
3300; CHECK-NEXT:    ldp q4, q5, [x0]
3301; CHECK-NEXT:    ldp q6, q7, [x0, #64]
3302; CHECK-NEXT:    uzp1 z17.s, z3.s, z3.s
3303; CHECK-NEXT:    ldp q3, q18, [x0, #224]
3304; CHECK-NEXT:    uzp1 z16.s, z2.s, z2.s
3305; CHECK-NEXT:    ldp q2, q19, [x0, #128]
3306; CHECK-NEXT:    ldp q0, q1, [x0, #32]
3307; CHECK-NEXT:    uzp1 z21.s, z18.s, z18.s
3308; CHECK-NEXT:    ldp q18, q22, [x0, #160]
3309; CHECK-NEXT:    uzp1 z20.s, z3.s, z3.s
3310; CHECK-NEXT:    uzp1 z24.s, z19.s, z19.s
3311; CHECK-NEXT:    ldp q3, q19, [x0, #96]
3312; CHECK-NEXT:    uzp1 z23.s, z2.s, z2.s
3313; CHECK-NEXT:    uzp1 z26.s, z22.s, z22.s
3314; CHECK-NEXT:    splice z2.s, p0, { z16.s, z17.s }
3315; CHECK-NEXT:    uzp1 z17.s, z7.s, z7.s
3316; CHECK-NEXT:    uzp1 z25.s, z18.s, z18.s
3317; CHECK-NEXT:    splice z7.s, p0, { z20.s, z21.s }
3318; CHECK-NEXT:    uzp1 z21.s, z5.s, z5.s
3319; CHECK-NEXT:    uzp1 z19.s, z19.s, z19.s
3320; CHECK-NEXT:    uzp1 z20.s, z4.s, z4.s
3321; CHECK-NEXT:    uzp1 z5.s, z1.s, z1.s
3322; CHECK-NEXT:    uzp1 z16.s, z6.s, z6.s
3323; CHECK-NEXT:    splice z6.s, p0, { z23.s, z24.s }
3324; CHECK-NEXT:    uzp1 z18.s, z3.s, z3.s
3325; CHECK-NEXT:    splice z3.s, p0, { z25.s, z26.s }
3326; CHECK-NEXT:    uzp1 z4.s, z0.s, z0.s
3327; CHECK-NEXT:    add z0.s, z2.s, z2.s
3328; CHECK-NEXT:    add z7.s, z7.s, z7.s
3329; CHECK-NEXT:    splice z1.s, p0, { z16.s, z17.s }
3330; CHECK-NEXT:    splice z2.s, p0, { z18.s, z19.s }
3331; CHECK-NEXT:    splice z16.s, p0, { z20.s, z21.s }
3332; CHECK-NEXT:    splice z4.s, p0, { z4.s, z5.s }
3333; CHECK-NEXT:    add z6.s, z6.s, z6.s
3334; CHECK-NEXT:    add z3.s, z3.s, z3.s
3335; CHECK-NEXT:    stp q0, q7, [x1, #96]
3336; CHECK-NEXT:    add z0.s, z1.s, z1.s
3337; CHECK-NEXT:    add z1.s, z2.s, z2.s
3338; CHECK-NEXT:    add z2.s, z16.s, z16.s
3339; CHECK-NEXT:    stp q6, q3, [x1, #64]
3340; CHECK-NEXT:    add z3.s, z4.s, z4.s
3341; CHECK-NEXT:    stp q0, q1, [x1, #32]
3342; CHECK-NEXT:    stp q2, q3, [x1]
3343; CHECK-NEXT:    ret
3344;
3345; NONEON-NOSVE-LABEL: trunc_v32i64_v32i32:
3346; NONEON-NOSVE:       // %bb.0:
3347; NONEON-NOSVE-NEXT:    sub sp, sp, #496
3348; NONEON-NOSVE-NEXT:    ldp q1, q0, [x0, #32]
3349; NONEON-NOSVE-NEXT:    stp x28, x27, [sp, #416] // 16-byte Folded Spill
3350; NONEON-NOSVE-NEXT:    ldp q17, q16, [x0, #192]
3351; NONEON-NOSVE-NEXT:    stp x26, x25, [sp, #432] // 16-byte Folded Spill
3352; NONEON-NOSVE-NEXT:    stp x24, x23, [sp, #448] // 16-byte Folded Spill
3353; NONEON-NOSVE-NEXT:    ldp q23, q22, [x0, #224]
3354; NONEON-NOSVE-NEXT:    stp x22, x21, [sp, #464] // 16-byte Folded Spill
3355; NONEON-NOSVE-NEXT:    ldp q3, q2, [x0]
3356; NONEON-NOSVE-NEXT:    stp x20, x19, [sp, #480] // 16-byte Folded Spill
3357; NONEON-NOSVE-NEXT:    ldp q5, q4, [x0, #96]
3358; NONEON-NOSVE-NEXT:    stp x29, x30, [sp, #400] // 16-byte Folded Spill
3359; NONEON-NOSVE-NEXT:    ldp q7, q6, [x0, #64]
3360; NONEON-NOSVE-NEXT:    ldp q19, q18, [x0, #160]
3361; NONEON-NOSVE-NEXT:    ldp q21, q20, [x0, #128]
3362; NONEON-NOSVE-NEXT:    str q0, [sp, #192]
3363; NONEON-NOSVE-NEXT:    ldr w8, [sp, #192]
3364; NONEON-NOSVE-NEXT:    stp q17, q23, [sp, #32]
3365; NONEON-NOSVE-NEXT:    ldr w9, [sp, #200]
3366; NONEON-NOSVE-NEXT:    ldr w10, [sp, #32]
3367; NONEON-NOSVE-NEXT:    stp q4, q6, [sp, #160]
3368; NONEON-NOSVE-NEXT:    ldr w12, [sp, #48]
3369; NONEON-NOSVE-NEXT:    add w6, w8, w8
3370; NONEON-NOSVE-NEXT:    add w5, w9, w9
3371; NONEON-NOSVE-NEXT:    ldr w8, [sp, #40]
3372; NONEON-NOSVE-NEXT:    stp q18, q20, [sp, #112]
3373; NONEON-NOSVE-NEXT:    ldr w25, [sp, #160]
3374; NONEON-NOSVE-NEXT:    ldr w26, [sp, #168]
3375; NONEON-NOSVE-NEXT:    str q5, [sp, #144]
3376; NONEON-NOSVE-NEXT:    ldr w21, [sp, #176]
3377; NONEON-NOSVE-NEXT:    ldr w22, [sp, #184]
3378; NONEON-NOSVE-NEXT:    stp q2, q1, [sp, #208]
3379; NONEON-NOSVE-NEXT:    ldr w23, [sp, #144]
3380; NONEON-NOSVE-NEXT:    ldr w24, [sp, #152]
3381; NONEON-NOSVE-NEXT:    str q3, [sp, #16]
3382; NONEON-NOSVE-NEXT:    ldr w9, [sp, #208]
3383; NONEON-NOSVE-NEXT:    ldr w4, [sp, #112]
3384; NONEON-NOSVE-NEXT:    stp w8, w10, [sp, #8] // 8-byte Folded Spill
3385; NONEON-NOSVE-NEXT:    ldr w8, [sp, #216]
3386; NONEON-NOSVE-NEXT:    ldr w27, [sp, #16]
3387; NONEON-NOSVE-NEXT:    add w9, w9, w9
3388; NONEON-NOSVE-NEXT:    ldr w28, [sp, #24]
3389; NONEON-NOSVE-NEXT:    stp q22, q16, [sp, #64]
3390; NONEON-NOSVE-NEXT:    add w8, w8, w8
3391; NONEON-NOSVE-NEXT:    str w9, [sp, #344]
3392; NONEON-NOSVE-NEXT:    add w9, w27, w27
3393; NONEON-NOSVE-NEXT:    str w8, [sp, #348]
3394; NONEON-NOSVE-NEXT:    add w8, w28, w28
3395; NONEON-NOSVE-NEXT:    ldr w7, [sp, #120]
3396; NONEON-NOSVE-NEXT:    stp q7, q21, [sp, #240]
3397; NONEON-NOSVE-NEXT:    ldr w18, [sp, #128]
3398; NONEON-NOSVE-NEXT:    ldr w0, [sp, #136]
3399; NONEON-NOSVE-NEXT:    str w8, [sp, #340]
3400; NONEON-NOSVE-NEXT:    add w8, w26, w26
3401; NONEON-NOSVE-NEXT:    ldr w19, [sp, #240]
3402; NONEON-NOSVE-NEXT:    str w9, [sp, #336]
3403; NONEON-NOSVE-NEXT:    add w9, w25, w25
3404; NONEON-NOSVE-NEXT:    ldr w20, [sp, #248]
3405; NONEON-NOSVE-NEXT:    str w8, [sp, #332]
3406; NONEON-NOSVE-NEXT:    add w8, w24, w24
3407; NONEON-NOSVE-NEXT:    ldr w16, [sp, #256]
3408; NONEON-NOSVE-NEXT:    str w9, [sp, #328]
3409; NONEON-NOSVE-NEXT:    add w9, w23, w23
3410; NONEON-NOSVE-NEXT:    ldr w17, [sp, #264]
3411; NONEON-NOSVE-NEXT:    str q19, [sp, #96]
3412; NONEON-NOSVE-NEXT:    ldr w14, [sp, #64]
3413; NONEON-NOSVE-NEXT:    ldr w15, [sp, #72]
3414; NONEON-NOSVE-NEXT:    str w8, [sp, #324]
3415; NONEON-NOSVE-NEXT:    add w8, w22, w22
3416; NONEON-NOSVE-NEXT:    ldr w2, [sp, #96]
3417; NONEON-NOSVE-NEXT:    str w9, [sp, #320]
3418; NONEON-NOSVE-NEXT:    add w9, w21, w21
3419; NONEON-NOSVE-NEXT:    ldr w3, [sp, #104]
3420; NONEON-NOSVE-NEXT:    str w8, [sp, #380]
3421; NONEON-NOSVE-NEXT:    add w8, w20, w20
3422; NONEON-NOSVE-NEXT:    ldr w13, [sp, #56]
3423; NONEON-NOSVE-NEXT:    str w9, [sp, #376]
3424; NONEON-NOSVE-NEXT:    add w9, w19, w19
3425; NONEON-NOSVE-NEXT:    ldr w10, [sp, #80]
3426; NONEON-NOSVE-NEXT:    str w8, [sp, #372]
3427; NONEON-NOSVE-NEXT:    add w8, w7, w7
3428; NONEON-NOSVE-NEXT:    ldr w11, [sp, #88]
3429; NONEON-NOSVE-NEXT:    str w9, [sp, #368]
3430; NONEON-NOSVE-NEXT:    add w9, w4, w4
3431; NONEON-NOSVE-NEXT:    ldr w29, [sp, #224]
3432; NONEON-NOSVE-NEXT:    str w8, [sp, #316]
3433; NONEON-NOSVE-NEXT:    add w8, w3, w3
3434; NONEON-NOSVE-NEXT:    ldr w30, [sp, #232]
3435; NONEON-NOSVE-NEXT:    str w9, [sp, #312]
3436; NONEON-NOSVE-NEXT:    add w9, w2, w2
3437; NONEON-NOSVE-NEXT:    str w8, [sp, #308]
3438; NONEON-NOSVE-NEXT:    add w8, w0, w0
3439; NONEON-NOSVE-NEXT:    str w9, [sp, #304]
3440; NONEON-NOSVE-NEXT:    add w9, w18, w18
3441; NONEON-NOSVE-NEXT:    str w8, [sp, #396]
3442; NONEON-NOSVE-NEXT:    add w8, w17, w17
3443; NONEON-NOSVE-NEXT:    str w9, [sp, #392]
3444; NONEON-NOSVE-NEXT:    add w9, w16, w16
3445; NONEON-NOSVE-NEXT:    str w8, [sp, #388]
3446; NONEON-NOSVE-NEXT:    add w8, w15, w15
3447; NONEON-NOSVE-NEXT:    str w9, [sp, #384]
3448; NONEON-NOSVE-NEXT:    add w9, w14, w14
3449; NONEON-NOSVE-NEXT:    str w8, [sp, #284]
3450; NONEON-NOSVE-NEXT:    add w8, w13, w13
3451; NONEON-NOSVE-NEXT:    str w9, [sp, #280]
3452; NONEON-NOSVE-NEXT:    add w9, w12, w12
3453; NONEON-NOSVE-NEXT:    str w8, [sp, #276]
3454; NONEON-NOSVE-NEXT:    add w8, w11, w11
3455; NONEON-NOSVE-NEXT:    str w9, [sp, #272]
3456; NONEON-NOSVE-NEXT:    add w9, w10, w10
3457; NONEON-NOSVE-NEXT:    str w8, [sp, #300]
3458; NONEON-NOSVE-NEXT:    ldr w8, [sp, #8] // 4-byte Folded Reload
3459; NONEON-NOSVE-NEXT:    str w9, [sp, #296]
3460; NONEON-NOSVE-NEXT:    ldr w9, [sp, #12] // 4-byte Folded Reload
3461; NONEON-NOSVE-NEXT:    add w8, w8, w8
3462; NONEON-NOSVE-NEXT:    str w5, [sp, #364]
3463; NONEON-NOSVE-NEXT:    add w5, w30, w30
3464; NONEON-NOSVE-NEXT:    add w9, w9, w9
3465; NONEON-NOSVE-NEXT:    str w6, [sp, #360]
3466; NONEON-NOSVE-NEXT:    add w6, w29, w29
3467; NONEON-NOSVE-NEXT:    str w5, [sp, #356]
3468; NONEON-NOSVE-NEXT:    ldp q6, q3, [sp, #304]
3469; NONEON-NOSVE-NEXT:    str w6, [sp, #352]
3470; NONEON-NOSVE-NEXT:    ldp q4, q7, [sp, #368]
3471; NONEON-NOSVE-NEXT:    str w8, [sp, #292]
3472; NONEON-NOSVE-NEXT:    ldp q1, q0, [sp, #336]
3473; NONEON-NOSVE-NEXT:    str w9, [sp, #288]
3474; NONEON-NOSVE-NEXT:    ldp x20, x19, [sp, #480] // 16-byte Folded Reload
3475; NONEON-NOSVE-NEXT:    ldp q5, q2, [sp, #272]
3476; NONEON-NOSVE-NEXT:    stp q4, q3, [x1, #32]
3477; NONEON-NOSVE-NEXT:    stp q1, q0, [x1]
3478; NONEON-NOSVE-NEXT:    ldp x22, x21, [sp, #464] // 16-byte Folded Reload
3479; NONEON-NOSVE-NEXT:    stp q7, q6, [x1, #64]
3480; NONEON-NOSVE-NEXT:    ldp x24, x23, [sp, #448] // 16-byte Folded Reload
3481; NONEON-NOSVE-NEXT:    stp q2, q5, [x1, #96]
3482; NONEON-NOSVE-NEXT:    ldp x26, x25, [sp, #432] // 16-byte Folded Reload
3483; NONEON-NOSVE-NEXT:    ldp x28, x27, [sp, #416] // 16-byte Folded Reload
3484; NONEON-NOSVE-NEXT:    ldp x29, x30, [sp, #400] // 16-byte Folded Reload
3485; NONEON-NOSVE-NEXT:    add sp, sp, #496
3486; NONEON-NOSVE-NEXT:    ret
; IR under test. The add of %b with itself sits between the trunc and the
; store so that the truncate is not combined with the store.
3487  %a = load <32 x i64>, ptr %in
3488  %b = trunc <32 x i64> %a to <32 x i32>
3489  %c = add <32 x i32> %b, %b
3490  store <32 x i32> %c, ptr %out
3491  ret void
3492}
3493