xref: /llvm-project/llvm/test/CodeGen/AArch64/sve-fixed-length-sdiv-pow2.ll (revision db158c7c830807caeeb0691739c41f1d522029e9)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -aarch64-sve-vector-bits-min=256  < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_256
3; RUN: llc -aarch64-sve-vector-bits-min=512  < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
4; RUN: llc -aarch64-sve-vector-bits-min=2048 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
5
; The RUN lines pass no -mtriple, so this module-level triple is what selects
; the AArch64 Linux target for llc.
6target triple = "aarch64-unknown-linux-gnu"
7
; sdiv_v8i8: signed divide of an <8 x i8> argument by a splat of -2.
; Expected lowering (see CHECK lines): a predicated ASRD by #1 under a vl8
; byte predicate, followed by SUBR #0 (0 - x), which negates the quotient
; because the divisor is negative. The `// kill` lines show the value living
; in d0 while being operated on as part of z0.
8define <8 x i8> @sdiv_v8i8(<8 x i8> %op1) vscale_range(2,0) #0 {
9; CHECK-LABEL: sdiv_v8i8:
10; CHECK:       // %bb.0:
11; CHECK-NEXT:    ptrue p0.b, vl8
12; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
13; CHECK-NEXT:    asrd z0.b, p0/m, z0.b, #1
14; CHECK-NEXT:    subr z0.b, z0.b, #0 // =0x0
15; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
16; CHECK-NEXT:    ret
17  %res = sdiv <8 x i8> %op1, shufflevector (<8 x i8> insertelement (<8 x i8> poison, i8 -2, i32 0), <8 x i8> poison, <8 x i32> zeroinitializer)
18  ret <8 x i8> %res
19}
20
; sdiv_v16i8: signed divide of a <16 x i8> argument by a splat of 32.
; Expected lowering (see CHECK lines): a single predicated ASRD by #5 under a
; vl16 byte predicate; the divisor is positive so no negation follows. The
; `// kill` lines show the value living in q0 while being operated on as z0.
21define <16 x i8> @sdiv_v16i8(<16 x i8> %op1) vscale_range(2,0) #0 {
22; CHECK-LABEL: sdiv_v16i8:
23; CHECK:       // %bb.0:
24; CHECK-NEXT:    ptrue p0.b, vl16
25; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
26; CHECK-NEXT:    asrd z0.b, p0/m, z0.b, #5
27; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
28; CHECK-NEXT:    ret
29  %res = sdiv <16 x i8> %op1, shufflevector (<16 x i8> insertelement (<16 x i8> poison, i8 32, i32 0), <16 x i8> poison, <16 x i32> zeroinitializer)
30  ret <16 x i8> %res
31}
32
; sdiv_v32i8: loads <32 x i8> from %a, divides by a splat of 32 and stores the
; result back to %a.
; Expected lowering (see CHECK lines): one ld1b / ASRD #5 / st1b sequence under
; a vl32 byte predicate, identical for every RUN line (shared CHECK prefix).
33define void @sdiv_v32i8(ptr %a) vscale_range(2,0) #0 {
34; CHECK-LABEL: sdiv_v32i8:
35; CHECK:       // %bb.0:
36; CHECK-NEXT:    ptrue p0.b, vl32
37; CHECK-NEXT:    ld1b { z0.b }, p0/z, [x0]
38; CHECK-NEXT:    asrd z0.b, p0/m, z0.b, #5
39; CHECK-NEXT:    st1b { z0.b }, p0, [x0]
40; CHECK-NEXT:    ret
41  %op1 = load <32 x i8>, ptr %a
42  %res = sdiv <32 x i8> %op1, shufflevector (<32 x i8> insertelement (<32 x i8> poison, i8 32, i32 0), <32 x i8> poison, <32 x i32> zeroinitializer)
43  store <32 x i8> %res, ptr %a
44  ret void
45}
46
; sdiv_v64i8: loads <64 x i8> from %a, divides by a splat of 32 and stores the
; result back to %a.
; This function carries no vscale_range attribute, so the expected output is
; keyed on the RUN-line prefix rather than on the shared CHECK prefix:
;   VBITS_GE_256 - the vector is processed as two vl32 halves; the upper half
;                  is addressed through x8 (= 32), each half gets its own
;                  ASRD #5.
;   VBITS_GE_512 - a single ld1b / ASRD #5 / st1b under a vl64 predicate.
47define void @sdiv_v64i8(ptr %a) #0 {
48; VBITS_GE_256-LABEL: sdiv_v64i8:
49; VBITS_GE_256:       // %bb.0:
50; VBITS_GE_256-NEXT:    ptrue p0.b, vl32
51; VBITS_GE_256-NEXT:    mov w8, #32 // =0x20
52; VBITS_GE_256-NEXT:    ld1b { z0.b }, p0/z, [x0, x8]
53; VBITS_GE_256-NEXT:    ld1b { z1.b }, p0/z, [x0]
54; VBITS_GE_256-NEXT:    asrd z0.b, p0/m, z0.b, #5
55; VBITS_GE_256-NEXT:    asrd z1.b, p0/m, z1.b, #5
56; VBITS_GE_256-NEXT:    st1b { z0.b }, p0, [x0, x8]
57; VBITS_GE_256-NEXT:    st1b { z1.b }, p0, [x0]
58; VBITS_GE_256-NEXT:    ret
59;
60; VBITS_GE_512-LABEL: sdiv_v64i8:
61; VBITS_GE_512:       // %bb.0:
62; VBITS_GE_512-NEXT:    ptrue p0.b, vl64
63; VBITS_GE_512-NEXT:    ld1b { z0.b }, p0/z, [x0]
64; VBITS_GE_512-NEXT:    asrd z0.b, p0/m, z0.b, #5
65; VBITS_GE_512-NEXT:    st1b { z0.b }, p0, [x0]
66; VBITS_GE_512-NEXT:    ret
67  %op1 = load <64 x i8>, ptr %a
68  %res = sdiv <64 x i8> %op1, shufflevector (<64 x i8> insertelement (<64 x i8> poison, i8 32, i32 0), <64 x i8> poison, <64 x i32> zeroinitializer)
69  store <64 x i8> %res, ptr %a
70  ret void
71}
72
; sdiv_v128i8: loads <128 x i8> from %a, divides by a splat of -4 and stores
; the result back to %a. Declared with vscale_range(8,0).
; Expected lowering (see CHECK lines): ld1b, ASRD #2, then SUBR #0 (0 - x) to
; negate the quotient for the negative divisor, then st1b, all under a vl128
; byte predicate.
73define void @sdiv_v128i8(ptr %a) vscale_range(8,0) #0 {
74; CHECK-LABEL: sdiv_v128i8:
75; CHECK:       // %bb.0:
76; CHECK-NEXT:    ptrue p0.b, vl128
77; CHECK-NEXT:    ld1b { z0.b }, p0/z, [x0]
78; CHECK-NEXT:    asrd z0.b, p0/m, z0.b, #2
79; CHECK-NEXT:    subr z0.b, z0.b, #0 // =0x0
80; CHECK-NEXT:    st1b { z0.b }, p0, [x0]
81; CHECK-NEXT:    ret
82  %op1 = load <128 x i8>, ptr %a
83  %res = sdiv <128 x i8> %op1, shufflevector (<128 x i8> insertelement (<128 x i8> poison, i8 -4, i32 0), <128 x i8> poison, <128 x i32> zeroinitializer)
84  store <128 x i8> %res, ptr %a
85  ret void
86}
87
; sdiv_v256i8: loads <256 x i8> from %a, divides by a splat of 32 and stores
; the result back to %a. Declared with vscale_range(16,0).
; Expected lowering (see CHECK lines): one ld1b / ASRD #5 / st1b sequence under
; a vl256 byte predicate.
88define void @sdiv_v256i8(ptr %a) vscale_range(16,0) #0 {
89; CHECK-LABEL: sdiv_v256i8:
90; CHECK:       // %bb.0:
91; CHECK-NEXT:    ptrue p0.b, vl256
92; CHECK-NEXT:    ld1b { z0.b }, p0/z, [x0]
93; CHECK-NEXT:    asrd z0.b, p0/m, z0.b, #5
94; CHECK-NEXT:    st1b { z0.b }, p0, [x0]
95; CHECK-NEXT:    ret
96  %op1 = load <256 x i8>, ptr %a
97  %res = sdiv <256 x i8> %op1, shufflevector (<256 x i8> insertelement (<256 x i8> poison, i8 32, i32 0), <256 x i8> poison, <256 x i32> zeroinitializer)
98  store <256 x i8> %res, ptr %a
99  ret void
100}
101
; sdiv_v4i16: signed divide of a <4 x i16> argument by a splat of 32.
; Expected lowering (see CHECK lines): a single predicated ASRD by #5 on
; halfword lanes under a vl4 predicate; no negation for the positive divisor.
; The `// kill` lines show the value living in d0 while operated on as z0.
102define <4 x i16> @sdiv_v4i16(<4 x i16> %op1) vscale_range(2,0) #0 {
103; CHECK-LABEL: sdiv_v4i16:
104; CHECK:       // %bb.0:
105; CHECK-NEXT:    ptrue p0.h, vl4
106; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
107; CHECK-NEXT:    asrd z0.h, p0/m, z0.h, #5
108; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
109; CHECK-NEXT:    ret
110  %res = sdiv <4 x i16> %op1, shufflevector (<4 x i16> insertelement (<4 x i16> poison, i16 32, i32 0), <4 x i16> poison, <4 x i32> zeroinitializer)
111  ret <4 x i16> %res
112}
113
; sdiv_v8i16: signed divide of an <8 x i16> argument by a splat of -8.
; Expected lowering (see CHECK lines): a predicated ASRD by #3 on halfword
; lanes under a vl8 predicate, followed by SUBR #0 (0 - x) to negate the
; quotient for the negative divisor. The `// kill` lines show the value living
; in q0 while operated on as z0.
114define <8 x i16> @sdiv_v8i16(<8 x i16> %op1) vscale_range(2,0) #0 {
115; CHECK-LABEL: sdiv_v8i16:
116; CHECK:       // %bb.0:
117; CHECK-NEXT:    ptrue p0.h, vl8
118; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
119; CHECK-NEXT:    asrd z0.h, p0/m, z0.h, #3
120; CHECK-NEXT:    subr z0.h, z0.h, #0 // =0x0
121; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
122; CHECK-NEXT:    ret
123  %res = sdiv <8 x i16> %op1, shufflevector (<8 x i16> insertelement (<8 x i16> poison, i16 -8, i32 0), <8 x i16> poison, <8 x i32> zeroinitializer)
124  ret <8 x i16> %res
125}
126
; sdiv_v16i16: loads <16 x i16> from %a, divides by a splat of 32 and stores
; the result back to %a.
; Expected lowering (see CHECK lines): one ld1h / ASRD #5 / st1h sequence under
; a vl16 halfword predicate, identical for every RUN line.
127define void @sdiv_v16i16(ptr %a) vscale_range(2,0) #0 {
128; CHECK-LABEL: sdiv_v16i16:
129; CHECK:       // %bb.0:
130; CHECK-NEXT:    ptrue p0.h, vl16
131; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
132; CHECK-NEXT:    asrd z0.h, p0/m, z0.h, #5
133; CHECK-NEXT:    st1h { z0.h }, p0, [x0]
134; CHECK-NEXT:    ret
135  %op1 = load <16 x i16>, ptr %a
136  %res = sdiv <16 x i16> %op1, shufflevector (<16 x i16> insertelement (<16 x i16> poison, i16 32, i32 0), <16 x i16> poison, <16 x i32> zeroinitializer)
137  store <16 x i16> %res, ptr %a
138  ret void
139}
140
; sdiv_v32i16: loads <32 x i16> from %a, divides by a splat of 32 and stores
; the result back to %a.
; This function carries no vscale_range attribute, so the expected output is
; keyed on the RUN-line prefix:
;   VBITS_GE_256 - processed as two vl16 halves; the upper half is addressed
;                  as [x0, x8, lsl #1] with x8 = 16 elements, each half gets
;                  its own ASRD #5.
;   VBITS_GE_512 - a single ld1h / ASRD #5 / st1h under a vl32 predicate.
141define void @sdiv_v32i16(ptr %a) #0 {
142; VBITS_GE_256-LABEL: sdiv_v32i16:
143; VBITS_GE_256:       // %bb.0:
144; VBITS_GE_256-NEXT:    ptrue p0.h, vl16
145; VBITS_GE_256-NEXT:    mov x8, #16 // =0x10
146; VBITS_GE_256-NEXT:    ld1h { z0.h }, p0/z, [x0, x8, lsl #1]
147; VBITS_GE_256-NEXT:    ld1h { z1.h }, p0/z, [x0]
148; VBITS_GE_256-NEXT:    asrd z0.h, p0/m, z0.h, #5
149; VBITS_GE_256-NEXT:    asrd z1.h, p0/m, z1.h, #5
150; VBITS_GE_256-NEXT:    st1h { z0.h }, p0, [x0, x8, lsl #1]
151; VBITS_GE_256-NEXT:    st1h { z1.h }, p0, [x0]
152; VBITS_GE_256-NEXT:    ret
153;
154; VBITS_GE_512-LABEL: sdiv_v32i16:
155; VBITS_GE_512:       // %bb.0:
156; VBITS_GE_512-NEXT:    ptrue p0.h, vl32
157; VBITS_GE_512-NEXT:    ld1h { z0.h }, p0/z, [x0]
158; VBITS_GE_512-NEXT:    asrd z0.h, p0/m, z0.h, #5
159; VBITS_GE_512-NEXT:    st1h { z0.h }, p0, [x0]
160; VBITS_GE_512-NEXT:    ret
161  %op1 = load <32 x i16>, ptr %a
162  %res = sdiv <32 x i16> %op1, shufflevector (<32 x i16> insertelement (<32 x i16> poison, i16 32, i32 0), <32 x i16> poison, <32 x i32> zeroinitializer)
163  store <32 x i16> %res, ptr %a
164  ret void
165}
166
; sdiv_v64i16: loads <64 x i16> from %a, divides by a splat of -16 and stores
; the result back to %a. Declared with vscale_range(8,0).
; Expected lowering (see CHECK lines): ld1h, ASRD #4, then SUBR #0 (0 - x) to
; negate the quotient for the negative divisor, then st1h, all under a vl64
; halfword predicate.
167define void @sdiv_v64i16(ptr %a) vscale_range(8,0) #0 {
168; CHECK-LABEL: sdiv_v64i16:
169; CHECK:       // %bb.0:
170; CHECK-NEXT:    ptrue p0.h, vl64
171; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
172; CHECK-NEXT:    asrd z0.h, p0/m, z0.h, #4
173; CHECK-NEXT:    subr z0.h, z0.h, #0 // =0x0
174; CHECK-NEXT:    st1h { z0.h }, p0, [x0]
175; CHECK-NEXT:    ret
176  %op1 = load <64 x i16>, ptr %a
177  %res = sdiv <64 x i16> %op1, shufflevector (<64 x i16> insertelement (<64 x i16> poison, i16 -16, i32 0), <64 x i16> poison, <64 x i32> zeroinitializer)
178  store <64 x i16> %res, ptr %a
179  ret void
180}
181
; sdiv_v128i16: loads <128 x i16> from %a, divides by a splat of 32 and stores
; the result back to %a. Declared with vscale_range(16,0).
; Expected lowering (see CHECK lines): one ld1h / ASRD #5 / st1h sequence under
; a vl128 halfword predicate.
182define void @sdiv_v128i16(ptr %a) vscale_range(16,0) #0 {
183; CHECK-LABEL: sdiv_v128i16:
184; CHECK:       // %bb.0:
185; CHECK-NEXT:    ptrue p0.h, vl128
186; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
187; CHECK-NEXT:    asrd z0.h, p0/m, z0.h, #5
188; CHECK-NEXT:    st1h { z0.h }, p0, [x0]
189; CHECK-NEXT:    ret
190  %op1 = load <128 x i16>, ptr %a
191  %res = sdiv <128 x i16> %op1, shufflevector (<128 x i16> insertelement (<128 x i16> poison, i16 32, i32 0), <128 x i16> poison, <128 x i32> zeroinitializer)
192  store <128 x i16> %res, ptr %a
193  ret void
194}
195
; sdiv_v2i32: signed divide of a <2 x i32> argument by a splat of -32.
; Expected lowering (see CHECK lines): a predicated ASRD by #5 on word lanes
; under a vl2 predicate, followed by SUBR #0 (0 - x) to negate the quotient
; for the negative divisor. The `// kill` lines show the value living in d0
; while operated on as z0.
196define <2 x i32> @sdiv_v2i32(<2 x i32> %op1) vscale_range(2,0) #0 {
197; CHECK-LABEL: sdiv_v2i32:
198; CHECK:       // %bb.0:
199; CHECK-NEXT:    ptrue p0.s, vl2
200; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
201; CHECK-NEXT:    asrd z0.s, p0/m, z0.s, #5
202; CHECK-NEXT:    subr z0.s, z0.s, #0 // =0x0
203; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
204; CHECK-NEXT:    ret
205  %res = sdiv <2 x i32> %op1, shufflevector (<2 x i32> insertelement (<2 x i32> poison, i32 -32, i32 0), <2 x i32> poison, <2 x i32> zeroinitializer)
206  ret <2 x i32> %res
207}
208
; sdiv_v4i32: signed divide of a <4 x i32> argument by a splat of 32.
; Expected lowering (see CHECK lines): a single predicated ASRD by #5 on word
; lanes under a vl4 predicate; no negation for the positive divisor. The
; `// kill` lines show the value living in q0 while operated on as z0.
209define <4 x i32> @sdiv_v4i32(<4 x i32> %op1) vscale_range(2,0) #0 {
210; CHECK-LABEL: sdiv_v4i32:
211; CHECK:       // %bb.0:
212; CHECK-NEXT:    ptrue p0.s, vl4
213; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
214; CHECK-NEXT:    asrd z0.s, p0/m, z0.s, #5
215; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
216; CHECK-NEXT:    ret
217  %res = sdiv <4 x i32> %op1, shufflevector (<4 x i32> insertelement (<4 x i32> poison, i32 32, i32 0), <4 x i32> poison, <4 x i32> zeroinitializer)
218  ret <4 x i32> %res
219}
220
; sdiv_v8i32: loads <8 x i32> from %a, divides by a splat of -64 and stores the
; result back to %a.
; Expected lowering (see CHECK lines): ld1w, ASRD #6, then SUBR #0 (0 - x) to
; negate the quotient for the negative divisor, then st1w, all under a vl8
; word predicate.
221define void @sdiv_v8i32(ptr %a) vscale_range(2,0) #0 {
222; CHECK-LABEL: sdiv_v8i32:
223; CHECK:       // %bb.0:
224; CHECK-NEXT:    ptrue p0.s, vl8
225; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
226; CHECK-NEXT:    asrd z0.s, p0/m, z0.s, #6
227; CHECK-NEXT:    subr z0.s, z0.s, #0 // =0x0
228; CHECK-NEXT:    st1w { z0.s }, p0, [x0]
229; CHECK-NEXT:    ret
230  %op1 = load <8 x i32>, ptr %a
231  %res = sdiv <8 x i32> %op1, shufflevector (<8 x i32> insertelement (<8 x i32> poison, i32 -64, i32 0), <8 x i32> poison, <8 x i32> zeroinitializer)
232  store <8 x i32> %res, ptr %a
233  ret void
234}
235
; sdiv_v16i32: loads <16 x i32> from %a, divides by a splat of 32 and stores
; the result back to %a.
; This function carries no vscale_range attribute, so the expected output is
; keyed on the RUN-line prefix:
;   VBITS_GE_256 - processed as two vl8 halves; the upper half is addressed as
;                  [x0, x8, lsl #2] with x8 = 8 elements, each half gets its
;                  own ASRD #5.
;   VBITS_GE_512 - a single ld1w / ASRD #5 / st1w under a vl16 predicate.
236define void @sdiv_v16i32(ptr %a) #0 {
237; VBITS_GE_256-LABEL: sdiv_v16i32:
238; VBITS_GE_256:       // %bb.0:
239; VBITS_GE_256-NEXT:    ptrue p0.s, vl8
240; VBITS_GE_256-NEXT:    mov x8, #8 // =0x8
241; VBITS_GE_256-NEXT:    ld1w { z0.s }, p0/z, [x0, x8, lsl #2]
242; VBITS_GE_256-NEXT:    ld1w { z1.s }, p0/z, [x0]
243; VBITS_GE_256-NEXT:    asrd z0.s, p0/m, z0.s, #5
244; VBITS_GE_256-NEXT:    asrd z1.s, p0/m, z1.s, #5
245; VBITS_GE_256-NEXT:    st1w { z0.s }, p0, [x0, x8, lsl #2]
246; VBITS_GE_256-NEXT:    st1w { z1.s }, p0, [x0]
247; VBITS_GE_256-NEXT:    ret
248;
249; VBITS_GE_512-LABEL: sdiv_v16i32:
250; VBITS_GE_512:       // %bb.0:
251; VBITS_GE_512-NEXT:    ptrue p0.s, vl16
252; VBITS_GE_512-NEXT:    ld1w { z0.s }, p0/z, [x0]
253; VBITS_GE_512-NEXT:    asrd z0.s, p0/m, z0.s, #5
254; VBITS_GE_512-NEXT:    st1w { z0.s }, p0, [x0]
255; VBITS_GE_512-NEXT:    ret
256  %op1 = load <16 x i32>, ptr %a
257  %res = sdiv <16 x i32> %op1, shufflevector (<16 x i32> insertelement (<16 x i32> poison, i32 32, i32 0), <16 x i32> poison, <16 x i32> zeroinitializer)
258  store <16 x i32> %res, ptr %a
259  ret void
260}
261
; sdiv_v32i32: loads <32 x i32> from %a, divides by a splat of 32 and stores
; the result back to %a. Declared with vscale_range(8,0).
; Expected lowering (see CHECK lines): one ld1w / ASRD #5 / st1w sequence under
; a vl32 word predicate.
262define void @sdiv_v32i32(ptr %a) vscale_range(8,0) #0 {
263; CHECK-LABEL: sdiv_v32i32:
264; CHECK:       // %bb.0:
265; CHECK-NEXT:    ptrue p0.s, vl32
266; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
267; CHECK-NEXT:    asrd z0.s, p0/m, z0.s, #5
268; CHECK-NEXT:    st1w { z0.s }, p0, [x0]
269; CHECK-NEXT:    ret
270  %op1 = load <32 x i32>, ptr %a
271  %res = sdiv <32 x i32> %op1, shufflevector (<32 x i32> insertelement (<32 x i32> poison, i32 32, i32 0), <32 x i32> poison, <32 x i32> zeroinitializer)
272  store <32 x i32> %res, ptr %a
273  ret void
274}
275
; sdiv_v64i32: loads <64 x i32> from %a, divides by a splat of 32 and stores
; the result back to %a. Declared with vscale_range(16,0).
; Expected lowering (see CHECK lines): one ld1w / ASRD #5 / st1w sequence under
; a vl64 word predicate.
276define void @sdiv_v64i32(ptr %a) vscale_range(16,0) #0 {
277; CHECK-LABEL: sdiv_v64i32:
278; CHECK:       // %bb.0:
279; CHECK-NEXT:    ptrue p0.s, vl64
280; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
281; CHECK-NEXT:    asrd z0.s, p0/m, z0.s, #5
282; CHECK-NEXT:    st1w { z0.s }, p0, [x0]
283; CHECK-NEXT:    ret
284  %op1 = load <64 x i32>, ptr %a
285  %res = sdiv <64 x i32> %op1, shufflevector (<64 x i32> insertelement (<64 x i32> poison, i32 32, i32 0), <64 x i32> poison, <64 x i32> zeroinitializer)
286  store <64 x i32> %res, ptr %a
287  ret void
288}
289
; sdiv_v1i64: signed divide of a <1 x i64> argument by a splat of -128.
; Expected lowering (see CHECK lines): a predicated ASRD by #7 on doubleword
; lanes under a vl1 predicate, followed by SUBR #0 (0 - x) to negate the
; quotient for the negative divisor. The `// kill` lines show the value living
; in d0 while operated on as z0.
290define <1 x i64> @sdiv_v1i64(<1 x i64> %op1) vscale_range(2,0) #0 {
291; CHECK-LABEL: sdiv_v1i64:
292; CHECK:       // %bb.0:
293; CHECK-NEXT:    ptrue p0.d, vl1
294; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
295; CHECK-NEXT:    asrd z0.d, p0/m, z0.d, #7
296; CHECK-NEXT:    subr z0.d, z0.d, #0 // =0x0
297; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
298; CHECK-NEXT:    ret
299  %res = sdiv <1 x i64> %op1, shufflevector (<1 x i64> insertelement (<1 x i64> poison, i64 -128, i32 0), <1 x i64> poison, <1 x i32> zeroinitializer)
300  ret <1 x i64> %res
301}
302
303; Vector i64 sdiv is not legal for NEON, so use SVE when available.
; sdiv_v2i64: signed divide of a <2 x i64> argument by a splat of 32.
; Expected lowering (see CHECK lines): a single predicated ASRD by #5 on
; doubleword lanes under a vl2 predicate; no negation for the positive
; divisor. The `// kill` lines show the value living in q0 while operated on
; as z0.
304define <2 x i64> @sdiv_v2i64(<2 x i64> %op1) vscale_range(2,0) #0 {
305; CHECK-LABEL: sdiv_v2i64:
306; CHECK:       // %bb.0:
307; CHECK-NEXT:    ptrue p0.d, vl2
308; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
309; CHECK-NEXT:    asrd z0.d, p0/m, z0.d, #5
310; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
311; CHECK-NEXT:    ret
312  %res = sdiv <2 x i64> %op1, shufflevector (<2 x i64> insertelement (<2 x i64> poison, i64 32, i32 0), <2 x i64> poison, <2 x i32> zeroinitializer)
313  ret <2 x i64> %res
314}
315
; sdiv_v4i64: loads <4 x i64> from %a, divides by a splat of -256 and stores
; the result back to %a.
; Expected lowering (see CHECK lines): ld1d, ASRD #8, then SUBR #0 (0 - x) to
; negate the quotient for the negative divisor, then st1d, all under a vl4
; doubleword predicate.
316define void @sdiv_v4i64(ptr %a) vscale_range(2,0) #0 {
317; CHECK-LABEL: sdiv_v4i64:
318; CHECK:       // %bb.0:
319; CHECK-NEXT:    ptrue p0.d, vl4
320; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
321; CHECK-NEXT:    asrd z0.d, p0/m, z0.d, #8
322; CHECK-NEXT:    subr z0.d, z0.d, #0 // =0x0
323; CHECK-NEXT:    st1d { z0.d }, p0, [x0]
324; CHECK-NEXT:    ret
325  %op1 = load <4 x i64>, ptr %a
326  %res = sdiv <4 x i64> %op1, shufflevector (<4 x i64> insertelement (<4 x i64> poison, i64 -256, i32 0), <4 x i64> poison, <4 x i32> zeroinitializer)
327  store <4 x i64> %res, ptr %a
328  ret void
329}
330
; sdiv_v8i64: loads <8 x i64> from %a, divides by a splat of 32 and stores the
; result back to %a.
; This function carries no vscale_range attribute, so the expected output is
; keyed on the RUN-line prefix:
;   VBITS_GE_256 - processed as two vl4 halves; the upper half is addressed as
;                  [x0, x8, lsl #3] with x8 = 4 elements, each half gets its
;                  own ASRD #5.
;   VBITS_GE_512 - a single ld1d / ASRD #5 / st1d under a vl8 predicate.
331define void @sdiv_v8i64(ptr %a) #0 {
332; VBITS_GE_256-LABEL: sdiv_v8i64:
333; VBITS_GE_256:       // %bb.0:
334; VBITS_GE_256-NEXT:    ptrue p0.d, vl4
335; VBITS_GE_256-NEXT:    mov x8, #4 // =0x4
336; VBITS_GE_256-NEXT:    ld1d { z0.d }, p0/z, [x0, x8, lsl #3]
337; VBITS_GE_256-NEXT:    ld1d { z1.d }, p0/z, [x0]
338; VBITS_GE_256-NEXT:    asrd z0.d, p0/m, z0.d, #5
339; VBITS_GE_256-NEXT:    asrd z1.d, p0/m, z1.d, #5
340; VBITS_GE_256-NEXT:    st1d { z0.d }, p0, [x0, x8, lsl #3]
341; VBITS_GE_256-NEXT:    st1d { z1.d }, p0, [x0]
342; VBITS_GE_256-NEXT:    ret
343;
344; VBITS_GE_512-LABEL: sdiv_v8i64:
345; VBITS_GE_512:       // %bb.0:
346; VBITS_GE_512-NEXT:    ptrue p0.d, vl8
347; VBITS_GE_512-NEXT:    ld1d { z0.d }, p0/z, [x0]
348; VBITS_GE_512-NEXT:    asrd z0.d, p0/m, z0.d, #5
349; VBITS_GE_512-NEXT:    st1d { z0.d }, p0, [x0]
350; VBITS_GE_512-NEXT:    ret
351  %op1 = load <8 x i64>, ptr %a
352  %res = sdiv <8 x i64> %op1, shufflevector (<8 x i64> insertelement (<8 x i64> poison, i64 32, i32 0), <8 x i64> poison, <8 x i32> zeroinitializer)
353  store <8 x i64> %res, ptr %a
354  ret void
355}
356
; sdiv_v16i64: loads <16 x i64> from %a, divides by a splat of 32 and stores
; the result back to %a. Declared with vscale_range(8,0).
; Expected lowering (see CHECK lines): one ld1d / ASRD #5 / st1d sequence under
; a vl16 doubleword predicate.
357define void @sdiv_v16i64(ptr %a) vscale_range(8,0) #0 {
358; CHECK-LABEL: sdiv_v16i64:
359; CHECK:       // %bb.0:
360; CHECK-NEXT:    ptrue p0.d, vl16
361; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
362; CHECK-NEXT:    asrd z0.d, p0/m, z0.d, #5
363; CHECK-NEXT:    st1d { z0.d }, p0, [x0]
364; CHECK-NEXT:    ret
365  %op1 = load <16 x i64>, ptr %a
366  %res = sdiv <16 x i64> %op1, shufflevector (<16 x i64> insertelement (<16 x i64> poison, i64 32, i32 0), <16 x i64> poison, <16 x i32> zeroinitializer)
367  store <16 x i64> %res, ptr %a
368  ret void
369}
370
; sdiv_v32i64: loads <32 x i64> from %a, divides by a splat of -512 and stores
; the result back to %a. Declared with vscale_range(16,0).
; Expected lowering (see CHECK lines): ld1d, ASRD #9, then SUBR #0 (0 - x) to
; negate the quotient for the negative divisor, then st1d, all under a vl32
; doubleword predicate.
371define void @sdiv_v32i64(ptr %a) vscale_range(16,0) #0 {
372; CHECK-LABEL: sdiv_v32i64:
373; CHECK:       // %bb.0:
374; CHECK-NEXT:    ptrue p0.d, vl32
375; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
376; CHECK-NEXT:    asrd z0.d, p0/m, z0.d, #9
377; CHECK-NEXT:    subr z0.d, z0.d, #0 // =0x0
378; CHECK-NEXT:    st1d { z0.d }, p0, [x0]
379; CHECK-NEXT:    ret
380  %op1 = load <32 x i64>, ptr %a
381  %res = sdiv <32 x i64> %op1, shufflevector (<32 x i64> insertelement (<32 x i64> poison, i64 -512, i32 0), <32 x i64> poison, <32 x i32> zeroinitializer)
382  store <32 x i64> %res, ptr %a
383  ret void
384}
385
; Attribute group #0, referenced by every function visible in this file:
; enables the SVE target feature so the fixed-length vectors above can be
; lowered to SVE instructions.
386attributes #0 = { "target-features"="+sve" }
387