xref: /llvm-project/llvm/test/CodeGen/AArch64/sve-expand-div.ll (revision cc82f1290a1e2157a6c0530d78d8cc84d2b8553d)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -mtriple=aarch64-linux-gnu < %s | FileCheck %s
3
4; Check that expensive divides by a constant are expanded into a cheaper multiply-high and shift sequence
5
6;
7; SDIV
8;
9
; Signed divide of every i8 lane of a scalable vector by the constant 3.
; The expected output contains no divide instruction: each lane is multiplied
; by the splatted constant 86 (0x56) with SMULH, which keeps the high half of
; the product, and the top bit of that result (logical shift right by 7) is
; then added back to it.
10define <vscale x 16 x i8> @sdiv_i8(<vscale x 16 x i8> %a) #0 {
11; CHECK-LABEL: sdiv_i8:
12; CHECK:       // %bb.0:
13; CHECK-NEXT:    mov z1.b, #86 // =0x56
14; CHECK-NEXT:    ptrue p0.b
15; CHECK-NEXT:    smulh z0.b, p0/m, z0.b, z1.b
16; CHECK-NEXT:    lsr z1.b, z0.b, #7
17; CHECK-NEXT:    add z0.b, z0.b, z1.b
18; CHECK-NEXT:    ret
19  %div = sdiv <vscale x 16 x i8> %a, splat (i8 3)
20  ret <vscale x 16 x i8> %div
21}
22
; Signed divide of every i16 lane of a scalable vector by the constant 3.
; The multiplier 21846 (0x5556) is materialised in w8 and broadcast to all
; halfword lanes; SMULH keeps the high half of each product, and the top bit
; of that result (logical shift right by 15) is then added back to it.
23define <vscale x 8 x i16> @sdiv_i16(<vscale x 8 x i16> %a) #0 {
24; CHECK-LABEL: sdiv_i16:
25; CHECK:       // %bb.0:
26; CHECK-NEXT:    mov w8, #21846 // =0x5556
27; CHECK-NEXT:    ptrue p0.h
28; CHECK-NEXT:    mov z1.h, w8
29; CHECK-NEXT:    smulh z0.h, p0/m, z0.h, z1.h
30; CHECK-NEXT:    lsr z1.h, z0.h, #15
31; CHECK-NEXT:    add z0.h, z0.h, z1.h
32; CHECK-NEXT:    ret
33  %div = sdiv <vscale x 8 x i16> %a, splat (i16 3)
34  ret <vscale x 8 x i16> %div
35}
36
; Signed divide of every i32 lane of a scalable vector by the constant 3.
; The multiplier 0x55555556 is built in w8 with a mov/movk pair (low half
; 21846 = 0x5556, high half 21845 = 0x5555) and broadcast to all word lanes;
; SMULH keeps the high half of each product, and the top bit of that result
; (logical shift right by 31) is then added back to it.
37define <vscale x 4 x i32> @sdiv_i32(<vscale x 4 x i32> %a) #0 {
38; CHECK-LABEL: sdiv_i32:
39; CHECK:       // %bb.0:
40; CHECK-NEXT:    mov w8, #21846 // =0x5556
41; CHECK-NEXT:    ptrue p0.s
42; CHECK-NEXT:    movk w8, #21845, lsl #16
43; CHECK-NEXT:    mov z1.s, w8
44; CHECK-NEXT:    smulh z0.s, p0/m, z0.s, z1.s
45; CHECK-NEXT:    lsr z1.s, z0.s, #31
46; CHECK-NEXT:    add z0.s, z0.s, z1.s
47; CHECK-NEXT:    ret
48  %div = sdiv <vscale x 4 x i32> %a, splat (i32 3)
49  ret <vscale x 4 x i32> %div
50}
51
; Signed divide of every i64 lane of a scalable vector by the constant 3.
; x8 is first loaded with 0x5555555555555555 and its low 16 bits are then
; replaced by 21846 (0x5556) with movk, giving the multiplier
; 0x5555555555555556, which is broadcast to all doubleword lanes. SMULH keeps
; the high half of each product, and the top bit of that result (logical
; shift right by 63) is then added back to it.
52define <vscale x 2 x i64> @sdiv_i64(<vscale x 2 x i64> %a) #0 {
53; CHECK-LABEL: sdiv_i64:
54; CHECK:       // %bb.0:
55; CHECK-NEXT:    mov x8, #6148914691236517205 // =0x5555555555555555
56; CHECK-NEXT:    ptrue p0.d
57; CHECK-NEXT:    movk x8, #21846
58; CHECK-NEXT:    mov z1.d, x8
59; CHECK-NEXT:    smulh z0.d, p0/m, z0.d, z1.d
60; CHECK-NEXT:    lsr z1.d, z0.d, #63
61; CHECK-NEXT:    add z0.d, z0.d, z1.d
62; CHECK-NEXT:    ret
63  %div = sdiv <vscale x 2 x i64> %a, splat (i64 3)
64  ret <vscale x 2 x i64> %div
65}
66
67;
68; UDIV
69;
70
; Unsigned divide of every i8 lane of a scalable vector by the constant 3.
; The expected output contains no divide instruction: each lane is multiplied
; by the splatted byte 0xab (printed as the immediate -85) with UMULH, which
; keeps the high half of the product, and the result is shifted right by 1.
; Unlike the signed tests, no add follows the shift.
71define <vscale x 16 x i8> @udiv_i8(<vscale x 16 x i8> %a) #0 {
72; CHECK-LABEL: udiv_i8:
73; CHECK:       // %bb.0:
74; CHECK-NEXT:    mov z1.b, #-85 // =0xffffffffffffffab
75; CHECK-NEXT:    ptrue p0.b
76; CHECK-NEXT:    umulh z0.b, p0/m, z0.b, z1.b
77; CHECK-NEXT:    lsr z0.b, z0.b, #1
78; CHECK-NEXT:    ret
79  %div = udiv <vscale x 16 x i8> %a, splat (i8 3)
80  ret <vscale x 16 x i8> %div
81}
82
; Unsigned divide of every i16 lane of a scalable vector by the constant 3.
; w8 is loaded with -21845 (0xffffaaab), so the halfword broadcast to each
; lane is 0xaaab; UMULH keeps the high half of each product and the result is
; shifted right by 1.
83define <vscale x 8 x i16> @udiv_i16(<vscale x 8 x i16> %a) #0 {
84; CHECK-LABEL: udiv_i16:
85; CHECK:       // %bb.0:
86; CHECK-NEXT:    mov w8, #-21845 // =0xffffaaab
87; CHECK-NEXT:    ptrue p0.h
88; CHECK-NEXT:    mov z1.h, w8
89; CHECK-NEXT:    umulh z0.h, p0/m, z0.h, z1.h
90; CHECK-NEXT:    lsr z0.h, z0.h, #1
91; CHECK-NEXT:    ret
92  %div = udiv <vscale x 8 x i16> %a, splat (i16 3)
93  ret <vscale x 8 x i16> %div
94}
95
; Unsigned divide of every i32 lane of a scalable vector by the constant 3.
; The multiplier 0xaaaaaaab is built in w8 with a mov/movk pair (low half
; 43691 = 0xaaab, high half 43690 = 0xaaaa) and broadcast to all word lanes;
; UMULH keeps the high half of each product and the result is shifted right
; by 1.
96define <vscale x 4 x i32> @udiv_i32(<vscale x 4 x i32> %a) #0 {
97; CHECK-LABEL: udiv_i32:
98; CHECK:       // %bb.0:
99; CHECK-NEXT:    mov w8, #43691 // =0xaaab
100; CHECK-NEXT:    ptrue p0.s
101; CHECK-NEXT:    movk w8, #43690, lsl #16
102; CHECK-NEXT:    mov z1.s, w8
103; CHECK-NEXT:    umulh z0.s, p0/m, z0.s, z1.s
104; CHECK-NEXT:    lsr z0.s, z0.s, #1
105; CHECK-NEXT:    ret
106  %div = udiv <vscale x 4 x i32> %a, splat (i32 3)
107  ret <vscale x 4 x i32> %div
108}
109
; Unsigned divide of every i64 lane of a scalable vector by the constant 3.
; x8 is first loaded with 0xaaaaaaaaaaaaaaaa and its low 16 bits are then
; replaced by 43691 (0xaaab) with movk, giving the multiplier
; 0xaaaaaaaaaaaaaaab, which is broadcast to all doubleword lanes. UMULH keeps
; the high half of each product and the result is shifted right by 1.
110define <vscale x 2 x i64> @udiv_i64(<vscale x 2 x i64> %a) #0 {
111; CHECK-LABEL: udiv_i64:
112; CHECK:       // %bb.0:
113; CHECK-NEXT:    mov x8, #-6148914691236517206 // =0xaaaaaaaaaaaaaaaa
114; CHECK-NEXT:    ptrue p0.d
115; CHECK-NEXT:    movk x8, #43691
116; CHECK-NEXT:    mov z1.d, x8
117; CHECK-NEXT:    umulh z0.d, p0/m, z0.d, z1.d
118; CHECK-NEXT:    lsr z0.d, z0.d, #1
119; CHECK-NEXT:    ret
120  %div = udiv <vscale x 2 x i64> %a, splat (i64 3)
121  ret <vscale x 2 x i64> %div
122}
123
; Attribute group #0, referenced by every function above, enables the SVE
; target feature for that function.
124attributes #0 = { "target-features"="+sve" }
125