xref: /llvm-project/llvm/test/CodeGen/AArch64/concatbinop.ll (revision 76ea5feb1f12ab35547a3aa1bc1b84d4bca69aa7)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
2; RUN: llc -mtriple=aarch64 -mattr=+fullfp16 -verify-machineinstrs -o - %s | FileCheck %s
3
4
; concat_add: two independent <4 x i16> adds (%a+%b and %c+%d) whose results
; are concatenated into one <8 x i16> by an identity-mask shufflevector.
; The expected output below concatenates the operands first (the two
; "mov v.d[1]" lane inserts) and then performs a single 8h add.
5define <8 x i16> @concat_add(<4 x i16> %a, <4 x i16> %b, <4 x i16> %c, <4 x i16> %d) {
6; CHECK-LABEL: concat_add:
7; CHECK:       // %bb.0:
8; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
9; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
10; CHECK-NEXT:    // kill: def $d3 killed $d3 def $q3
11; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
12; CHECK-NEXT:    mov v1.d[1], v3.d[0]
13; CHECK-NEXT:    mov v0.d[1], v2.d[0]
14; CHECK-NEXT:    add v0.8h, v0.8h, v1.8h
15; CHECK-NEXT:    ret
16  %x = add <4 x i16> %a, %b
17  %y = add <4 x i16> %c, %d
  ; Mask 0..7 selects all of %x followed by all of %y, i.e. a plain concat.
18  %z = shufflevector <4 x i16> %x, <4 x i16> %y, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
19  ret <8 x i16> %z
20}
21
; concat_addtunc: the adds are done on <4 x i32>, each sum is then truncated
; to <4 x i16>, and the two truncated halves are concatenated.
; The expected output keeps the two 4s adds separate and follows them with a
; single uzp1 on 8h lanes.
22define <8 x i16> @concat_addtunc(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c, <4 x i32> %d) {
23; CHECK-LABEL: concat_addtunc:
24; CHECK:       // %bb.0:
25; CHECK-NEXT:    add v2.4s, v2.4s, v3.4s
26; CHECK-NEXT:    add v0.4s, v0.4s, v1.4s
27; CHECK-NEXT:    uzp1 v0.8h, v0.8h, v2.8h
28; CHECK-NEXT:    ret
29  %x = add <4 x i32> %a, %b
30  %y = add <4 x i32> %c, %d
  ; Truncation happens after the add here (contrast with concat_addtunc2).
31  %xt = trunc <4 x i32> %x to <4 x i16>
32  %yt = trunc <4 x i32> %y to <4 x i16>
33  %z = shufflevector <4 x i16> %xt, <4 x i16> %yt, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
34  ret <8 x i16> %z
35}
36
; concat_addtunc2: all four <4 x i32> inputs are truncated to <4 x i16> first,
; then added pairwise (%at+%bt, %ct+%dt) and the two sums are concatenated.
; The expected output uses two uzp1 instructions on 8h lanes followed by a
; single 8h add.
37define <8 x i16> @concat_addtunc2(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c, <4 x i32> %d) {
38; CHECK-LABEL: concat_addtunc2:
39; CHECK:       // %bb.0:
40; CHECK-NEXT:    uzp1 v1.8h, v1.8h, v3.8h
41; CHECK-NEXT:    uzp1 v0.8h, v0.8h, v2.8h
42; CHECK-NEXT:    add v0.8h, v0.8h, v1.8h
43; CHECK-NEXT:    ret
  ; Truncation happens before the add here (contrast with concat_addtunc).
44  %at = trunc <4 x i32> %a to <4 x i16>
45  %bt = trunc <4 x i32> %b to <4 x i16>
46  %ct = trunc <4 x i32> %c to <4 x i16>
47  %dt = trunc <4 x i32> %d to <4 x i16>
48  %x = add <4 x i16> %at, %bt
49  %y = add <4 x i16> %ct, %dt
50  %z = shufflevector <4 x i16> %x, <4 x i16> %y, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
51  ret <8 x i16> %z
52}
53
; concat_sub: same shape as concat_add but with sub: two <4 x i16>
; subtractions (%a-%b and %c-%d) concatenated into <8 x i16>.
; The expected output concatenates the operands and issues one 8h sub.
54define <8 x i16> @concat_sub(<4 x i16> %a, <4 x i16> %b, <4 x i16> %c, <4 x i16> %d) {
55; CHECK-LABEL: concat_sub:
56; CHECK:       // %bb.0:
57; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
58; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
59; CHECK-NEXT:    // kill: def $d3 killed $d3 def $q3
60; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
61; CHECK-NEXT:    mov v1.d[1], v3.d[0]
62; CHECK-NEXT:    mov v0.d[1], v2.d[0]
63; CHECK-NEXT:    sub v0.8h, v0.8h, v1.8h
64; CHECK-NEXT:    ret
65  %x = sub <4 x i16> %a, %b
66  %y = sub <4 x i16> %c, %d
  ; Identity mask 0..7: %x in the low half, %y in the high half.
67  %z = shufflevector <4 x i16> %x, <4 x i16> %y, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
68  ret <8 x i16> %z
69}
70
; concat_mul: same shape as concat_add but with mul: two <4 x i16>
; multiplications (%a*%b and %c*%d) concatenated into <8 x i16>.
; The expected output concatenates the operands and issues one 8h mul.
71define <8 x i16> @concat_mul(<4 x i16> %a, <4 x i16> %b, <4 x i16> %c, <4 x i16> %d) {
72; CHECK-LABEL: concat_mul:
73; CHECK:       // %bb.0:
74; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
75; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
76; CHECK-NEXT:    // kill: def $d3 killed $d3 def $q3
77; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
78; CHECK-NEXT:    mov v1.d[1], v3.d[0]
79; CHECK-NEXT:    mov v0.d[1], v2.d[0]
80; CHECK-NEXT:    mul v0.8h, v0.8h, v1.8h
81; CHECK-NEXT:    ret
82  %x = mul <4 x i16> %a, %b
83  %y = mul <4 x i16> %c, %d
  ; Identity mask 0..7: %x in the low half, %y in the high half.
84  %z = shufflevector <4 x i16> %x, <4 x i16> %y, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
85  ret <8 x i16> %z
86}
87
; concat_xor: same shape as concat_add but with xor: two <4 x i16> xors
; (%a^%b and %c^%d) concatenated into <8 x i16>.
; The expected output concatenates the operands and issues one eor, which is
; written with the 16b arrangement rather than 8h.
88define <8 x i16> @concat_xor(<4 x i16> %a, <4 x i16> %b, <4 x i16> %c, <4 x i16> %d) {
89; CHECK-LABEL: concat_xor:
90; CHECK:       // %bb.0:
91; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
92; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
93; CHECK-NEXT:    // kill: def $d3 killed $d3 def $q3
94; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
95; CHECK-NEXT:    mov v1.d[1], v3.d[0]
96; CHECK-NEXT:    mov v0.d[1], v2.d[0]
97; CHECK-NEXT:    eor v0.16b, v0.16b, v1.16b
98; CHECK-NEXT:    ret
99  %x = xor <4 x i16> %a, %b
100  %y = xor <4 x i16> %c, %d
  ; Identity mask 0..7: %x in the low half, %y in the high half.
101  %z = shufflevector <4 x i16> %x, <4 x i16> %y, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
102  ret <8 x i16> %z
103}
104
; concat_fadd: floating-point variant of concat_add on <4 x half>: two fadds
; (%a+%b and %c+%d) concatenated into <8 x half>. The test is run with
; +fullfp16, and the expected output is one fadd on 8h lanes after the
; operands have been concatenated.
105define <8 x half> @concat_fadd(<4 x half> %a, <4 x half> %b, <4 x half> %c, <4 x half> %d) {
106; CHECK-LABEL: concat_fadd:
107; CHECK:       // %bb.0:
108; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
109; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
110; CHECK-NEXT:    // kill: def $d3 killed $d3 def $q3
111; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
112; CHECK-NEXT:    mov v1.d[1], v3.d[0]
113; CHECK-NEXT:    mov v0.d[1], v2.d[0]
114; CHECK-NEXT:    fadd v0.8h, v0.8h, v1.8h
115; CHECK-NEXT:    ret
116  %x = fadd <4 x half> %a, %b
117  %y = fadd <4 x half> %c, %d
  ; Identity mask 0..7: %x in the low half, %y in the high half.
118  %z = shufflevector <4 x half> %x, <4 x half> %y, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
119  ret <8 x half> %z
120}
121
; concat_fmul: floating-point multiply variant on <4 x half>: two fmuls
; (%a*%b and %c*%d) concatenated into <8 x half>. The expected output is one
; fmul on 8h lanes after the operands have been concatenated.
122define <8 x half> @concat_fmul(<4 x half> %a, <4 x half> %b, <4 x half> %c, <4 x half> %d) {
123; CHECK-LABEL: concat_fmul:
124; CHECK:       // %bb.0:
125; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
126; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
127; CHECK-NEXT:    // kill: def $d3 killed $d3 def $q3
128; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
129; CHECK-NEXT:    mov v1.d[1], v3.d[0]
130; CHECK-NEXT:    mov v0.d[1], v2.d[0]
131; CHECK-NEXT:    fmul v0.8h, v0.8h, v1.8h
132; CHECK-NEXT:    ret
133  %x = fmul <4 x half> %a, %b
134  %y = fmul <4 x half> %c, %d
  ; Identity mask 0..7: %x in the low half, %y in the high half.
135  %z = shufflevector <4 x half> %x, <4 x half> %y, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
136  ret <8 x half> %z
137}
138
; concat_min: intrinsic-call variant: two llvm.minnum.v4f16 calls
; (min(%a,%b) and min(%c,%d)) concatenated into <8 x half>. Both halves use
; the same intrinsic, and the expected output is one fminnm on 8h lanes after
; the operands have been concatenated.
139define <8 x half> @concat_min(<4 x half> %a, <4 x half> %b, <4 x half> %c, <4 x half> %d) {
140; CHECK-LABEL: concat_min:
141; CHECK:       // %bb.0:
142; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
143; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
144; CHECK-NEXT:    // kill: def $d3 killed $d3 def $q3
145; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
146; CHECK-NEXT:    mov v1.d[1], v3.d[0]
147; CHECK-NEXT:    mov v0.d[1], v2.d[0]
148; CHECK-NEXT:    fminnm v0.8h, v0.8h, v1.8h
149; CHECK-NEXT:    ret
150  %x = call <4 x half> @llvm.minnum.v4f16(<4 x half> %a, <4 x half> %b)
151  %y = call <4 x half> @llvm.minnum.v4f16(<4 x half> %c, <4 x half> %d)
  ; Identity mask 0..7: %x in the low half, %y in the high half.
152  %z = shufflevector <4 x half> %x, <4 x half> %y, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
153  ret <8 x half> %z
154}
155
; concat_minmax: mismatched-operation case: the low half is
; llvm.minnum.v4f16(%a,%b) and the high half is llvm.maxnum.v4f16(%c,%d).
; Because the two halves use different operations, the expected output keeps
; two separate 4h instructions (fminnm and fmaxnm) and concatenates their
; results afterwards with a lane insert.
156define <8 x half> @concat_minmax(<4 x half> %a, <4 x half> %b, <4 x half> %c, <4 x half> %d) {
157; CHECK-LABEL: concat_minmax:
158; CHECK:       // %bb.0:
159; CHECK-NEXT:    fmaxnm v2.4h, v2.4h, v3.4h
160; CHECK-NEXT:    fminnm v0.4h, v0.4h, v1.4h
161; CHECK-NEXT:    mov v0.d[1], v2.d[0]
162; CHECK-NEXT:    ret
163  %x = call <4 x half> @llvm.minnum.v4f16(<4 x half> %a, <4 x half> %b)
  ; Note: maxnum here, not minnum, so %x and %y come from different operations.
164  %y = call <4 x half> @llvm.maxnum.v4f16(<4 x half> %c, <4 x half> %d)
165  %z = shufflevector <4 x half> %x, <4 x half> %y, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
166  ret <8 x half> %z
167}
168
; signOf_neon: loads two <8 x i16> vectors from %a (byte offsets 0 and 16) and
; two from %b (same offsets), compares them lane-wise as unsigned values, and
; returns a <16 x i8> where each lane is:
;   -1 if the %b element is greater than the %a element,
;    1 if the %a element is greater than the %b element,
;    0 otherwise.
; The low 8 result lanes come from the first pair of loads and the high 8
; lanes from the second pair.
; The expected output performs the four compares on 8h lanes, narrows each
; pair of compare results with one uzp1 on 16b lanes, and then applies a
; single and (with the movi #1 constant) and a single orr on 16b lanes.
169define <16 x i8> @signOf_neon(ptr nocapture noundef readonly %a, ptr nocapture noundef readonly %b) {
170; CHECK-LABEL: signOf_neon:
171; CHECK:       // %bb.0: // %entry
172; CHECK-NEXT:    ldp q1, q2, [x0]
173; CHECK-NEXT:    movi v0.16b, #1
174; CHECK-NEXT:    ldp q3, q4, [x1]
175; CHECK-NEXT:    cmhi v5.8h, v1.8h, v3.8h
176; CHECK-NEXT:    cmhi v6.8h, v2.8h, v4.8h
177; CHECK-NEXT:    cmhi v1.8h, v3.8h, v1.8h
178; CHECK-NEXT:    cmhi v2.8h, v4.8h, v2.8h
179; CHECK-NEXT:    uzp1 v3.16b, v5.16b, v6.16b
180; CHECK-NEXT:    uzp1 v1.16b, v1.16b, v2.16b
181; CHECK-NEXT:    and v0.16b, v3.16b, v0.16b
182; CHECK-NEXT:    orr v0.16b, v0.16b, v1.16b
183; CHECK-NEXT:    ret
184entry:
  ; %0/%1 are the first and second 8 x i16 of %a; %2/%3 likewise for %b.
185  %0 = load <8 x i16>, ptr %a, align 2
186  %add.ptr = getelementptr inbounds i8, ptr %a, i64 16
187  %1 = load <8 x i16>, ptr %add.ptr, align 2
188  %2 = load <8 x i16>, ptr %b, align 2
189  %add.ptr6 = getelementptr inbounds i8, ptr %b, i64 16
190  %3 = load <8 x i16>, ptr %add.ptr6, align 2
  ; Unsigned "a > b" masks (cmp.i33, cmp.i31) and "b > a" masks (cmp.i29, cmp.i).
191  %cmp.i33 = icmp ugt <8 x i16> %0, %2
192  %cmp.i31 = icmp ugt <8 x i16> %1, %3
193  %cmp.i29 = icmp ugt <8 x i16> %2, %0
194  %cmp.i = icmp ugt <8 x i16> %3, %1
  ; zext of the i1 mask yields 1 where a > b and 0 elsewhere.
195  %vmovn.i38.neg = zext <8 x i1> %cmp.i33 to <8 x i8>
196  %vmovn.i37.neg = zext <8 x i1> %cmp.i31 to <8 x i8>
  ; Where b > a pick -1, otherwise keep the 0/1 value computed above.
197  %4 = select <8 x i1> %cmp.i29, <8 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>, <8 x i8> %vmovn.i38.neg
198  %5 = select <8 x i1> %cmp.i, <8 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>, <8 x i8> %vmovn.i37.neg
  ; Identity mask 0..15 concatenates the two 8-lane results into 16 lanes.
199  %or.i = shufflevector <8 x i8> %4, <8 x i8> %5, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
200  ret <16 x i8> %or.i
201}
202