xref: /llvm-project/llvm/test/CodeGen/PowerPC/aix-vec_extract_p9_2.ll (revision c2e7c9cb33acbd118fe5011a1607d6cf8e21de34)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -verify-machineinstrs -mtriple=powerpc64-ibm-aix-xcoff -vec-extabi -mcpu=pwr9 < %s | FileCheck %s -check-prefix=CHECK-64
3; RUN: llc -verify-machineinstrs -mtriple=powerpc-ibm-aix-xcoff -vec-extabi -mcpu=pwr9 < %s | FileCheck %s -check-prefix=CHECK-32
4
; test_add1: extract one byte of %a at the run-time %index, add %c to it in
; i32, and return the sum truncated to i8 (zero-extended return value).
; Expected code:
;   64-bit: the index is cleared to its low 32 bits (clrldi) and the element is
;           read with vextublx; the result is zero-extended from 8 bits.
;   32-bit: the vector is stored to the stack (stxv at -16(1)), the index is
;           masked to 0-15 (clrlwi 3, 3, 28) and the byte is loaded with lbzx.
5define zeroext i8 @test_add1(<16 x i8> %a, i32 signext %index, i8 zeroext %c) {
6; CHECK-64-LABEL: test_add1:
7; CHECK-64:       # %bb.0: # %entry
8; CHECK-64-NEXT:    clrldi 3, 3, 32
9; CHECK-64-NEXT:    vextublx 3, 3, 2
10; CHECK-64-NEXT:    add 3, 3, 4
11; CHECK-64-NEXT:    clrldi 3, 3, 56
12; CHECK-64-NEXT:    blr
13;
14; CHECK-32-LABEL: test_add1:
15; CHECK-32:       # %bb.0: # %entry
16; CHECK-32-NEXT:    addi 5, 1, -16
17; CHECK-32-NEXT:    clrlwi 3, 3, 28
18; CHECK-32-NEXT:    stxv 34, -16(1)
19; CHECK-32-NEXT:    lbzx 3, 5, 3
20; CHECK-32-NEXT:    add 3, 3, 4
21; CHECK-32-NEXT:    clrlwi 3, 3, 24
22; CHECK-32-NEXT:    blr
23entry:
24  %vecext = extractelement <16 x i8> %a, i32 %index
; Both operands are widened to i32 with zext before the add.
25  %conv = zext i8 %vecext to i32
26  %conv1 = zext i8 %c to i32
27  %add = add nuw nsw i32 %conv, %conv1
; Narrow back to the i8 return type; the trailing clrldi/clrlwi in the expected
; output corresponds to the zeroext attribute on the return value.
28  %conv2 = trunc i32 %add to i8
29  ret i8 %conv2
30}
31
; test_add2: same byte extract-and-add as test_add1, but the argument %c and
; the return value are signext. The expected output therefore ends in extsb
; instead of a clear-left instruction; the element access itself is unchanged
; (vextublx for 64-bit, stack store + lbzx for 32-bit).
32define signext i8 @test_add2(<16 x i8> %a, i32 signext %index, i8 signext %c) {
33; CHECK-64-LABEL: test_add2:
34; CHECK-64:       # %bb.0: # %entry
35; CHECK-64-NEXT:    clrldi 3, 3, 32
36; CHECK-64-NEXT:    vextublx 3, 3, 2
37; CHECK-64-NEXT:    add 3, 3, 4
38; CHECK-64-NEXT:    extsb 3, 3
39; CHECK-64-NEXT:    blr
40;
41; CHECK-32-LABEL: test_add2:
42; CHECK-32:       # %bb.0: # %entry
43; CHECK-32-NEXT:    addi 5, 1, -16
44; CHECK-32-NEXT:    clrlwi 3, 3, 28
45; CHECK-32-NEXT:    stxv 34, -16(1)
46; CHECK-32-NEXT:    lbzx 3, 5, 3
47; CHECK-32-NEXT:    add 3, 3, 4
48; CHECK-32-NEXT:    extsb 3, 3
49; CHECK-32-NEXT:    blr
50entry:
51  %vecext = extractelement <16 x i8> %a, i32 %index
; NOTE: %c is declared signext but is widened with zext here, like the
; extracted element; only the low 8 bits of the sum are returned.
52  %conv3 = zext i8 %vecext to i32
53  %conv14 = zext i8 %c to i32
54  %add = add nuw nsw i32 %conv3, %conv14
55  %conv2 = trunc i32 %add to i8
56  ret i8 %conv2
57}
58
; test_add3: extract one halfword of %a at the run-time %index, add %c to it
; in i32, and return the sum truncated to i16 (zero-extended return value).
; Expected code:
;   64-bit: the index is turned into a byte offset (rlwinm 3, 3, 1, 28, 30)
;           and the element is read with vextuhlx.
;   32-bit: the vector is stored to the stack (stxv at -16(1)) and the
;           halfword is loaded with lhzx at the same scaled offset.
59define zeroext i16 @test_add3(<8 x i16> %a, i32 signext %index, i16 zeroext %c) {
60; CHECK-64-LABEL: test_add3:
61; CHECK-64:       # %bb.0: # %entry
62; CHECK-64-NEXT:    clrldi 3, 3, 32
63; CHECK-64-NEXT:    rlwinm 3, 3, 1, 28, 30
64; CHECK-64-NEXT:    vextuhlx 3, 3, 2
65; CHECK-64-NEXT:    add 3, 3, 4
66; CHECK-64-NEXT:    clrldi 3, 3, 48
67; CHECK-64-NEXT:    blr
68;
69; CHECK-32-LABEL: test_add3:
70; CHECK-32:       # %bb.0: # %entry
71; CHECK-32-NEXT:    addi 5, 1, -16
72; CHECK-32-NEXT:    rlwinm 3, 3, 1, 28, 30
73; CHECK-32-NEXT:    stxv 34, -16(1)
74; CHECK-32-NEXT:    lhzx 3, 5, 3
75; CHECK-32-NEXT:    add 3, 3, 4
76; CHECK-32-NEXT:    clrlwi 3, 3, 16
77; CHECK-32-NEXT:    blr
78entry:
79  %vecext = extractelement <8 x i16> %a, i32 %index
; Both operands are widened to i32 with zext before the add.
80  %conv = zext i16 %vecext to i32
81  %conv1 = zext i16 %c to i32
82  %add = add nuw nsw i32 %conv, %conv1
; Narrow back to i16; the trailing clrldi/clrlwi in the expected output
; corresponds to the zeroext attribute on the return value.
83  %conv2 = trunc i32 %add to i16
84  ret i16 %conv2
85}
86
; test_add4: same halfword extract-and-add as test_add3, but the argument %c
; and the return value are signext. The expected output therefore ends in
; extsh; the element access itself is unchanged (vextuhlx for 64-bit, stack
; store + lhzx for 32-bit).
87define signext i16 @test_add4(<8 x i16> %a, i32 signext %index, i16 signext %c) {
88; CHECK-64-LABEL: test_add4:
89; CHECK-64:       # %bb.0: # %entry
90; CHECK-64-NEXT:    clrldi 3, 3, 32
91; CHECK-64-NEXT:    rlwinm 3, 3, 1, 28, 30
92; CHECK-64-NEXT:    vextuhlx 3, 3, 2
93; CHECK-64-NEXT:    add 3, 3, 4
94; CHECK-64-NEXT:    extsh 3, 3
95; CHECK-64-NEXT:    blr
96;
97; CHECK-32-LABEL: test_add4:
98; CHECK-32:       # %bb.0: # %entry
99; CHECK-32-NEXT:    addi 5, 1, -16
100; CHECK-32-NEXT:    rlwinm 3, 3, 1, 28, 30
101; CHECK-32-NEXT:    stxv 34, -16(1)
102; CHECK-32-NEXT:    lhzx 3, 5, 3
103; CHECK-32-NEXT:    add 3, 3, 4
104; CHECK-32-NEXT:    extsh 3, 3
105; CHECK-32-NEXT:    blr
106entry:
107  %vecext = extractelement <8 x i16> %a, i32 %index
; NOTE: %c is declared signext but is widened with zext here, like the
; extracted element; only the low 16 bits of the sum are returned.
108  %conv5 = zext i16 %vecext to i32
109  %conv16 = zext i16 %c to i32
110  %add = add nuw nsw i32 %conv5, %conv16
111  %conv2 = trunc i32 %add to i16
112  ret i16 %conv2
113}
114
; test_add5: extract one word of %a at the run-time %index and return it
; added to %c (zero-extended i32 return value, plain add with no wrap flags).
; Expected code:
;   64-bit: the index is turned into a byte offset (rlwinm 3, 3, 2, 28, 29),
;           the element is read with vextuwlx, and the sum is cleared to its
;           low 32 bits (clrldi 3, 3, 32).
;   32-bit: the vector is stored to the stack (stxv at -16(1)) and the word is
;           loaded with lwzx; no extension follows the add.
115define zeroext i32 @test_add5(<4 x i32> %a, i32 signext %index, i32 zeroext %c) {
116; CHECK-64-LABEL: test_add5:
117; CHECK-64:       # %bb.0: # %entry
118; CHECK-64-NEXT:    clrldi 3, 3, 32
119; CHECK-64-NEXT:    rlwinm 3, 3, 2, 28, 29
120; CHECK-64-NEXT:    vextuwlx 3, 3, 2
121; CHECK-64-NEXT:    add 3, 3, 4
122; CHECK-64-NEXT:    clrldi 3, 3, 32
123; CHECK-64-NEXT:    blr
124;
125; CHECK-32-LABEL: test_add5:
126; CHECK-32:       # %bb.0: # %entry
127; CHECK-32-NEXT:    addi 5, 1, -16
128; CHECK-32-NEXT:    rlwinm 3, 3, 2, 28, 29
129; CHECK-32-NEXT:    stxv 34, -16(1)
130; CHECK-32-NEXT:    lwzx 3, 5, 3
131; CHECK-32-NEXT:    add 3, 3, 4
132; CHECK-32-NEXT:    blr
133entry:
134  %vecext = extractelement <4 x i32> %a, i32 %index
135  %add = add i32 %vecext, %c
136  ret i32 %add
137}
138
; test_add6: same word extract-and-add as test_add5, but %c and the return
; value are signext and the add carries nsw. The 64-bit expected output ends
; in extsw instead of clrldi; the 32-bit expected output is the same sequence
; as for test_add5 (stack store + lwzx + add).
139define signext i32 @test_add6(<4 x i32> %a, i32 signext %index, i32 signext %c) {
140; CHECK-64-LABEL: test_add6:
141; CHECK-64:       # %bb.0: # %entry
142; CHECK-64-NEXT:    clrldi 3, 3, 32
143; CHECK-64-NEXT:    rlwinm 3, 3, 2, 28, 29
144; CHECK-64-NEXT:    vextuwlx 3, 3, 2
145; CHECK-64-NEXT:    add 3, 3, 4
146; CHECK-64-NEXT:    extsw 3, 3
147; CHECK-64-NEXT:    blr
148;
149; CHECK-32-LABEL: test_add6:
150; CHECK-32:       # %bb.0: # %entry
151; CHECK-32-NEXT:    addi 5, 1, -16
152; CHECK-32-NEXT:    rlwinm 3, 3, 2, 28, 29
153; CHECK-32-NEXT:    stxv 34, -16(1)
154; CHECK-32-NEXT:    lwzx 3, 5, 3
155; CHECK-32-NEXT:    add 3, 3, 4
156; CHECK-32-NEXT:    blr
157entry:
158  %vecext = extractelement <4 x i32> %a, i32 %index
159  %add = add nsw i32 %vecext, %c
160  ret i32 %add
161}
162
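163; When extracting word element 2 on LE, it's better to use mfvsrwz rather than vextuwrx.
; Both RUN lines of this file target big-endian AIX, so that LE form is not
; exercised here: for element 2 the 64-bit checks expect li 8 + vextuwlx and
; the 32-bit checks expect a store to the stack followed by lwz.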
; test7: return word element 2 of %a (constant index, zero-extended return).
; Expected code:
;   64-bit: byte offset 8 is materialised with li and the element is read
;           with vextuwlx.
;   32-bit: the vector is stored at -16(1) and the word is reloaded from
;           -8(1), i.e. 8 bytes into the stored vector.
164define zeroext i32 @test7(<4 x i32> %a) {
165; CHECK-64-LABEL: test7:
166; CHECK-64:       # %bb.0: # %entry
167; CHECK-64-NEXT:    li 3, 8
168; CHECK-64-NEXT:    vextuwlx 3, 3, 2
169; CHECK-64-NEXT:    blr
170;
171; CHECK-32-LABEL: test7:
172; CHECK-32:       # %bb.0: # %entry
173; CHECK-32-NEXT:    stxv 34, -16(1)
174; CHECK-32-NEXT:    lwz 3, -8(1)
175; CHECK-32-NEXT:    blr
176entry:
177  %vecext = extractelement <4 x i32> %a, i32 2
178  ret i32 %vecext
179}
180
; testadd_7: word element 2 of %a plus %c (zero-extended i32 return value).
; The element access matches test7 (li 8 + vextuwlx for 64-bit, stack store +
; lwz from -8(1) for 32-bit). The 64-bit expected output additionally clears
; the sum to its low 32 bits (clrldi 3, 3, 32).
181define zeroext i32 @testadd_7(<4 x i32> %a, i32 zeroext %c) {
182; CHECK-64-LABEL: testadd_7:
183; CHECK-64:       # %bb.0: # %entry
184; CHECK-64-NEXT:    li 4, 8
185; CHECK-64-NEXT:    vextuwlx 4, 4, 2
186; CHECK-64-NEXT:    add 3, 4, 3
187; CHECK-64-NEXT:    clrldi 3, 3, 32
188; CHECK-64-NEXT:    blr
189;
190; CHECK-32-LABEL: testadd_7:
191; CHECK-32:       # %bb.0: # %entry
192; CHECK-32-NEXT:    stxv 34, -16(1)
193; CHECK-32-NEXT:    lwz 4, -8(1)
194; CHECK-32-NEXT:    add 3, 4, 3
195; CHECK-32-NEXT:    blr
196entry:
197  %vecext = extractelement <4 x i32> %a, i32 2
198  %add = add i32 %vecext, %c
199  ret i32 %add
200}
201
; test8: return word element 2 of %a with a signext return value.
; Same element access as test7; the 64-bit expected output adds extsw after
; vextuwlx, while the 32-bit expected output is the plain stack store + lwz.
202define signext i32 @test8(<4 x i32> %a) {
203; CHECK-64-LABEL: test8:
204; CHECK-64:       # %bb.0: # %entry
205; CHECK-64-NEXT:    li 3, 8
206; CHECK-64-NEXT:    vextuwlx 3, 3, 2
207; CHECK-64-NEXT:    extsw 3, 3
208; CHECK-64-NEXT:    blr
209;
210; CHECK-32-LABEL: test8:
211; CHECK-32:       # %bb.0: # %entry
212; CHECK-32-NEXT:    stxv 34, -16(1)
213; CHECK-32-NEXT:    lwz 3, -8(1)
214; CHECK-32-NEXT:    blr
215entry:
216  %vecext = extractelement <4 x i32> %a, i32 2
217  ret i32 %vecext
218}
219
; testadd_8: word element 2 of %a plus %c with nsw (signext argument and
; return value). Element access as in test8; the 64-bit expected output
; sign-extends the sum with extsw, the 32-bit expected output has no
; extension after the add.
220define signext i32 @testadd_8(<4 x i32> %a, i32 signext %c) {
221; CHECK-64-LABEL: testadd_8:
222; CHECK-64:       # %bb.0: # %entry
223; CHECK-64-NEXT:    li 4, 8
224; CHECK-64-NEXT:    vextuwlx 4, 4, 2
225; CHECK-64-NEXT:    add 3, 4, 3
226; CHECK-64-NEXT:    extsw 3, 3
227; CHECK-64-NEXT:    blr
228;
229; CHECK-32-LABEL: testadd_8:
230; CHECK-32:       # %bb.0: # %entry
231; CHECK-32-NEXT:    stxv 34, -16(1)
232; CHECK-32-NEXT:    lwz 4, -8(1)
233; CHECK-32-NEXT:    add 3, 4, 3
234; CHECK-32-NEXT:    blr
235entry:
236  %vecext = extractelement <4 x i32> %a, i32 2
237  %add = add nsw i32 %vecext, %c
238  ret i32 %add
239}
240
241; When extracting word element 1 on BE, it's better to use mfvsrwz rather than vextuwlx
; test9: return word element 1 of %a with a signext return value.
; Expected code:
;   64-bit: the element is moved straight out of the vector register with
;           mfvsrwz (no index register, no vextuwlx) and sign-extended with
;           extsw.
;   32-bit: the vector is stored at -16(1) and the word is reloaded from
;           -12(1), i.e. 4 bytes into the stored vector.
242define signext i32 @test9(<4 x i32> %a) {
243; CHECK-64-LABEL: test9:
244; CHECK-64:       # %bb.0: # %entry
245; CHECK-64-NEXT:    mfvsrwz 3, 34
246; CHECK-64-NEXT:    extsw 3, 3
247; CHECK-64-NEXT:    blr
248;
249; CHECK-32-LABEL: test9:
250; CHECK-32:       # %bb.0: # %entry
251; CHECK-32-NEXT:    stxv 34, -16(1)
252; CHECK-32-NEXT:    lwz 3, -12(1)
253; CHECK-32-NEXT:    blr
254entry:
255  %vecext = extractelement <4 x i32> %a, i32 1
256  ret i32 %vecext
257}
258
; testadd_9: word element 1 of %a plus %c with nsw (signext argument and
; return value). Element access as in test9 (mfvsrwz for 64-bit, stack store +
; lwz from -12(1) for 32-bit); the 64-bit expected output sign-extends the sum
; with extsw.
259define signext i32 @testadd_9(<4 x i32> %a, i32 signext %c) {
260; CHECK-64-LABEL: testadd_9:
261; CHECK-64:       # %bb.0: # %entry
262; CHECK-64-NEXT:    mfvsrwz 4, 34
263; CHECK-64-NEXT:    add 3, 4, 3
264; CHECK-64-NEXT:    extsw 3, 3
265; CHECK-64-NEXT:    blr
266;
267; CHECK-32-LABEL: testadd_9:
268; CHECK-32:       # %bb.0: # %entry
269; CHECK-32-NEXT:    stxv 34, -16(1)
270; CHECK-32-NEXT:    lwz 4, -12(1)
271; CHECK-32-NEXT:    add 3, 4, 3
272; CHECK-32-NEXT:    blr
273entry:
274  %vecext = extractelement <4 x i32> %a, i32 1
275  %add = add nsw i32 %vecext, %c
276  ret i32 %add
277}
278