xref: /llvm-project/llvm/test/CodeGen/NVPTX/extractelement.ll (revision 310e79875752886a7713911e2a1ec14bc75bd4b3)
1fe7d2841SArtem Belevich; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; Test driver: the first invocation compiles this file with llc for
; nvptx64 / sm_35 (with -verify-machineinstrs) and pipes the PTX into FileCheck.
; The second invocation is guarded by the `ptxas` lit feature and hands the
; llc output to %ptxas-verify.
2b279f6b0SFangrui Song; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_35 -verify-machineinstrs | FileCheck %s
3b279f6b0SFangrui Song; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64 -mcpu=sm_35 | %ptxas-verify %}
4fe7d2841SArtem Belevichtarget triple = "nvptx64-nvidia-cuda"
5d06b3e3bSArtem Belevich
6d06b3e3bSArtem Belevich
; test_v2i8: reinterpret an i16 argument as <2 x i8>, extract both lanes,
; sign-extend each lane to i16 and return their sum.
; The expected PTX below works directly on the 16-bit scalar: one value is
; produced with cvt.s16.s8 and the other with an arithmetic shift right by 8,
; followed by a single add.s16.
7d06b3e3bSArtem Belevichdefine i16  @test_v2i8(i16 %a) {
8fe7d2841SArtem Belevich; CHECK-LABEL: test_v2i8(
9fe7d2841SArtem Belevich; CHECK:       {
10fe7d2841SArtem Belevich; CHECK-NEXT:    .reg .b16 %rs<5>;
11fe7d2841SArtem Belevich; CHECK-NEXT:    .reg .b32 %r<2>;
12fe7d2841SArtem Belevich; CHECK-EMPTY:
13fe7d2841SArtem Belevich; CHECK-NEXT:  // %bb.0:
14fe7d2841SArtem Belevich; CHECK-NEXT:    ld.param.u16 %rs1, [test_v2i8_param_0];
15fe7d2841SArtem Belevich; CHECK-NEXT:    cvt.s16.s8 %rs2, %rs1;
16fe7d2841SArtem Belevich; CHECK-NEXT:    shr.s16 %rs3, %rs1, 8;
17fe7d2841SArtem Belevich; CHECK-NEXT:    add.s16 %rs4, %rs2, %rs3;
18fe7d2841SArtem Belevich; CHECK-NEXT:    cvt.u32.u16 %r1, %rs4;
190f0a96b8SYoungsuk Kim; CHECK-NEXT:    st.param.b32 [func_retval0], %r1;
20fe7d2841SArtem Belevich; CHECK-NEXT:    ret;
21d06b3e3bSArtem Belevich  %v = bitcast i16 %a to <2 x i8>
22d06b3e3bSArtem Belevich  %r0 = extractelement <2 x i8> %v, i64 0
23d06b3e3bSArtem Belevich  %r1 = extractelement <2 x i8> %v, i64 1
24d06b3e3bSArtem Belevich  %r0i = sext i8 %r0 to i16
25d06b3e3bSArtem Belevich  %r1i = sext i8 %r1 to i16
26d06b3e3bSArtem Belevich  %r01 = add i16 %r0i, %r1i
27d06b3e3bSArtem Belevich  ret i16 %r01
28d06b3e3bSArtem Belevich}
29d06b3e3bSArtem Belevich
; test_v2i8_load: load a <2 x i8> from %a, extract both lanes and return
; true only when both lanes compare equal to zero (the select with a `false`
; arm combines the two icmp results).
; The expected PTX below uses one ld.v2.u8 vector load, ORs the two loaded
; values together, masks the result with 255 and performs a single
; compare-against-zero.
30fe7d2841SArtem Belevichdefine i1  @test_v2i8_load(ptr %a) {
31fe7d2841SArtem Belevich; CHECK-LABEL: test_v2i8_load(
32fe7d2841SArtem Belevich; CHECK:       {
33fe7d2841SArtem Belevich; CHECK-NEXT:    .reg .pred %p<2>;
34fe7d2841SArtem Belevich; CHECK-NEXT:    .reg .b16 %rs<7>;
35fe7d2841SArtem Belevich; CHECK-NEXT:    .reg .b32 %r<2>;
36fe7d2841SArtem Belevich; CHECK-NEXT:    .reg .b64 %rd<2>;
37fe7d2841SArtem Belevich; CHECK-EMPTY:
38fe7d2841SArtem Belevich; CHECK-NEXT:  // %bb.0:
39fe7d2841SArtem Belevich; CHECK-NEXT:    ld.param.u64 %rd1, [test_v2i8_load_param_0];
40fe7d2841SArtem Belevich; CHECK-NEXT:    ld.v2.u8 {%rs1, %rs2}, [%rd1];
41fe7d2841SArtem Belevich; CHECK-NEXT:    or.b16 %rs5, %rs1, %rs2;
42fe7d2841SArtem Belevich; CHECK-NEXT:    and.b16 %rs6, %rs5, 255;
43fe7d2841SArtem Belevich; CHECK-NEXT:    setp.eq.s16 %p1, %rs6, 0;
44fe7d2841SArtem Belevich; CHECK-NEXT:    selp.u32 %r1, 1, 0, %p1;
450f0a96b8SYoungsuk Kim; CHECK-NEXT:    st.param.b32 [func_retval0], %r1;
46fe7d2841SArtem Belevich; CHECK-NEXT:    ret;
47fe7d2841SArtem Belevich  %v = load <2 x i8>, ptr %a, align 4
48fe7d2841SArtem Belevich  %r0 = extractelement <2 x i8> %v, i64 0
49fe7d2841SArtem Belevich  %r1 = extractelement <2 x i8> %v, i64 1
50fe7d2841SArtem Belevich  %icmp = icmp eq i8 %r0, 0
51fe7d2841SArtem Belevich  %icmp3 = icmp eq i8 %r1, 0
52fe7d2841SArtem Belevich  %select = select i1 %icmp, i1 %icmp3, i1 false
53fe7d2841SArtem Belevich  ret i1 %select
54fe7d2841SArtem Belevich}
; test_v4i8: reinterpret an i32 argument as <4 x i8>, extract all four lanes,
; sign-extend each to i16 and return the i16 sum (pairwise adds, then a final
; add).
; The expected PTX below pulls each byte out of the 32-bit register with
; bfe.s32 at bit offsets 0, 8, 16 and 24 (width 8), converts each result with
; cvt.s8.s32 into a 16-bit register, and sums with add.s16.
55d06b3e3bSArtem Belevichdefine i16  @test_v4i8(i32 %a) {
56fe7d2841SArtem Belevich; CHECK-LABEL: test_v4i8(
57fe7d2841SArtem Belevich; CHECK:       {
58fe7d2841SArtem Belevich; CHECK-NEXT:    .reg .b16 %rs<8>;
59fe7d2841SArtem Belevich; CHECK-NEXT:    .reg .b32 %r<7>;
60fe7d2841SArtem Belevich; CHECK-EMPTY:
61fe7d2841SArtem Belevich; CHECK-NEXT:  // %bb.0:
62fe7d2841SArtem Belevich; CHECK-NEXT:    ld.param.u32 %r1, [test_v4i8_param_0];
63fe7d2841SArtem Belevich; CHECK-NEXT:    bfe.s32 %r2, %r1, 0, 8;
64fe7d2841SArtem Belevich; CHECK-NEXT:    cvt.s8.s32 %rs1, %r2;
65fe7d2841SArtem Belevich; CHECK-NEXT:    bfe.s32 %r3, %r1, 8, 8;
66fe7d2841SArtem Belevich; CHECK-NEXT:    cvt.s8.s32 %rs2, %r3;
67fe7d2841SArtem Belevich; CHECK-NEXT:    bfe.s32 %r4, %r1, 16, 8;
68fe7d2841SArtem Belevich; CHECK-NEXT:    cvt.s8.s32 %rs3, %r4;
69fe7d2841SArtem Belevich; CHECK-NEXT:    bfe.s32 %r5, %r1, 24, 8;
70fe7d2841SArtem Belevich; CHECK-NEXT:    cvt.s8.s32 %rs4, %r5;
71fe7d2841SArtem Belevich; CHECK-NEXT:    add.s16 %rs5, %rs1, %rs2;
72fe7d2841SArtem Belevich; CHECK-NEXT:    add.s16 %rs6, %rs3, %rs4;
73fe7d2841SArtem Belevich; CHECK-NEXT:    add.s16 %rs7, %rs5, %rs6;
74fe7d2841SArtem Belevich; CHECK-NEXT:    cvt.u32.u16 %r6, %rs7;
750f0a96b8SYoungsuk Kim; CHECK-NEXT:    st.param.b32 [func_retval0], %r6;
76fe7d2841SArtem Belevich; CHECK-NEXT:    ret;
77d06b3e3bSArtem Belevich  %v = bitcast i32 %a to <4 x i8>
78d06b3e3bSArtem Belevich  %r0 = extractelement <4 x i8> %v, i64 0
79d06b3e3bSArtem Belevich  %r1 = extractelement <4 x i8> %v, i64 1
80d06b3e3bSArtem Belevich  %r2 = extractelement <4 x i8> %v, i64 2
81d06b3e3bSArtem Belevich  %r3 = extractelement <4 x i8> %v, i64 3
82d06b3e3bSArtem Belevich  %r0i = sext i8 %r0 to i16
83d06b3e3bSArtem Belevich  %r1i = sext i8 %r1 to i16
84d06b3e3bSArtem Belevich  %r2i = sext i8 %r2 to i16
85d06b3e3bSArtem Belevich  %r3i = sext i8 %r3 to i16
86d06b3e3bSArtem Belevich  %r01 = add i16 %r0i, %r1i
87d06b3e3bSArtem Belevich  %r23 = add i16 %r2i, %r3i
88d06b3e3bSArtem Belevich  %r = add i16 %r01, %r23
89d06b3e3bSArtem Belevich  ret i16 %r
90d06b3e3bSArtem Belevich}
91d06b3e3bSArtem Belevich
; test_v4i8_s32: reinterpret an i32 argument as <4 x i8>, extract all four
; lanes, sign-extend each to i32 and return the i32 sum.
; The expected PTX below needs only four bfe.s32 extractions (offsets 0, 8,
; 16, 24; width 8) feeding add.s32 directly - no separate conversion
; instruction appears between the extraction and the adds.
92cbafb6f2SArtem Belevichdefine i32  @test_v4i8_s32(i32 %a) {
93fe7d2841SArtem Belevich; CHECK-LABEL: test_v4i8_s32(
94fe7d2841SArtem Belevich; CHECK:       {
95fe7d2841SArtem Belevich; CHECK-NEXT:    .reg .b32 %r<9>;
96fe7d2841SArtem Belevich; CHECK-EMPTY:
97fe7d2841SArtem Belevich; CHECK-NEXT:  // %bb.0:
98fe7d2841SArtem Belevich; CHECK-NEXT:    ld.param.u32 %r1, [test_v4i8_s32_param_0];
99fe7d2841SArtem Belevich; CHECK-NEXT:    bfe.s32 %r2, %r1, 0, 8;
100fe7d2841SArtem Belevich; CHECK-NEXT:    bfe.s32 %r3, %r1, 8, 8;
101fe7d2841SArtem Belevich; CHECK-NEXT:    bfe.s32 %r4, %r1, 16, 8;
102fe7d2841SArtem Belevich; CHECK-NEXT:    bfe.s32 %r5, %r1, 24, 8;
103fe7d2841SArtem Belevich; CHECK-NEXT:    add.s32 %r6, %r2, %r3;
104fe7d2841SArtem Belevich; CHECK-NEXT:    add.s32 %r7, %r4, %r5;
105fe7d2841SArtem Belevich; CHECK-NEXT:    add.s32 %r8, %r6, %r7;
1060f0a96b8SYoungsuk Kim; CHECK-NEXT:    st.param.b32 [func_retval0], %r8;
107fe7d2841SArtem Belevich; CHECK-NEXT:    ret;
108cbafb6f2SArtem Belevich  %v = bitcast i32 %a to <4 x i8>
109cbafb6f2SArtem Belevich  %r0 = extractelement <4 x i8> %v, i64 0
110cbafb6f2SArtem Belevich  %r1 = extractelement <4 x i8> %v, i64 1
111cbafb6f2SArtem Belevich  %r2 = extractelement <4 x i8> %v, i64 2
112cbafb6f2SArtem Belevich  %r3 = extractelement <4 x i8> %v, i64 3
113cbafb6f2SArtem Belevich  %r0i = sext i8 %r0 to i32
114cbafb6f2SArtem Belevich  %r1i = sext i8 %r1 to i32
115cbafb6f2SArtem Belevich  %r2i = sext i8 %r2 to i32
116cbafb6f2SArtem Belevich  %r3i = sext i8 %r3 to i32
117cbafb6f2SArtem Belevich  %r01 = add i32 %r0i, %r1i
118cbafb6f2SArtem Belevich  %r23 = add i32 %r2i, %r3i
119cbafb6f2SArtem Belevich  %r = add i32 %r01, %r23
120cbafb6f2SArtem Belevich  ret i32 %r
121cbafb6f2SArtem Belevich}
122cbafb6f2SArtem Belevich
; test_v4i8_u32: unsigned counterpart of test_v4i8_s32 - the four extracted
; i8 lanes are zero-extended (zext) to i32 before being summed.
; The expected PTX below differs from the signed variant only in using
; bfe.u32 instead of bfe.s32 for the four byte extractions.
123cbafb6f2SArtem Belevichdefine i32  @test_v4i8_u32(i32 %a) {
124fe7d2841SArtem Belevich; CHECK-LABEL: test_v4i8_u32(
125fe7d2841SArtem Belevich; CHECK:       {
126fe7d2841SArtem Belevich; CHECK-NEXT:    .reg .b32 %r<9>;
127fe7d2841SArtem Belevich; CHECK-EMPTY:
128fe7d2841SArtem Belevich; CHECK-NEXT:  // %bb.0:
129fe7d2841SArtem Belevich; CHECK-NEXT:    ld.param.u32 %r1, [test_v4i8_u32_param_0];
130fe7d2841SArtem Belevich; CHECK-NEXT:    bfe.u32 %r2, %r1, 0, 8;
131fe7d2841SArtem Belevich; CHECK-NEXT:    bfe.u32 %r3, %r1, 8, 8;
132fe7d2841SArtem Belevich; CHECK-NEXT:    bfe.u32 %r4, %r1, 16, 8;
133fe7d2841SArtem Belevich; CHECK-NEXT:    bfe.u32 %r5, %r1, 24, 8;
134fe7d2841SArtem Belevich; CHECK-NEXT:    add.s32 %r6, %r2, %r3;
135fe7d2841SArtem Belevich; CHECK-NEXT:    add.s32 %r7, %r4, %r5;
136fe7d2841SArtem Belevich; CHECK-NEXT:    add.s32 %r8, %r6, %r7;
1370f0a96b8SYoungsuk Kim; CHECK-NEXT:    st.param.b32 [func_retval0], %r8;
138fe7d2841SArtem Belevich; CHECK-NEXT:    ret;
139cbafb6f2SArtem Belevich  %v = bitcast i32 %a to <4 x i8>
140cbafb6f2SArtem Belevich  %r0 = extractelement <4 x i8> %v, i64 0
141cbafb6f2SArtem Belevich  %r1 = extractelement <4 x i8> %v, i64 1
142cbafb6f2SArtem Belevich  %r2 = extractelement <4 x i8> %v, i64 2
143cbafb6f2SArtem Belevich  %r3 = extractelement <4 x i8> %v, i64 3
144cbafb6f2SArtem Belevich  %r0i = zext i8 %r0 to i32
145cbafb6f2SArtem Belevich  %r1i = zext i8 %r1 to i32
146cbafb6f2SArtem Belevich  %r2i = zext i8 %r2 to i32
147cbafb6f2SArtem Belevich  %r3i = zext i8 %r3 to i32
148cbafb6f2SArtem Belevich  %r01 = add i32 %r0i, %r1i
149cbafb6f2SArtem Belevich  %r23 = add i32 %r2i, %r3i
150cbafb6f2SArtem Belevich  %r = add i32 %r01, %r23
151cbafb6f2SArtem Belevich  ret i32 %r
152cbafb6f2SArtem Belevich}
153cbafb6f2SArtem Belevich
154cbafb6f2SArtem Belevich
155cbafb6f2SArtem Belevich
; test_v8i8: reinterpret an i64 argument as <8 x i8>, extract all eight lanes,
; sign-extend each to i16 and return the i16 sum, built as a balanced tree of
; adds (pairs, then quads, then the final add).
; The expected PTX below first derives two 32-bit registers from the 64-bit
; parameter: %r1 via a mov.b64 unpack that discards the other half into a
; scratch `tmp` register, and %r2 via cvt.u32.u64. Each of those is then
; processed like the <4 x i8> case: bfe.s32 at offsets 0/8/16/24 followed by
; cvt.s8.s32, with add.s16 performing the reduction.
156d06b3e3bSArtem Belevichdefine i16  @test_v8i8(i64 %a) {
157fe7d2841SArtem Belevich; CHECK-LABEL: test_v8i8(
158fe7d2841SArtem Belevich; CHECK:       {
159fe7d2841SArtem Belevich; CHECK-NEXT:    .reg .b16 %rs<16>;
160*310e7987SAlex MacLean; CHECK-NEXT:    .reg .b32 %r<12>;
161fe7d2841SArtem Belevich; CHECK-NEXT:    .reg .b64 %rd<2>;
162fe7d2841SArtem Belevich; CHECK-EMPTY:
163fe7d2841SArtem Belevich; CHECK-NEXT:  // %bb.0:
164fe7d2841SArtem Belevich; CHECK-NEXT:    ld.param.u64 %rd1, [test_v8i8_param_0];
165*310e7987SAlex MacLean; CHECK-NEXT:    { .reg .b32 tmp; mov.b64 {tmp, %r1}, %rd1; }
166*310e7987SAlex MacLean; CHECK-NEXT:    cvt.u32.u64 %r2, %rd1;
167*310e7987SAlex MacLean; CHECK-NEXT:    bfe.s32 %r3, %r2, 0, 8;
168*310e7987SAlex MacLean; CHECK-NEXT:    cvt.s8.s32 %rs1, %r3;
169*310e7987SAlex MacLean; CHECK-NEXT:    bfe.s32 %r4, %r2, 8, 8;
170*310e7987SAlex MacLean; CHECK-NEXT:    cvt.s8.s32 %rs2, %r4;
171*310e7987SAlex MacLean; CHECK-NEXT:    bfe.s32 %r5, %r2, 16, 8;
172*310e7987SAlex MacLean; CHECK-NEXT:    cvt.s8.s32 %rs3, %r5;
173*310e7987SAlex MacLean; CHECK-NEXT:    bfe.s32 %r6, %r2, 24, 8;
174*310e7987SAlex MacLean; CHECK-NEXT:    cvt.s8.s32 %rs4, %r6;
175*310e7987SAlex MacLean; CHECK-NEXT:    bfe.s32 %r7, %r1, 0, 8;
176*310e7987SAlex MacLean; CHECK-NEXT:    cvt.s8.s32 %rs5, %r7;
177*310e7987SAlex MacLean; CHECK-NEXT:    bfe.s32 %r8, %r1, 8, 8;
178*310e7987SAlex MacLean; CHECK-NEXT:    cvt.s8.s32 %rs6, %r8;
179*310e7987SAlex MacLean; CHECK-NEXT:    bfe.s32 %r9, %r1, 16, 8;
180*310e7987SAlex MacLean; CHECK-NEXT:    cvt.s8.s32 %rs7, %r9;
181*310e7987SAlex MacLean; CHECK-NEXT:    bfe.s32 %r10, %r1, 24, 8;
182*310e7987SAlex MacLean; CHECK-NEXT:    cvt.s8.s32 %rs8, %r10;
183fe7d2841SArtem Belevich; CHECK-NEXT:    add.s16 %rs9, %rs1, %rs2;
184fe7d2841SArtem Belevich; CHECK-NEXT:    add.s16 %rs10, %rs3, %rs4;
185fe7d2841SArtem Belevich; CHECK-NEXT:    add.s16 %rs11, %rs5, %rs6;
186fe7d2841SArtem Belevich; CHECK-NEXT:    add.s16 %rs12, %rs7, %rs8;
187fe7d2841SArtem Belevich; CHECK-NEXT:    add.s16 %rs13, %rs9, %rs10;
188fe7d2841SArtem Belevich; CHECK-NEXT:    add.s16 %rs14, %rs11, %rs12;
189fe7d2841SArtem Belevich; CHECK-NEXT:    add.s16 %rs15, %rs13, %rs14;
190*310e7987SAlex MacLean; CHECK-NEXT:    cvt.u32.u16 %r11, %rs15;
191*310e7987SAlex MacLean; CHECK-NEXT:    st.param.b32 [func_retval0], %r11;
192fe7d2841SArtem Belevich; CHECK-NEXT:    ret;
193d06b3e3bSArtem Belevich  %v = bitcast i64 %a to <8 x i8>
194d06b3e3bSArtem Belevich  %r0 = extractelement <8 x i8> %v, i64 0
195d06b3e3bSArtem Belevich  %r1 = extractelement <8 x i8> %v, i64 1
196d06b3e3bSArtem Belevich  %r2 = extractelement <8 x i8> %v, i64 2
197d06b3e3bSArtem Belevich  %r3 = extractelement <8 x i8> %v, i64 3
198d06b3e3bSArtem Belevich  %r4 = extractelement <8 x i8> %v, i64 4
199d06b3e3bSArtem Belevich  %r5 = extractelement <8 x i8> %v, i64 5
200d06b3e3bSArtem Belevich  %r6 = extractelement <8 x i8> %v, i64 6
201d06b3e3bSArtem Belevich  %r7 = extractelement <8 x i8> %v, i64 7
202d06b3e3bSArtem Belevich  %r0i = sext i8 %r0 to i16
203d06b3e3bSArtem Belevich  %r1i = sext i8 %r1 to i16
204d06b3e3bSArtem Belevich  %r2i = sext i8 %r2 to i16
205d06b3e3bSArtem Belevich  %r3i = sext i8 %r3 to i16
206d06b3e3bSArtem Belevich  %r4i = sext i8 %r4 to i16
207d06b3e3bSArtem Belevich  %r5i = sext i8 %r5 to i16
208d06b3e3bSArtem Belevich  %r6i = sext i8 %r6 to i16
209d06b3e3bSArtem Belevich  %r7i = sext i8 %r7 to i16
210d06b3e3bSArtem Belevich  %r01 = add i16 %r0i, %r1i
211d06b3e3bSArtem Belevich  %r23 = add i16 %r2i, %r3i
212d06b3e3bSArtem Belevich  %r45 = add i16 %r4i, %r5i
213d06b3e3bSArtem Belevich  %r67 = add i16 %r6i, %r7i
214d06b3e3bSArtem Belevich  %r0123 = add i16 %r01, %r23
215d06b3e3bSArtem Belevich  %r4567 = add i16 %r45, %r67
216d06b3e3bSArtem Belevich  %r = add i16 %r0123, %r4567
217d06b3e3bSArtem Belevich  ret i16 %r
218d06b3e3bSArtem Belevich}
219