xref: /llvm-project/llvm/test/CodeGen/NVPTX/extractelement.ll (revision 310e79875752886a7713911e2a1ec14bc75bd4b3)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_35 -verify-machineinstrs | FileCheck %s
3; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64 -mcpu=sm_35 | %ptxas-verify %}
;
; Code generation test for extractelement on small i8 vectors (<2 x i8>,
; <4 x i8> and <8 x i8>) compiled for nvptx64 / sm_35.
; The first llc invocation above is piped to FileCheck and matched against the
; assertions embedded in each function; the second is conditional on ptxas and
; pipes the llc output to the ptxas verifier.
; The assertions are script-generated (see the NOTE on the first line), so
; regenerate them with that script rather than editing them by hand.
4target triple = "nvptx64-nvidia-cuda"
5
6
; test_v2i8: reinterprets the i16 argument as <2 x i8>, extracts both elements,
; sign-extends each to i16 and returns their sum.
;
; Expected PTX (assertions below): the parameter is loaded once as a 16-bit
; value; one addend comes from cvt.s16.s8 and the other from shr.s16 by 8.
; The 16-bit sum is widened with cvt.u32.u16 before being stored to
; func_retval0.
7define i16  @test_v2i8(i16 %a) {
8; CHECK-LABEL: test_v2i8(
9; CHECK:       {
10; CHECK-NEXT:    .reg .b16 %rs<5>;
11; CHECK-NEXT:    .reg .b32 %r<2>;
12; CHECK-EMPTY:
13; CHECK-NEXT:  // %bb.0:
14; CHECK-NEXT:    ld.param.u16 %rs1, [test_v2i8_param_0];
15; CHECK-NEXT:    cvt.s16.s8 %rs2, %rs1;
16; CHECK-NEXT:    shr.s16 %rs3, %rs1, 8;
17; CHECK-NEXT:    add.s16 %rs4, %rs2, %rs3;
18; CHECK-NEXT:    cvt.u32.u16 %r1, %rs4;
19; CHECK-NEXT:    st.param.b32 [func_retval0], %r1;
20; CHECK-NEXT:    ret;
  ; View the scalar argument as a two-element byte vector.
21  %v = bitcast i16 %a to <2 x i8>
  ; Pull out each element with a constant index.
22  %r0 = extractelement <2 x i8> %v, i64 0
23  %r1 = extractelement <2 x i8> %v, i64 1
  ; Sign-extend both elements so the addition is done in i16.
24  %r0i = sext i8 %r0 to i16
25  %r1i = sext i8 %r1 to i16
26  %r01 = add i16 %r0i, %r1i
27  ret i16 %r01
28}
29
; test_v2i8_load: loads a <2 x i8> from %a (align 4), extracts both elements
; and returns true only when both are zero.
;
; Expected PTX (assertions below): both bytes are fetched with a single
; ld.v2.u8, OR-ed together, masked with 255 and compared against 0 with one
; setp.eq.s16; selp.u32 then materialises the i1 result as 1 or 0.
30define i1  @test_v2i8_load(ptr %a) {
31; CHECK-LABEL: test_v2i8_load(
32; CHECK:       {
33; CHECK-NEXT:    .reg .pred %p<2>;
34; CHECK-NEXT:    .reg .b16 %rs<7>;
35; CHECK-NEXT:    .reg .b32 %r<2>;
36; CHECK-NEXT:    .reg .b64 %rd<2>;
37; CHECK-EMPTY:
38; CHECK-NEXT:  // %bb.0:
39; CHECK-NEXT:    ld.param.u64 %rd1, [test_v2i8_load_param_0];
40; CHECK-NEXT:    ld.v2.u8 {%rs1, %rs2}, [%rd1];
41; CHECK-NEXT:    or.b16 %rs5, %rs1, %rs2;
42; CHECK-NEXT:    and.b16 %rs6, %rs5, 255;
43; CHECK-NEXT:    setp.eq.s16 %p1, %rs6, 0;
44; CHECK-NEXT:    selp.u32 %r1, 1, 0, %p1;
45; CHECK-NEXT:    st.param.b32 [func_retval0], %r1;
46; CHECK-NEXT:    ret;
  ; The vector comes from memory here rather than from a bitcast of a scalar.
47  %v = load <2 x i8>, ptr %a, align 4
48  %r0 = extractelement <2 x i8> %v, i64 0
49  %r1 = extractelement <2 x i8> %v, i64 1
  ; Compare each element against zero independently.
50  %icmp = icmp eq i8 %r0, 0
51  %icmp3 = icmp eq i8 %r1, 0
  ; select with a constant-false arm: yields %icmp3 when %icmp is true and
  ; false otherwise, i.e. true only if both compares are true.
52  %select = select i1 %icmp, i1 %icmp3, i1 false
53  ret i1 %select
54}
; test_v4i8: reinterprets the i32 argument as <4 x i8>, extracts all four
; elements, sign-extends each to i16 and returns the sum, computed as
; (e0 + e1) + (e2 + e3).
;
; Expected PTX (assertions below): each element is obtained with bfe.s32 using
; width 8 at bit offsets 0, 8, 16 and 24, each followed by cvt.s8.s32 into a
; 16-bit register; the additions are add.s16 and the result is widened with
; cvt.u32.u16 before the store to func_retval0.
55define i16  @test_v4i8(i32 %a) {
56; CHECK-LABEL: test_v4i8(
57; CHECK:       {
58; CHECK-NEXT:    .reg .b16 %rs<8>;
59; CHECK-NEXT:    .reg .b32 %r<7>;
60; CHECK-EMPTY:
61; CHECK-NEXT:  // %bb.0:
62; CHECK-NEXT:    ld.param.u32 %r1, [test_v4i8_param_0];
63; CHECK-NEXT:    bfe.s32 %r2, %r1, 0, 8;
64; CHECK-NEXT:    cvt.s8.s32 %rs1, %r2;
65; CHECK-NEXT:    bfe.s32 %r3, %r1, 8, 8;
66; CHECK-NEXT:    cvt.s8.s32 %rs2, %r3;
67; CHECK-NEXT:    bfe.s32 %r4, %r1, 16, 8;
68; CHECK-NEXT:    cvt.s8.s32 %rs3, %r4;
69; CHECK-NEXT:    bfe.s32 %r5, %r1, 24, 8;
70; CHECK-NEXT:    cvt.s8.s32 %rs4, %r5;
71; CHECK-NEXT:    add.s16 %rs5, %rs1, %rs2;
72; CHECK-NEXT:    add.s16 %rs6, %rs3, %rs4;
73; CHECK-NEXT:    add.s16 %rs7, %rs5, %rs6;
74; CHECK-NEXT:    cvt.u32.u16 %r6, %rs7;
75; CHECK-NEXT:    st.param.b32 [func_retval0], %r6;
76; CHECK-NEXT:    ret;
  ; View the 32-bit scalar as four bytes.
77  %v = bitcast i32 %a to <4 x i8>
78  %r0 = extractelement <4 x i8> %v, i64 0
79  %r1 = extractelement <4 x i8> %v, i64 1
80  %r2 = extractelement <4 x i8> %v, i64 2
81  %r3 = extractelement <4 x i8> %v, i64 3
  ; Sign-extend to i16 (narrower than the 32-bit source register).
82  %r0i = sext i8 %r0 to i16
83  %r1i = sext i8 %r1 to i16
84  %r2i = sext i8 %r2 to i16
85  %r3i = sext i8 %r3 to i16
  ; Pairwise reduction of the four extended elements.
86  %r01 = add i16 %r0i, %r1i
87  %r23 = add i16 %r2i, %r3i
88  %r = add i16 %r01, %r23
89  ret i16 %r
90}
91
; test_v4i8_s32: reinterprets the i32 argument as <4 x i8>, extracts all four
; elements, sign-extends each to i32 and returns the sum, computed as
; (e0 + e1) + (e2 + e3).
;
; Expected PTX (assertions below): each extract-plus-sext pair appears as a
; single bfe.s32 (width 8, bit offsets 0, 8, 16, 24) with no separate
; conversion instruction; the additions are add.s32.
92define i32  @test_v4i8_s32(i32 %a) {
93; CHECK-LABEL: test_v4i8_s32(
94; CHECK:       {
95; CHECK-NEXT:    .reg .b32 %r<9>;
96; CHECK-EMPTY:
97; CHECK-NEXT:  // %bb.0:
98; CHECK-NEXT:    ld.param.u32 %r1, [test_v4i8_s32_param_0];
99; CHECK-NEXT:    bfe.s32 %r2, %r1, 0, 8;
100; CHECK-NEXT:    bfe.s32 %r3, %r1, 8, 8;
101; CHECK-NEXT:    bfe.s32 %r4, %r1, 16, 8;
102; CHECK-NEXT:    bfe.s32 %r5, %r1, 24, 8;
103; CHECK-NEXT:    add.s32 %r6, %r2, %r3;
104; CHECK-NEXT:    add.s32 %r7, %r4, %r5;
105; CHECK-NEXT:    add.s32 %r8, %r6, %r7;
106; CHECK-NEXT:    st.param.b32 [func_retval0], %r8;
107; CHECK-NEXT:    ret;
  ; View the 32-bit scalar as four bytes.
108  %v = bitcast i32 %a to <4 x i8>
109  %r0 = extractelement <4 x i8> %v, i64 0
110  %r1 = extractelement <4 x i8> %v, i64 1
111  %r2 = extractelement <4 x i8> %v, i64 2
112  %r3 = extractelement <4 x i8> %v, i64 3
  ; Signed widening to the same width as the source scalar.
113  %r0i = sext i8 %r0 to i32
114  %r1i = sext i8 %r1 to i32
115  %r2i = sext i8 %r2 to i32
116  %r3i = sext i8 %r3 to i32
  ; Pairwise reduction of the four extended elements.
117  %r01 = add i32 %r0i, %r1i
118  %r23 = add i32 %r2i, %r3i
119  %r = add i32 %r01, %r23
120  ret i32 %r
121}
122
; test_v4i8_u32: unsigned counterpart of test_v4i8_s32. Reinterprets the i32
; argument as <4 x i8>, extracts all four elements, zero-extends each to i32
; and returns the sum, computed as (e0 + e1) + (e2 + e3).
;
; Expected PTX (assertions below): each extract-plus-zext pair appears as a
; single bfe.u32 (width 8, bit offsets 0, 8, 16, 24); the additions are
; still printed as add.s32.
123define i32  @test_v4i8_u32(i32 %a) {
124; CHECK-LABEL: test_v4i8_u32(
125; CHECK:       {
126; CHECK-NEXT:    .reg .b32 %r<9>;
127; CHECK-EMPTY:
128; CHECK-NEXT:  // %bb.0:
129; CHECK-NEXT:    ld.param.u32 %r1, [test_v4i8_u32_param_0];
130; CHECK-NEXT:    bfe.u32 %r2, %r1, 0, 8;
131; CHECK-NEXT:    bfe.u32 %r3, %r1, 8, 8;
132; CHECK-NEXT:    bfe.u32 %r4, %r1, 16, 8;
133; CHECK-NEXT:    bfe.u32 %r5, %r1, 24, 8;
134; CHECK-NEXT:    add.s32 %r6, %r2, %r3;
135; CHECK-NEXT:    add.s32 %r7, %r4, %r5;
136; CHECK-NEXT:    add.s32 %r8, %r6, %r7;
137; CHECK-NEXT:    st.param.b32 [func_retval0], %r8;
138; CHECK-NEXT:    ret;
  ; View the 32-bit scalar as four bytes.
139  %v = bitcast i32 %a to <4 x i8>
140  %r0 = extractelement <4 x i8> %v, i64 0
141  %r1 = extractelement <4 x i8> %v, i64 1
142  %r2 = extractelement <4 x i8> %v, i64 2
143  %r3 = extractelement <4 x i8> %v, i64 3
  ; Unsigned widening: the only IR difference from test_v4i8_s32.
144  %r0i = zext i8 %r0 to i32
145  %r1i = zext i8 %r1 to i32
146  %r2i = zext i8 %r2 to i32
147  %r3i = zext i8 %r3 to i32
  ; Pairwise reduction of the four extended elements.
148  %r01 = add i32 %r0i, %r1i
149  %r23 = add i32 %r2i, %r3i
150  %r = add i32 %r01, %r23
151  ret i32 %r
152}
153
154
155
; test_v8i8: reinterprets the i64 argument as <8 x i8>, extracts all eight
; elements, sign-extends each to i16 and returns the sum, reduced as a
; balanced tree: ((e0+e1)+(e2+e3)) + ((e4+e5)+(e6+e7)).
;
; Expected PTX (assertions below): the 64-bit parameter is first turned into
; two 32-bit registers (%r1 from a mov.b64 unpack, %r2 from cvt.u32.u64).
; Four bytes are then taken from each with bfe.s32 (width 8, bit offsets
; 0, 8, 16, 24) followed by cvt.s8.s32. The bytes from %r2 feed the first
; four addends and the bytes from %r1 the last four.
156define i16  @test_v8i8(i64 %a) {
157; CHECK-LABEL: test_v8i8(
158; CHECK:       {
159; CHECK-NEXT:    .reg .b16 %rs<16>;
160; CHECK-NEXT:    .reg .b32 %r<12>;
161; CHECK-NEXT:    .reg .b64 %rd<2>;
162; CHECK-EMPTY:
163; CHECK-NEXT:  // %bb.0:
164; CHECK-NEXT:    ld.param.u64 %rd1, [test_v8i8_param_0];
165; CHECK-NEXT:    { .reg .b32 tmp; mov.b64 {tmp, %r1}, %rd1; }
166; CHECK-NEXT:    cvt.u32.u64 %r2, %rd1;
167; CHECK-NEXT:    bfe.s32 %r3, %r2, 0, 8;
168; CHECK-NEXT:    cvt.s8.s32 %rs1, %r3;
169; CHECK-NEXT:    bfe.s32 %r4, %r2, 8, 8;
170; CHECK-NEXT:    cvt.s8.s32 %rs2, %r4;
171; CHECK-NEXT:    bfe.s32 %r5, %r2, 16, 8;
172; CHECK-NEXT:    cvt.s8.s32 %rs3, %r5;
173; CHECK-NEXT:    bfe.s32 %r6, %r2, 24, 8;
174; CHECK-NEXT:    cvt.s8.s32 %rs4, %r6;
175; CHECK-NEXT:    bfe.s32 %r7, %r1, 0, 8;
176; CHECK-NEXT:    cvt.s8.s32 %rs5, %r7;
177; CHECK-NEXT:    bfe.s32 %r8, %r1, 8, 8;
178; CHECK-NEXT:    cvt.s8.s32 %rs6, %r8;
179; CHECK-NEXT:    bfe.s32 %r9, %r1, 16, 8;
180; CHECK-NEXT:    cvt.s8.s32 %rs7, %r9;
181; CHECK-NEXT:    bfe.s32 %r10, %r1, 24, 8;
182; CHECK-NEXT:    cvt.s8.s32 %rs8, %r10;
183; CHECK-NEXT:    add.s16 %rs9, %rs1, %rs2;
184; CHECK-NEXT:    add.s16 %rs10, %rs3, %rs4;
185; CHECK-NEXT:    add.s16 %rs11, %rs5, %rs6;
186; CHECK-NEXT:    add.s16 %rs12, %rs7, %rs8;
187; CHECK-NEXT:    add.s16 %rs13, %rs9, %rs10;
188; CHECK-NEXT:    add.s16 %rs14, %rs11, %rs12;
189; CHECK-NEXT:    add.s16 %rs15, %rs13, %rs14;
190; CHECK-NEXT:    cvt.u32.u16 %r11, %rs15;
191; CHECK-NEXT:    st.param.b32 [func_retval0], %r11;
192; CHECK-NEXT:    ret;
  ; View the 64-bit scalar as eight bytes.
193  %v = bitcast i64 %a to <8 x i8>
194  %r0 = extractelement <8 x i8> %v, i64 0
195  %r1 = extractelement <8 x i8> %v, i64 1
196  %r2 = extractelement <8 x i8> %v, i64 2
197  %r3 = extractelement <8 x i8> %v, i64 3
198  %r4 = extractelement <8 x i8> %v, i64 4
199  %r5 = extractelement <8 x i8> %v, i64 5
200  %r6 = extractelement <8 x i8> %v, i64 6
201  %r7 = extractelement <8 x i8> %v, i64 7
  ; Sign-extend every element to i16 before summing.
202  %r0i = sext i8 %r0 to i16
203  %r1i = sext i8 %r1 to i16
204  %r2i = sext i8 %r2 to i16
205  %r3i = sext i8 %r3 to i16
206  %r4i = sext i8 %r4 to i16
207  %r5i = sext i8 %r5 to i16
208  %r6i = sext i8 %r6 to i16
209  %r7i = sext i8 %r7 to i16
  ; First reduction level: adjacent pairs.
210  %r01 = add i16 %r0i, %r1i
211  %r23 = add i16 %r2i, %r3i
212  %r45 = add i16 %r4i, %r5i
213  %r67 = add i16 %r6i, %r7i
  ; Second level: elements 0-3 and elements 4-7.
214  %r0123 = add i16 %r01, %r23
215  %r4567 = add i16 %r45, %r67
  ; Final level: total of all eight elements.
216  %r = add i16 %r0123, %r4567
217  ret i16 %r
218}
219