xref: /llvm-project/llvm/test/CodeGen/NVPTX/bswap.ll (revision b279f6b098d3849f7f1c1f539b108307d5f8ae2d)
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_20 | FileCheck %s
; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64 -mcpu=sm_20 | %ptxas-verify %}
4
; Module-level target triple for this test.
target triple = "nvptx64-nvidia-cuda"
6
; bswap16: byte swap of a scalar i16 via @llvm.bswap.i16.
; The checks below expect the swap to be expanded into a 16-bit logical shift
; right by 8, a shift left by 8, and an OR of the two results. The 16-bit
; result is then widened with cvt.u32.u16 and stored as a 32-bit return value.
define i16 @bswap16(i16 %a) {
; CHECK-LABEL: bswap16(
; CHECK:       {
; CHECK-NEXT:    .reg .b16 %rs<5>;
; CHECK-NEXT:    .reg .b32 %r<2>;
; CHECK-EMPTY:
; CHECK-NEXT:  // %bb.0:
; CHECK-NEXT:    ld.param.u16 %rs1, [bswap16_param_0];
; CHECK-NEXT:    shr.u16 %rs2, %rs1, 8;
; CHECK-NEXT:    shl.b16 %rs3, %rs1, 8;
; CHECK-NEXT:    or.b16 %rs4, %rs3, %rs2;
; CHECK-NEXT:    cvt.u32.u16 %r1, %rs4;
; CHECK-NEXT:    st.param.b32 [func_retval0], %r1;
; CHECK-NEXT:    ret;
  %b = tail call i16 @llvm.bswap.i16(i16 %a)
  ret i16 %b
}
24
25
; bswap32: byte swap of a scalar i32 via @llvm.bswap.i32.
; The checks below expect a single prmt.b32 (byte permute) whose selector is
; 291 == 0x0123, i.e. result bytes 0..3 take source bytes 3,2,1,0.
define i32 @bswap32(i32 %a) {
; CHECK-LABEL: bswap32(
; CHECK:       {
; CHECK-NEXT:    .reg .b32 %r<3>;
; CHECK-EMPTY:
; CHECK-NEXT:  // %bb.0:
; CHECK-NEXT:    ld.param.u32 %r1, [bswap32_param_0];
; CHECK-NEXT:    prmt.b32 %r2, %r1, 0, 291;
; CHECK-NEXT:    st.param.b32 [func_retval0], %r2;
; CHECK-NEXT:    ret;
  %b = tail call i32 @llvm.bswap.i32(i32 %a)
  ret i32 %b
}
39
40
; bswapv2i16: per-element byte swap of a <2 x i16> vector via
; @llvm.bswap.v2i16.
; The checks below expect the vector to be loaded as one 32-bit register and
; swapped with a single prmt.b32 whose selector is 8961 == 0x2301, i.e. result
; bytes 0..3 take source bytes 1,0,3,2 (the two bytes of each 16-bit half are
; exchanged, the halves themselves stay in place).
define <2 x i16> @bswapv2i16(<2 x i16> %a) {
; CHECK-LABEL: bswapv2i16(
; CHECK:       {
; CHECK-NEXT:    .reg .b32 %r<3>;
; CHECK-EMPTY:
; CHECK-NEXT:  // %bb.0:
; CHECK-NEXT:    ld.param.u32 %r1, [bswapv2i16_param_0];
; CHECK-NEXT:    prmt.b32 %r2, %r1, 0, 8961;
; CHECK-NEXT:    st.param.b32 [func_retval0], %r2;
; CHECK-NEXT:    ret;
  %b = tail call <2 x i16> @llvm.bswap.v2i16(<2 x i16> %a)
  ret <2 x i16> %b
}
54
; bswap64: byte swap of a scalar i64 via @llvm.bswap.i64.
; The checks below expect the 64-bit value to be unpacked into two 32-bit
; registers (%r1 and %r3), each byte-reversed with prmt.b32 selector
; 291 == 0x0123, and then repacked into a 64-bit register with the two words
; exchanged: %r4 (derived from the second unpacked word) is listed first in
; the final mov.b64, %r2 (derived from the first) is listed second.
define i64 @bswap64(i64 %a) {
; CHECK-LABEL: bswap64(
; CHECK:       {
; CHECK-NEXT:    .reg .b32 %r<5>;
; CHECK-NEXT:    .reg .b64 %rd<3>;
; CHECK-EMPTY:
; CHECK-NEXT:  // %bb.0:
; CHECK-NEXT:    ld.param.u64 %rd1, [bswap64_param_0];
; CHECK-NEXT:    { .reg .b32 tmp; mov.b64 {%r1, tmp}, %rd1; }
; CHECK-NEXT:    prmt.b32 %r2, %r1, 0, 291;
; CHECK-NEXT:    { .reg .b32 tmp; mov.b64 {tmp, %r3}, %rd1; }
; CHECK-NEXT:    prmt.b32 %r4, %r3, 0, 291;
; CHECK-NEXT:    mov.b64 %rd2, {%r4, %r2};
; CHECK-NEXT:    st.param.b64 [func_retval0], %rd2;
; CHECK-NEXT:    ret;
  %b = tail call i64 @llvm.bswap.i64(i64 %a)
  ret i64 %b
}
73
; Declarations of the byte-swap intrinsics exercised by the tests above:
; one per scalar width (i16, i32, i64) plus the <2 x i16> vector form.
declare i16 @llvm.bswap.i16(i16)
declare i32 @llvm.bswap.i32(i32)
declare <2 x i16> @llvm.bswap.v2i16(<2 x i16>)
declare i64 @llvm.bswap.i64(i64)
78