; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_20 | FileCheck %s
; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64 -mcpu=sm_20 | %ptxas-verify %}

; Exercises lowering of the llvm.bswap.* intrinsics (i16, i32, <2 x i16>, i64)
; to PTX for the nvptx64 target. The expected-output lines inside each function
; are generated by the script named on the first line; regenerate them with
; that script instead of editing them by hand.

target triple = "nvptx64-nvidia-cuda"

; i16: the expected PTX swaps the two bytes with a shift-right by 8, a
; shift-left by 8 and an OR, then widens the result to 32 bits for the return
; slot.
define i16 @bswap16(i16 %a) {
; CHECK-LABEL: bswap16(
; CHECK:       {
; CHECK-NEXT:    .reg .b16 %rs<5>;
; CHECK-NEXT:    .reg .b32 %r<2>;
; CHECK-EMPTY:
; CHECK-NEXT:  // %bb.0:
; CHECK-NEXT:    ld.param.u16 %rs1, [bswap16_param_0];
; CHECK-NEXT:    shr.u16 %rs2, %rs1, 8;
; CHECK-NEXT:    shl.b16 %rs3, %rs1, 8;
; CHECK-NEXT:    or.b16 %rs4, %rs3, %rs2;
; CHECK-NEXT:    cvt.u32.u16 %r1, %rs4;
; CHECK-NEXT:    st.param.b32 [func_retval0], %r1;
; CHECK-NEXT:    ret;
  %b = tail call i16 @llvm.bswap.i16(i16 %a)
  ret i16 %b
}


; i32: the expected PTX is a single prmt.b32 with selector 291 (0x0123). Per
; the PTX ISA's default prmt mode, that selector picks source bytes 3,2,1,0
; for destination bytes 0..3, i.e. a full byte reversal.
define i32 @bswap32(i32 %a) {
; CHECK-LABEL: bswap32(
; CHECK:       {
; CHECK-NEXT:    .reg .b32 %r<3>;
; CHECK-EMPTY:
; CHECK-NEXT:  // %bb.0:
; CHECK-NEXT:    ld.param.u32 %r1, [bswap32_param_0];
; CHECK-NEXT:    prmt.b32 %r2, %r1, 0, 291;
; CHECK-NEXT:    st.param.b32 [func_retval0], %r2;
; CHECK-NEXT:    ret;
  %b = tail call i32 @llvm.bswap.i32(i32 %a)
  ret i32 %b
}


; <2 x i16>: the vector is held in one 32-bit register; the expected PTX uses
; prmt.b32 with selector 8961 (0x2301), which swaps the bytes inside each
; 16-bit half without exchanging the halves.
; NOTE(review): attribute group #0 is referenced below but no `attributes #0`
; definition is visible in this file -- presumably it is treated as empty;
; confirm, or drop the reference.
define <2 x i16> @bswapv2i16(<2 x i16> %a) #0 {
; CHECK-LABEL: bswapv2i16(
; CHECK:       {
; CHECK-NEXT:    .reg .b32 %r<3>;
; CHECK-EMPTY:
; CHECK-NEXT:  // %bb.0:
; CHECK-NEXT:    ld.param.u32 %r1, [bswapv2i16_param_0];
; CHECK-NEXT:    prmt.b32 %r2, %r1, 0, 8961;
; CHECK-NEXT:    st.param.b32 [func_retval0], %r2;
; CHECK-NEXT:    ret;
  %b = tail call <2 x i16> @llvm.bswap.v2i16(<2 x i16> %a)
  ret <2 x i16> %b
}

; i64: the expected PTX unpacks the 64-bit value into two 32-bit registers,
; byte-reverses each with prmt.b32 (selector 291 = 0x0123), and repacks them in
; exchanged order (%r4, derived from the second unpacked half, is listed first
; in the final mov.b64).
define i64 @bswap64(i64 %a) {
; CHECK-LABEL: bswap64(
; CHECK:       {
; CHECK-NEXT:    .reg .b32 %r<5>;
; CHECK-NEXT:    .reg .b64 %rd<3>;
; CHECK-EMPTY:
; CHECK-NEXT:  // %bb.0:
; CHECK-NEXT:    ld.param.u64 %rd1, [bswap64_param_0];
; CHECK-NEXT:    { .reg .b32 tmp; mov.b64 {%r1, tmp}, %rd1; }
; CHECK-NEXT:    prmt.b32 %r2, %r1, 0, 291;
; CHECK-NEXT:    { .reg .b32 tmp; mov.b64 {tmp, %r3}, %rd1; }
; CHECK-NEXT:    prmt.b32 %r4, %r3, 0, 291;
; CHECK-NEXT:    mov.b64 %rd2, {%r4, %r2};
; CHECK-NEXT:    st.param.b64 [func_retval0], %rd2;
; CHECK-NEXT:    ret;
  %b = tail call i64 @llvm.bswap.i64(i64 %a)
  ret i64 %b
}

; Declarations of the byte-swap intrinsics called by the functions above.
declare i16 @llvm.bswap.i16(i16)
declare i32 @llvm.bswap.i32(i32)
declare <2 x i16> @llvm.bswap.v2i16(<2 x i16>)
declare i64 @llvm.bswap.i64(i64)
