; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_35 -verify-machineinstrs | FileCheck %s
; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64 -mcpu=sm_35 | %ptxas-verify %}
target triple = "nvptx64-nvidia-cuda"

; These tests bitcast (or load) a scalar into a small i8 vector, extract every
; element, widen it, and reduce the elements, pinning the PTX emitted for the
; per-element extraction.

; i16 reinterpreted as <2 x i8>; both elements are sign-extended to i16 and
; added. The expected PTX takes element 0 with cvt.s16.s8 and element 1 with an
; arithmetic shift right by 8.
define i16 @test_v2i8(i16 %a) {
; CHECK-LABEL: test_v2i8(
; CHECK:       {
; CHECK-NEXT:    .reg .b16 %rs<5>;
; CHECK-NEXT:    .reg .b32 %r<2>;
; CHECK-EMPTY:
; CHECK-NEXT:  // %bb.0:
; CHECK-NEXT:    ld.param.u16 %rs1, [test_v2i8_param_0];
; CHECK-NEXT:    cvt.s16.s8 %rs2, %rs1;
; CHECK-NEXT:    shr.s16 %rs3, %rs1, 8;
; CHECK-NEXT:    add.s16 %rs4, %rs2, %rs3;
; CHECK-NEXT:    cvt.u32.u16 %r1, %rs4;
; CHECK-NEXT:    st.param.b32 [func_retval0], %r1;
; CHECK-NEXT:    ret;
  %v = bitcast i16 %a to <2 x i8>
  %r0 = extractelement <2 x i8> %v, i64 0
  %r1 = extractelement <2 x i8> %v, i64 1
  %r0i = sext i8 %r0 to i16
  %r1i = sext i8 %r1 to i16
  %r01 = add i16 %r0i, %r1i
  ret i16 %r01
}

; <2 x i8> loaded from memory; returns true only when both elements are zero
; (the select with a false arm acts as a logical and). The expected PTX uses a
; single ld.v2.u8 and tests the or of the two elements against zero.
define i1 @test_v2i8_load(ptr %a) {
; CHECK-LABEL: test_v2i8_load(
; CHECK:       {
; CHECK-NEXT:    .reg .pred %p<2>;
; CHECK-NEXT:    .reg .b16 %rs<7>;
; CHECK-NEXT:    .reg .b32 %r<2>;
; CHECK-NEXT:    .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT:  // %bb.0:
; CHECK-NEXT:    ld.param.u64 %rd1, [test_v2i8_load_param_0];
; CHECK-NEXT:    ld.v2.u8 {%rs1, %rs2}, [%rd1];
; CHECK-NEXT:    or.b16 %rs5, %rs1, %rs2;
; CHECK-NEXT:    and.b16 %rs6, %rs5, 255;
; CHECK-NEXT:    setp.eq.s16 %p1, %rs6, 0;
; CHECK-NEXT:    selp.u32 %r1, 1, 0, %p1;
; CHECK-NEXT:    st.param.b32 [func_retval0], %r1;
; CHECK-NEXT:    ret;
  %v = load <2 x i8>, ptr %a, align 4
  %r0 = extractelement <2 x i8> %v, i64 0
  %r1 = extractelement <2 x i8> %v, i64 1
  %icmp = icmp eq i8 %r0, 0
  %icmp3 = icmp eq i8 %r1, 0
  %select = select i1 %icmp, i1 %icmp3, i1 false
  ret i1 %select
}

; i32 reinterpreted as <4 x i8>; the elements are sign-extended to i16 and
; summed. The expected PTX extracts each byte with bfe.s32 followed by
; cvt.s8.s32.
define i16 @test_v4i8(i32 %a) {
; CHECK-LABEL: test_v4i8(
; CHECK:       {
; CHECK-NEXT:    .reg .b16 %rs<8>;
; CHECK-NEXT:    .reg .b32 %r<7>;
; CHECK-EMPTY:
; CHECK-NEXT:  // %bb.0:
; CHECK-NEXT:    ld.param.u32 %r1, [test_v4i8_param_0];
; CHECK-NEXT:    bfe.s32 %r2, %r1, 0, 8;
; CHECK-NEXT:    cvt.s8.s32 %rs1, %r2;
; CHECK-NEXT:    bfe.s32 %r3, %r1, 8, 8;
; CHECK-NEXT:    cvt.s8.s32 %rs2, %r3;
; CHECK-NEXT:    bfe.s32 %r4, %r1, 16, 8;
; CHECK-NEXT:    cvt.s8.s32 %rs3, %r4;
; CHECK-NEXT:    bfe.s32 %r5, %r1, 24, 8;
; CHECK-NEXT:    cvt.s8.s32 %rs4, %r5;
; CHECK-NEXT:    add.s16 %rs5, %rs1, %rs2;
; CHECK-NEXT:    add.s16 %rs6, %rs3, %rs4;
; CHECK-NEXT:    add.s16 %rs7, %rs5, %rs6;
; CHECK-NEXT:    cvt.u32.u16 %r6, %rs7;
; CHECK-NEXT:    st.param.b32 [func_retval0], %r6;
; CHECK-NEXT:    ret;
  %v = bitcast i32 %a to <4 x i8>
  %r0 = extractelement <4 x i8> %v, i64 0
  %r1 = extractelement <4 x i8> %v, i64 1
  %r2 = extractelement <4 x i8> %v, i64 2
  %r3 = extractelement <4 x i8> %v, i64 3
  %r0i = sext i8 %r0 to i16
  %r1i = sext i8 %r1 to i16
  %r2i = sext i8 %r2 to i16
  %r3i = sext i8 %r3 to i16
  %r01 = add i16 %r0i, %r1i
  %r23 = add i16 %r2i, %r3i
  %r = add i16 %r01, %r23
  ret i16 %r
}

; Same extraction as test_v4i8, but the elements are sign-extended to i32. The
; expected PTX needs only bfe.s32 per element, with no extra conversion.
define i32 @test_v4i8_s32(i32 %a) {
; CHECK-LABEL: test_v4i8_s32(
; CHECK:       {
; CHECK-NEXT:    .reg .b32 %r<9>;
; CHECK-EMPTY:
; CHECK-NEXT:  // %bb.0:
; CHECK-NEXT:    ld.param.u32 %r1, [test_v4i8_s32_param_0];
; CHECK-NEXT:    bfe.s32 %r2, %r1, 0, 8;
; CHECK-NEXT:    bfe.s32 %r3, %r1, 8, 8;
; CHECK-NEXT:    bfe.s32 %r4, %r1, 16, 8;
; CHECK-NEXT:    bfe.s32 %r5, %r1, 24, 8;
; CHECK-NEXT:    add.s32 %r6, %r2, %r3;
; CHECK-NEXT:    add.s32 %r7, %r4, %r5;
; CHECK-NEXT:    add.s32 %r8, %r6, %r7;
; CHECK-NEXT:    st.param.b32 [func_retval0], %r8;
; CHECK-NEXT:    ret;
  %v = bitcast i32 %a to <4 x i8>
  %r0 = extractelement <4 x i8> %v, i64 0
  %r1 = extractelement <4 x i8> %v, i64 1
  %r2 = extractelement <4 x i8> %v, i64 2
  %r3 = extractelement <4 x i8> %v, i64 3
  %r0i = sext i8 %r0 to i32
  %r1i = sext i8 %r1 to i32
  %r2i = sext i8 %r2 to i32
  %r3i = sext i8 %r3 to i32
  %r01 = add i32 %r0i, %r1i
  %r23 = add i32 %r2i, %r3i
  %r = add i32 %r01, %r23
  ret i32 %r
}

; Zero-extending variant of test_v4i8_s32: the expected PTX uses bfe.u32
; instead of bfe.s32.
define i32 @test_v4i8_u32(i32 %a) {
; CHECK-LABEL: test_v4i8_u32(
; CHECK:       {
; CHECK-NEXT:    .reg .b32 %r<9>;
; CHECK-EMPTY:
; CHECK-NEXT:  // %bb.0:
; CHECK-NEXT:    ld.param.u32 %r1, [test_v4i8_u32_param_0];
; CHECK-NEXT:    bfe.u32 %r2, %r1, 0, 8;
; CHECK-NEXT:    bfe.u32 %r3, %r1, 8, 8;
; CHECK-NEXT:    bfe.u32 %r4, %r1, 16, 8;
; CHECK-NEXT:    bfe.u32 %r5, %r1, 24, 8;
; CHECK-NEXT:    add.s32 %r6, %r2, %r3;
; CHECK-NEXT:    add.s32 %r7, %r4, %r5;
; CHECK-NEXT:    add.s32 %r8, %r6, %r7;
; CHECK-NEXT:    st.param.b32 [func_retval0], %r8;
; CHECK-NEXT:    ret;
  %v = bitcast i32 %a to <4 x i8>
  %r0 = extractelement <4 x i8> %v, i64 0
  %r1 = extractelement <4 x i8> %v, i64 1
  %r2 = extractelement <4 x i8> %v, i64 2
  %r3 = extractelement <4 x i8> %v, i64 3
  %r0i = zext i8 %r0 to i32
  %r1i = zext i8 %r1 to i32
  %r2i = zext i8 %r2 to i32
  %r3i = zext i8 %r3 to i32
  %r01 = add i32 %r0i, %r1i
  %r23 = add i32 %r2i, %r3i
  %r = add i32 %r01, %r23
  ret i32 %r
}



; i64 reinterpreted as <8 x i8>; the elements are sign-extended to i16 and
; summed. The expected PTX first splits the 64-bit value into two 32-bit
; registers (cvt.u32.u64 for elements 0-3, mov.b64 unpack for elements 4-7)
; and then extracts each byte with bfe.s32 followed by cvt.s8.s32.
define i16 @test_v8i8(i64 %a) {
; CHECK-LABEL: test_v8i8(
; CHECK:       {
; CHECK-NEXT:    .reg .b16 %rs<16>;
; CHECK-NEXT:    .reg .b32 %r<12>;
; CHECK-NEXT:    .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT:  // %bb.0:
; CHECK-NEXT:    ld.param.u64 %rd1, [test_v8i8_param_0];
; CHECK-NEXT:    { .reg .b32 tmp; mov.b64 {tmp, %r1}, %rd1; }
; CHECK-NEXT:    cvt.u32.u64 %r2, %rd1;
; CHECK-NEXT:    bfe.s32 %r3, %r2, 0, 8;
; CHECK-NEXT:    cvt.s8.s32 %rs1, %r3;
; CHECK-NEXT:    bfe.s32 %r4, %r2, 8, 8;
; CHECK-NEXT:    cvt.s8.s32 %rs2, %r4;
; CHECK-NEXT:    bfe.s32 %r5, %r2, 16, 8;
; CHECK-NEXT:    cvt.s8.s32 %rs3, %r5;
; CHECK-NEXT:    bfe.s32 %r6, %r2, 24, 8;
; CHECK-NEXT:    cvt.s8.s32 %rs4, %r6;
; CHECK-NEXT:    bfe.s32 %r7, %r1, 0, 8;
; CHECK-NEXT:    cvt.s8.s32 %rs5, %r7;
; CHECK-NEXT:    bfe.s32 %r8, %r1, 8, 8;
; CHECK-NEXT:    cvt.s8.s32 %rs6, %r8;
; CHECK-NEXT:    bfe.s32 %r9, %r1, 16, 8;
; CHECK-NEXT:    cvt.s8.s32 %rs7, %r9;
; CHECK-NEXT:    bfe.s32 %r10, %r1, 24, 8;
; CHECK-NEXT:    cvt.s8.s32 %rs8, %r10;
; CHECK-NEXT:    add.s16 %rs9, %rs1, %rs2;
; CHECK-NEXT:    add.s16 %rs10, %rs3, %rs4;
; CHECK-NEXT:    add.s16 %rs11, %rs5, %rs6;
; CHECK-NEXT:    add.s16 %rs12, %rs7, %rs8;
; CHECK-NEXT:    add.s16 %rs13, %rs9, %rs10;
; CHECK-NEXT:    add.s16 %rs14, %rs11, %rs12;
; CHECK-NEXT:    add.s16 %rs15, %rs13, %rs14;
; CHECK-NEXT:    cvt.u32.u16 %r11, %rs15;
; CHECK-NEXT:    st.param.b32 [func_retval0], %r11;
; CHECK-NEXT:    ret;
  %v = bitcast i64 %a to <8 x i8>
  %r0 = extractelement <8 x i8> %v, i64 0
  %r1 = extractelement <8 x i8> %v, i64 1
  %r2 = extractelement <8 x i8> %v, i64 2
  %r3 = extractelement <8 x i8> %v, i64 3
  %r4 = extractelement <8 x i8> %v, i64 4
  %r5 = extractelement <8 x i8> %v, i64 5
  %r6 = extractelement <8 x i8> %v, i64 6
  %r7 = extractelement <8 x i8> %v, i64 7
  %r0i = sext i8 %r0 to i16
  %r1i = sext i8 %r1 to i16
  %r2i = sext i8 %r2 to i16
  %r3i = sext i8 %r3 to i16
  %r4i = sext i8 %r4 to i16
  %r5i = sext i8 %r5 to i16
  %r6i = sext i8 %r6 to i16
  %r7i = sext i8 %r7 to i16
  %r01 = add i16 %r0i, %r1i
  %r23 = add i16 %r2i, %r3i
  %r45 = add i16 %r4i, %r5i
  %r67 = add i16 %r6i, %r7i
  %r0123 = add i16 %r01, %r23
  %r4567 = add i16 %r45, %r67
  %r = add i16 %r0123, %r4567
  ret i16 %r
}