; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_35 -verify-machineinstrs | FileCheck %s
; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64 -mcpu=sm_35 | %ptxas-verify %}

; Every function below takes a small vector of i8 lanes, pulls each lane out
; with extractelement and combines the lanes into a scalar result. The
; assertions pin the PTX that llc is expected to emit for sm_35.

target triple = "nvptx64-nvidia-cuda"

; Reinterprets an i16 as <2 x i8>, sign-extends both lanes to i16 and adds
; them. Expected PTX: lane 0 via cvt.s16.s8, lane 1 via an arithmetic shift
; right by 8.
define i16 @test_v2i8(i16 %a) {
; CHECK-LABEL: test_v2i8(
; CHECK:       {
; CHECK-NEXT:    .reg .b16 %rs<5>;
; CHECK-NEXT:    .reg .b32 %r<2>;
; CHECK-EMPTY:
; CHECK-NEXT:  // %bb.0:
; CHECK-NEXT:    ld.param.u16 %rs1, [test_v2i8_param_0];
; CHECK-NEXT:    cvt.s16.s8 %rs2, %rs1;
; CHECK-NEXT:    shr.s16 %rs3, %rs1, 8;
; CHECK-NEXT:    add.s16 %rs4, %rs2, %rs3;
; CHECK-NEXT:    cvt.u32.u16 %r1, %rs4;
; CHECK-NEXT:    st.param.b32 [func_retval0], %r1;
; CHECK-NEXT:    ret;
  %v = bitcast i16 %a to <2 x i8>
  %r0 = extractelement <2 x i8> %v, i64 0
  %r1 = extractelement <2 x i8> %v, i64 1
  %r0i = sext i8 %r0 to i16
  %r1i = sext i8 %r1 to i16
  %r01 = add i16 %r0i, %r1i
  ret i16 %r01
}

; Loads a <2 x i8> from memory (align 4) and returns true only when both
; lanes are zero; the select with a false arm acts as a logical AND of the
; two compares. Expected PTX: one ld.v2.u8, then the lanes are OR-ed, masked
; to 8 bits and compared against zero once.
define i1 @test_v2i8_load(ptr %a) {
; CHECK-LABEL: test_v2i8_load(
; CHECK:       {
; CHECK-NEXT:    .reg .pred %p<2>;
; CHECK-NEXT:    .reg .b16 %rs<7>;
; CHECK-NEXT:    .reg .b32 %r<2>;
; CHECK-NEXT:    .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT:  // %bb.0:
; CHECK-NEXT:    ld.param.u64 %rd1, [test_v2i8_load_param_0];
; CHECK-NEXT:    ld.v2.u8 {%rs1, %rs2}, [%rd1];
; CHECK-NEXT:    or.b16 %rs5, %rs1, %rs2;
; CHECK-NEXT:    and.b16 %rs6, %rs5, 255;
; CHECK-NEXT:    setp.eq.s16 %p1, %rs6, 0;
; CHECK-NEXT:    selp.u32 %r1, 1, 0, %p1;
; CHECK-NEXT:    st.param.b32 [func_retval0], %r1;
; CHECK-NEXT:    ret;
  %v = load <2 x i8>, ptr %a, align 4
  %r0 = extractelement <2 x i8> %v, i64 0
  %r1 = extractelement <2 x i8> %v, i64 1
  %icmp = icmp eq i8 %r0, 0
  %icmp3 = icmp eq i8 %r1, 0
  %select = select i1 %icmp, i1 %icmp3, i1 false
  ret i1 %select
}

; Reinterprets an i32 as <4 x i8>, sign-extends every lane to i16 and sums
; the four lanes. Expected PTX: one bfe.s32 per lane followed by a
; cvt.s8.s32 into a 16-bit register.
define i16 @test_v4i8(i32 %a) {
; CHECK-LABEL: test_v4i8(
; CHECK:       {
; CHECK-NEXT:    .reg .b16 %rs<8>;
; CHECK-NEXT:    .reg .b32 %r<7>;
; CHECK-EMPTY:
; CHECK-NEXT:  // %bb.0:
; CHECK-NEXT:    ld.param.u32 %r1, [test_v4i8_param_0];
; CHECK-NEXT:    bfe.s32 %r2, %r1, 0, 8;
; CHECK-NEXT:    cvt.s8.s32 %rs1, %r2;
; CHECK-NEXT:    bfe.s32 %r3, %r1, 8, 8;
; CHECK-NEXT:    cvt.s8.s32 %rs2, %r3;
; CHECK-NEXT:    bfe.s32 %r4, %r1, 16, 8;
; CHECK-NEXT:    cvt.s8.s32 %rs3, %r4;
; CHECK-NEXT:    bfe.s32 %r5, %r1, 24, 8;
; CHECK-NEXT:    cvt.s8.s32 %rs4, %r5;
; CHECK-NEXT:    add.s16 %rs5, %rs1, %rs2;
; CHECK-NEXT:    add.s16 %rs6, %rs3, %rs4;
; CHECK-NEXT:    add.s16 %rs7, %rs5, %rs6;
; CHECK-NEXT:    cvt.u32.u16 %r6, %rs7;
; CHECK-NEXT:    st.param.b32 [func_retval0], %r6;
; CHECK-NEXT:    ret;
  %v = bitcast i32 %a to <4 x i8>
  %r0 = extractelement <4 x i8> %v, i64 0
  %r1 = extractelement <4 x i8> %v, i64 1
  %r2 = extractelement <4 x i8> %v, i64 2
  %r3 = extractelement <4 x i8> %v, i64 3
  %r0i = sext i8 %r0 to i16
  %r1i = sext i8 %r1 to i16
  %r2i = sext i8 %r2 to i16
  %r3i = sext i8 %r3 to i16
  %r01 = add i16 %r0i, %r1i
  %r23 = add i16 %r2i, %r3i
  %r = add i16 %r01, %r23
  ret i16 %r
}

; Same lane sum as test_v4i8, but each lane is sign-extended to i32.
; Expected PTX: each bfe.s32 result feeds the 32-bit adds directly, with no
; intermediate cvt.
define i32 @test_v4i8_s32(i32 %a) {
; CHECK-LABEL: test_v4i8_s32(
; CHECK:       {
; CHECK-NEXT:    .reg .b32 %r<9>;
; CHECK-EMPTY:
; CHECK-NEXT:  // %bb.0:
; CHECK-NEXT:    ld.param.u32 %r1, [test_v4i8_s32_param_0];
; CHECK-NEXT:    bfe.s32 %r2, %r1, 0, 8;
; CHECK-NEXT:    bfe.s32 %r3, %r1, 8, 8;
; CHECK-NEXT:    bfe.s32 %r4, %r1, 16, 8;
; CHECK-NEXT:    bfe.s32 %r5, %r1, 24, 8;
; CHECK-NEXT:    add.s32 %r6, %r2, %r3;
; CHECK-NEXT:    add.s32 %r7, %r4, %r5;
; CHECK-NEXT:    add.s32 %r8, %r6, %r7;
; CHECK-NEXT:    st.param.b32 [func_retval0], %r8;
; CHECK-NEXT:    ret;
  %v = bitcast i32 %a to <4 x i8>
  %r0 = extractelement <4 x i8> %v, i64 0
  %r1 = extractelement <4 x i8> %v, i64 1
  %r2 = extractelement <4 x i8> %v, i64 2
  %r3 = extractelement <4 x i8> %v, i64 3
  %r0i = sext i8 %r0 to i32
  %r1i = sext i8 %r1 to i32
  %r2i = sext i8 %r2 to i32
  %r3i = sext i8 %r3 to i32
  %r01 = add i32 %r0i, %r1i
  %r23 = add i32 %r2i, %r3i
  %r = add i32 %r01, %r23
  ret i32 %r
}

; Zero-extending counterpart of test_v4i8_s32: each lane is widened with
; zext. Expected PTX: the lanes are extracted with bfe.u32 instead of
; bfe.s32.
define i32 @test_v4i8_u32(i32 %a) {
; CHECK-LABEL: test_v4i8_u32(
; CHECK:       {
; CHECK-NEXT:    .reg .b32 %r<9>;
; CHECK-EMPTY:
; CHECK-NEXT:  // %bb.0:
; CHECK-NEXT:    ld.param.u32 %r1, [test_v4i8_u32_param_0];
; CHECK-NEXT:    bfe.u32 %r2, %r1, 0, 8;
; CHECK-NEXT:    bfe.u32 %r3, %r1, 8, 8;
; CHECK-NEXT:    bfe.u32 %r4, %r1, 16, 8;
; CHECK-NEXT:    bfe.u32 %r5, %r1, 24, 8;
; CHECK-NEXT:    add.s32 %r6, %r2, %r3;
; CHECK-NEXT:    add.s32 %r7, %r4, %r5;
; CHECK-NEXT:    add.s32 %r8, %r6, %r7;
; CHECK-NEXT:    st.param.b32 [func_retval0], %r8;
; CHECK-NEXT:    ret;
  %v = bitcast i32 %a to <4 x i8>
  %r0 = extractelement <4 x i8> %v, i64 0
  %r1 = extractelement <4 x i8> %v, i64 1
  %r2 = extractelement <4 x i8> %v, i64 2
  %r3 = extractelement <4 x i8> %v, i64 3
  %r0i = zext i8 %r0 to i32
  %r1i = zext i8 %r1 to i32
  %r2i = zext i8 %r2 to i32
  %r3i = zext i8 %r3 to i32
  %r01 = add i32 %r0i, %r1i
  %r23 = add i32 %r2i, %r3i
  %r = add i32 %r01, %r23
  ret i32 %r
}

; Reinterprets an i64 as <8 x i8>, sign-extends every lane to i16 and sums
; all eight lanes pairwise. Expected PTX: the 64-bit value is split into two
; 32-bit registers (cvt.u32.u64 for lanes 0-3, a mov.b64 unpack for lanes
; 4-7) and each lane is then extracted with bfe.s32.
define i16 @test_v8i8(i64 %a) {
; CHECK-LABEL: test_v8i8(
; CHECK:       {
; CHECK-NEXT:    .reg .b16 %rs<16>;
; CHECK-NEXT:    .reg .b32 %r<12>;
; CHECK-NEXT:    .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT:  // %bb.0:
; CHECK-NEXT:    ld.param.u64 %rd1, [test_v8i8_param_0];
; CHECK-NEXT:    { .reg .b32 tmp; mov.b64 {tmp, %r1}, %rd1; }
; CHECK-NEXT:    cvt.u32.u64 %r2, %rd1;
; CHECK-NEXT:    bfe.s32 %r3, %r2, 0, 8;
; CHECK-NEXT:    cvt.s8.s32 %rs1, %r3;
; CHECK-NEXT:    bfe.s32 %r4, %r2, 8, 8;
; CHECK-NEXT:    cvt.s8.s32 %rs2, %r4;
; CHECK-NEXT:    bfe.s32 %r5, %r2, 16, 8;
; CHECK-NEXT:    cvt.s8.s32 %rs3, %r5;
; CHECK-NEXT:    bfe.s32 %r6, %r2, 24, 8;
; CHECK-NEXT:    cvt.s8.s32 %rs4, %r6;
; CHECK-NEXT:    bfe.s32 %r7, %r1, 0, 8;
; CHECK-NEXT:    cvt.s8.s32 %rs5, %r7;
; CHECK-NEXT:    bfe.s32 %r8, %r1, 8, 8;
; CHECK-NEXT:    cvt.s8.s32 %rs6, %r8;
; CHECK-NEXT:    bfe.s32 %r9, %r1, 16, 8;
; CHECK-NEXT:    cvt.s8.s32 %rs7, %r9;
; CHECK-NEXT:    bfe.s32 %r10, %r1, 24, 8;
; CHECK-NEXT:    cvt.s8.s32 %rs8, %r10;
; CHECK-NEXT:    add.s16 %rs9, %rs1, %rs2;
; CHECK-NEXT:    add.s16 %rs10, %rs3, %rs4;
; CHECK-NEXT:    add.s16 %rs11, %rs5, %rs6;
; CHECK-NEXT:    add.s16 %rs12, %rs7, %rs8;
; CHECK-NEXT:    add.s16 %rs13, %rs9, %rs10;
; CHECK-NEXT:    add.s16 %rs14, %rs11, %rs12;
; CHECK-NEXT:    add.s16 %rs15, %rs13, %rs14;
; CHECK-NEXT:    cvt.u32.u16 %r11, %rs15;
; CHECK-NEXT:    st.param.b32 [func_retval0], %r11;
; CHECK-NEXT:    ret;
  %v = bitcast i64 %a to <8 x i8>
  %r0 = extractelement <8 x i8> %v, i64 0
  %r1 = extractelement <8 x i8> %v, i64 1
  %r2 = extractelement <8 x i8> %v, i64 2
  %r3 = extractelement <8 x i8> %v, i64 3
  %r4 = extractelement <8 x i8> %v, i64 4
  %r5 = extractelement <8 x i8> %v, i64 5
  %r6 = extractelement <8 x i8> %v, i64 6
  %r7 = extractelement <8 x i8> %v, i64 7
  %r0i = sext i8 %r0 to i16
  %r1i = sext i8 %r1 to i16
  %r2i = sext i8 %r2 to i16
  %r3i = sext i8 %r3 to i16
  %r4i = sext i8 %r4 to i16
  %r5i = sext i8 %r5 to i16
  %r6i = sext i8 %r6 to i16
  %r7i = sext i8 %r7 to i16
  %r01 = add i16 %r0i, %r1i
  %r23 = add i16 %r2i, %r3i
  %r45 = add i16 %r4i, %r5i
  %r67 = add i16 %r6i, %r7i
  %r0123 = add i16 %r01, %r23
  %r4567 = add i16 %r45, %r67
  %r = add i16 %r0123, %r4567
  ret i16 %r
}