; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc < %s -mtriple=nvptx -mcpu=sm_20 -O1 | FileCheck %s
; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_20 -O1 | FileCheck %s
; RUN: %if ptxas && !ptxas-12.0 %{ llc < %s -mtriple=nvptx -mcpu=sm_20 -O1 | %ptxas-verify %}
; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64 -mcpu=sm_20 -O1 | %ptxas-verify %}

;; Codegen tests for folding 32-bit integer mul/add (optionally through a
;; select) into mad.lo.s32 on NVPTX. Each function pins the exact expected PTX;
;; the assertion lines are autogenerated, so regenerate them with the script
;; named in the first line instead of editing them by hand.

;; (mul (add n, 1), m) is expected to become a single mad: m * n + m.
define i32 @test1(i32 %n, i32 %m) {
;
; CHECK-LABEL: test1(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<4>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.u32 %r1, [test1_param_0];
; CHECK-NEXT: ld.param.u32 %r2, [test1_param_1];
; CHECK-NEXT: mad.lo.s32 %r3, %r2, %r1, %r2;
; CHECK-NEXT: st.param.b32 [func_retval0], %r3;
; CHECK-NEXT: ret;
  %add = add i32 %n, 1
  %mul = mul i32 %add, %m
  ret i32 %mul
}

;; Same as test1 with the mul operands swapped: (mul m, (add n, 1)).
;; The expected mad is identical.
define i32 @test1_rev(i32 %n, i32 %m) {
;
; CHECK-LABEL: test1_rev(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<4>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.u32 %r1, [test1_rev_param_0];
; CHECK-NEXT: ld.param.u32 %r2, [test1_rev_param_1];
; CHECK-NEXT: mad.lo.s32 %r3, %r2, %r1, %r2;
; CHECK-NEXT: st.param.b32 [func_retval0], %r3;
; CHECK-NEXT: ret;
  %add = add i32 %n, 1
  %mul = mul i32 %m, %add
  ret i32 %mul
}

;; Transpose (mul (select)) if it can then be folded to mad:
;;   (mul (select c, 1, (add n, 1)), m) -> (select c, m, (mad m, n, m))
define i32 @test2(i32 %n, i32 %m, i32 %s) {
;
; CHECK-LABEL: test2(
; CHECK: {
; CHECK-NEXT: .reg .pred %p<2>;
; CHECK-NEXT: .reg .b32 %r<6>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.u32 %r1, [test2_param_0];
; CHECK-NEXT: ld.param.u32 %r2, [test2_param_1];
; CHECK-NEXT: ld.param.u32 %r3, [test2_param_2];
; CHECK-NEXT: setp.lt.s32 %p1, %r3, 1;
; CHECK-NEXT: mad.lo.s32 %r4, %r2, %r1, %r2;
; CHECK-NEXT: selp.b32 %r5, %r2, %r4, %p1;
; CHECK-NEXT: st.param.b32 [func_retval0], %r5;
; CHECK-NEXT: ret;
  %add = add i32 %n, 1
  %cond = icmp slt i32 %s, 1
  %sel = select i1 %cond, i32 1, i32 %add
  %mul = mul i32 %sel, %m
  ret i32 %mul
}

;; Transpose (mul (select)) if it can then be folded to mad.
;; Variant of test2 with the select arms swapped, so the expected selp picks
;; the mad result when the predicate is true.
define i32 @test2_rev1(i32 %n, i32 %m, i32 %s) {
;
; CHECK-LABEL: test2_rev1(
; CHECK: {
; CHECK-NEXT: .reg .pred %p<2>;
; CHECK-NEXT: .reg .b32 %r<6>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.u32 %r1, [test2_rev1_param_0];
; CHECK-NEXT: ld.param.u32 %r2, [test2_rev1_param_1];
; CHECK-NEXT: ld.param.u32 %r3, [test2_rev1_param_2];
; CHECK-NEXT: setp.lt.s32 %p1, %r3, 1;
; CHECK-NEXT: mad.lo.s32 %r4, %r2, %r1, %r2;
; CHECK-NEXT: selp.b32 %r5, %r4, %r2, %p1;
; CHECK-NEXT: st.param.b32 [func_retval0], %r5;
; CHECK-NEXT: ret;
  %add = add i32 %n, 1
  %cond = icmp slt i32 %s, 1
  %sel = select i1 %cond, i32 %add, i32 1
  %mul = mul i32 %sel, %m
  ret i32 %mul
}

;; Transpose (mul (select)) if it can then be folded to mad.
;; Variant of test2_rev1 with the mul operands swapped as well; the expected
;; output is the same as for test2_rev1.
define i32 @test2_rev2(i32 %n, i32 %m, i32 %s) {
;
; CHECK-LABEL: test2_rev2(
; CHECK: {
; CHECK-NEXT: .reg .pred %p<2>;
; CHECK-NEXT: .reg .b32 %r<6>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.u32 %r1, [test2_rev2_param_0];
; CHECK-NEXT: ld.param.u32 %r2, [test2_rev2_param_1];
; CHECK-NEXT: ld.param.u32 %r3, [test2_rev2_param_2];
; CHECK-NEXT: setp.lt.s32 %p1, %r3, 1;
; CHECK-NEXT: mad.lo.s32 %r4, %r2, %r1, %r2;
; CHECK-NEXT: selp.b32 %r5, %r4, %r2, %p1;
; CHECK-NEXT: st.param.b32 [func_retval0], %r5;
; CHECK-NEXT: ret;
  %add = add i32 %n, 1
  %cond = icmp slt i32 %s, 1
  %sel = select i1 %cond, i32 %add, i32 1
  %mul = mul i32 %m, %sel
  ret i32 %mul
}

;; Leave (mul (select)) intact if transposing it is not profitable.
;; Here the addend is 3 rather than 1, and the expected output keeps separate
;; add, selp and mul.lo instructions.
define i32 @test3(i32 %n, i32 %m, i32 %s) {
;
; CHECK-LABEL: test3(
; CHECK: {
; CHECK-NEXT: .reg .pred %p<2>;
; CHECK-NEXT: .reg .b32 %r<7>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.u32 %r1, [test3_param_0];
; CHECK-NEXT: add.s32 %r2, %r1, 3;
; CHECK-NEXT: ld.param.u32 %r3, [test3_param_1];
; CHECK-NEXT: ld.param.u32 %r4, [test3_param_2];
; CHECK-NEXT: setp.lt.s32 %p1, %r4, 1;
; CHECK-NEXT: selp.b32 %r5, 1, %r2, %p1;
; CHECK-NEXT: mul.lo.s32 %r6, %r5, %r3;
; CHECK-NEXT: st.param.b32 [func_retval0], %r6;
; CHECK-NEXT: ret;
  %add = add i32 %n, 3
  %cond = icmp slt i32 %s, 1
  %sel = select i1 %cond, i32 1, i32 %add
  %mul = mul i32 %sel, %m
  ret i32 %mul
}

;; (add c, (select p, (mul a, b), 0)) -> (select p, (mad a, b, c), c)
define i32 @test4(i32 %a, i32 %b, i32 %c, i1 %p) {
; CHECK-LABEL: test4(
; CHECK: {
; CHECK-NEXT: .reg .pred %p<2>;
; CHECK-NEXT: .reg .b16 %rs<3>;
; CHECK-NEXT: .reg .b32 %r<6>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.u8 %rs1, [test4_param_3];
; CHECK-NEXT: and.b16 %rs2, %rs1, 1;
; CHECK-NEXT: setp.eq.b16 %p1, %rs2, 1;
; CHECK-NEXT: ld.param.u32 %r1, [test4_param_0];
; CHECK-NEXT: ld.param.u32 %r2, [test4_param_1];
; CHECK-NEXT: ld.param.u32 %r3, [test4_param_2];
; CHECK-NEXT: mad.lo.s32 %r4, %r1, %r2, %r3;
; CHECK-NEXT: selp.b32 %r5, %r4, %r3, %p1;
; CHECK-NEXT: st.param.b32 [func_retval0], %r5;
; CHECK-NEXT: ret;
  %mul = mul i32 %a, %b
  %sel = select i1 %p, i32 %mul, i32 0
  %add = add i32 %c, %sel
  ret i32 %add
}

;; Same as test4 with the select arms swapped:
;;   (add c, (select p, 0, (mul a, b))) -> (select p, c, (mad a, b, c))
define i32 @test4_rev(i32 %a, i32 %b, i32 %c, i1 %p) {
; CHECK-LABEL: test4_rev(
; CHECK: {
; CHECK-NEXT: .reg .pred %p<2>;
; CHECK-NEXT: .reg .b16 %rs<3>;
; CHECK-NEXT: .reg .b32 %r<6>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.u8 %rs1, [test4_rev_param_3];
; CHECK-NEXT: and.b16 %rs2, %rs1, 1;
; CHECK-NEXT: setp.eq.b16 %p1, %rs2, 1;
; CHECK-NEXT: ld.param.u32 %r1, [test4_rev_param_0];
; CHECK-NEXT: ld.param.u32 %r2, [test4_rev_param_1];
; CHECK-NEXT: ld.param.u32 %r3, [test4_rev_param_2];
; CHECK-NEXT: mad.lo.s32 %r4, %r1, %r2, %r3;
; CHECK-NEXT: selp.b32 %r5, %r3, %r4, %p1;
; CHECK-NEXT: st.param.b32 [func_retval0], %r5;
; CHECK-NEXT: ret;
  %mul = mul i32 %a, %b
  %sel = select i1 %p, i32 0, i32 %mul
  %add = add i32 %c, %sel
  ret i32 %add
}

;; External function called by test_mad_multi_use, which passes it both the
;; product and the sum so that the product has a second use.
declare i32 @use(i32 %0, i32 %1)

;; The product is used twice (by the add and as an argument to @use); the
;; expected output keeps separate mul.lo and add instructions instead of a mad.
define i32 @test_mad_multi_use(i32 %a, i32 %b, i32 %c) {
; CHECK-LABEL: test_mad_multi_use(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<8>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.u32 %r1, [test_mad_multi_use_param_0];
; CHECK-NEXT: ld.param.u32 %r2, [test_mad_multi_use_param_1];
; CHECK-NEXT: mul.lo.s32 %r3, %r1, %r2;
; CHECK-NEXT: ld.param.u32 %r4, [test_mad_multi_use_param_2];
; CHECK-NEXT: add.s32 %r5, %r3, %r4;
; CHECK-NEXT: { // callseq 0, 0
; CHECK-NEXT: .param .b32 param0;
; CHECK-NEXT: st.param.b32 [param0], %r3;
; CHECK-NEXT: .param .b32 param1;
; CHECK-NEXT: st.param.b32 [param1], %r5;
; CHECK-NEXT: .param .b32 retval0;
; CHECK-NEXT: call.uni (retval0),
; CHECK-NEXT: use,
; CHECK-NEXT: (
; CHECK-NEXT: param0,
; CHECK-NEXT: param1
; CHECK-NEXT: );
; CHECK-NEXT: ld.param.b32 %r6, [retval0];
; CHECK-NEXT: } // callseq 0
; CHECK-NEXT: st.param.b32 [func_retval0], %r6;
; CHECK-NEXT: ret;
  %mul = mul i32 %a, %b
  %add = add i32 %mul, %c
  %res = call i32 @use(i32 %mul, i32 %add)
  ret i32 %res
}

;; This case relies on mad x 1 y => add x y; previously we emitted:
;;   mad.lo.s32 %r3, %r1, 1, %r2;
;; The input is a signed division by the constant 8191, and the expected
;; output is a mul.hi / add / shift sequence with a plain add.s32.
define i32 @test_mad_fold(i32 %x) {
; CHECK-LABEL: test_mad_fold(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<7>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.u32 %r1, [test_mad_fold_param_0];
; CHECK-NEXT: mul.hi.s32 %r2, %r1, -2147221471;
; CHECK-NEXT: add.s32 %r3, %r2, %r1;
; CHECK-NEXT: shr.u32 %r4, %r3, 31;
; CHECK-NEXT: shr.s32 %r5, %r3, 12;
; CHECK-NEXT: add.s32 %r6, %r5, %r4;
; CHECK-NEXT: st.param.b32 [func_retval0], %r6;
; CHECK-NEXT: ret;
  %div = sdiv i32 %x, 8191
  ret i32 %div
}