; RUN: llc -O2 < %s -mtriple=nvptx64 -mcpu=sm_35 | FileCheck %s --check-prefixes=O2,CHECK
; RUN: llc -O0 < %s -mtriple=nvptx64 -mcpu=sm_35 | FileCheck %s --check-prefixes=O0,CHECK
; RUN: %if ptxas %{ llc -O2 < %s -mtriple=nvptx64 -mcpu=sm_35 | %ptxas-verify %}
; RUN: %if ptxas %{ llc -O0 < %s -mtriple=nvptx64 -mcpu=sm_35 | %ptxas-verify %}

; The first two invocations match the generated PTX at -O2 and at -O0, using a
; shared prefix plus one prefix per optimization level.  The last two only run
; when ptxas is available and pipe the same output through %ptxas-verify.

; The following IR
;
;   quot = n / d
;   rem  = n % d
;
; should be transformed into
;
;   quot = n / d
;   rem  = n - (n / d) * d
;
; during NVPTX isel, at -O2.  At -O0, we should leave it alone.

; Signed case: sdiv and srem share the same numerator and denominator.
; CHECK-LABEL: sdiv32(
define void @sdiv32(i32 %n, i32 %d, ptr %quot_ret, ptr %rem_ret) {
  ; CHECK: div.s32 [[quot:%r[0-9]+]], [[num:%r[0-9]+]], [[den:%r[0-9]+]];
  %quot = sdiv i32 %n, %d

  ; O0: rem.s32
  ; At -O2 the remainder is rebuilt from the quotient, so no rem instruction
  ; may show up between the divide and the multiply.
  ; O2-NOT: rem.s32
  ; (This is unfortunately order-sensitive, even though mul is commutative.)
  ; O2: mul.lo.s32 [[mul:%r[0-9]+]], [[quot]], [[den]];
  ; O2: sub.s32 [[rem:%r[0-9]+]], [[num]], [[mul]]
  %rem = srem i32 %n, %d

  ; Both results are written through the out-pointers so each value has a use.
  ; O2: st{{.*}}[[quot]]
  store i32 %quot, ptr %quot_ret
  ; O2: st{{.*}}[[rem]]
  store i32 %rem, ptr %rem_ret
  ret void
}

; Unsigned case: udiv and urem share the same numerator and denominator.
; CHECK-LABEL: udiv32(
define void @udiv32(i32 %n, i32 %d, ptr %quot_ret, ptr %rem_ret) {
  ; CHECK: div.u32 [[quot:%r[0-9]+]], [[num:%r[0-9]+]], [[den:%r[0-9]+]];
  %quot = udiv i32 %n, %d

  ; O0: rem.u32
  ; At -O2 the remainder is rebuilt from the quotient, so no rem instruction
  ; may show up between the divide and the multiply.
  ; O2-NOT: rem.u32

  ; Selection DAG doesn't know whether this is signed or unsigned
  ; multiplication and subtraction, but it doesn't make a difference either
  ; way.
  ; O2: mul.lo.{{u|s}}32 [[mul:%r[0-9]+]], [[quot]], [[den]];
  ; O2: sub.{{u|s}}32 [[rem:%r[0-9]+]], [[num]], [[mul]]
  %rem = urem i32 %n, %d

  ; Both results are written through the out-pointers so each value has a use.
  ; O2: st{{.*}}[[quot]]
  store i32 %quot, ptr %quot_ret
  ; O2: st{{.*}}[[rem]]
  store i32 %rem, ptr %rem_ret
  ret void
}

; Check that we don't perform this optimization if one operation is signed and
; the other isn't.
; CHECK-LABEL: mismatched_types1(
define void @mismatched_types1(i32 %n, i32 %d, ptr %quot_ret, ptr %rem_ret) {
  ; Unsigned divide paired with a signed remainder: both instructions are
  ; expected in the output at every optimization level under test.
  ; CHECK: div.u32
  ; CHECK: rem.s32
  %quot = udiv i32 %n, %d
  %rem = srem i32 %n, %d
  store i32 %quot, ptr %quot_ret
  store i32 %rem, ptr %rem_ret
  ret void
}

; CHECK-LABEL: mismatched_types2(
define void @mismatched_types2(i32 %n, i32 %d, ptr %quot_ret, ptr %rem_ret) {
  ; Signed divide paired with an unsigned remainder: both instructions are
  ; expected in the output at every optimization level under test.
  ; CHECK: div.s32
  ; CHECK: rem.u32
  %quot = sdiv i32 %n, %d
  %rem = urem i32 %n, %d
  store i32 %quot, ptr %quot_ret
  store i32 %rem, ptr %rem_ret
  ret void
}

; Check that we don't perform this optimization if the inputs to the div don't
; match the inputs to the rem.
; CHECK-LABEL: mismatched_inputs1(
define void @mismatched_inputs1(i32 %n, i32 %d, ptr %quot_ret, ptr %rem_ret) {
  ; Same two values, but the remainder has its operands swapped (d % n).
  ; CHECK: div.s32
  ; CHECK: rem.s32
  %quot = sdiv i32 %n, %d
  %rem = srem i32 %d, %n
  store i32 %quot, ptr %quot_ret
  store i32 %rem, ptr %rem_ret
  ret void
}

; CHECK-LABEL: mismatched_inputs2(
define void @mismatched_inputs2(i32 %n1, i32 %n2, i32 %d, ptr %quot_ret, ptr %rem_ret) {
  ; Shared denominator, different numerators (n1 / d versus n2 % d).
  ; CHECK: div.s32
  ; CHECK: rem.s32
  %quot = sdiv i32 %n1, %d
  %rem = srem i32 %n2, %d
  store i32 %quot, ptr %quot_ret
  store i32 %rem, ptr %rem_ret
  ret void
}

; CHECK-LABEL: mismatched_inputs3(
define void @mismatched_inputs3(i32 %n, i32 %d1, i32 %d2, ptr %quot_ret, ptr %rem_ret) {
  ; Shared numerator, different denominators (n / d1 versus n % d2).
  ; CHECK: div.s32
  ; CHECK: rem.s32
  %quot = sdiv i32 %n, %d1
  %rem = srem i32 %n, %d2
  store i32 %quot, ptr %quot_ret
  store i32 %rem, ptr %rem_ret
  ret void
}