; RUN: opt -S -passes='require<profile-summary>,function(codegenprepare)' < %s | FileCheck %s

target datalayout = "e-i64:64-v16:16-v32:32-n16:32:64"
target triple = "nvptx64-nvidia-cuda"

; Signed 64-bit division of two unknown operands. The expected output contains
; a 32-bit unsigned divide followed by the original 64-bit signed divide.
; We only use the div instruction -- the rem should be DCE'ed.
; CHECK-LABEL: @div_only(
define void @div_only(i64 %a, i64 %b, ptr %retptr) {
  ; CHECK: udiv i32
  ; CHECK-NOT: urem
  ; CHECK: sdiv i64
  ; CHECK-NOT: rem
  %d = sdiv i64 %a, %b
  store i64 %d, ptr %retptr
  ret void
}

; Signed 64-bit remainder of two unknown operands. The expected output contains
; a 32-bit unsigned remainder followed by the original 64-bit signed remainder;
; the latter is matched as 'srem' so that an unsigned 64-bit remainder in its
; place would be reported as a failure.
; We only use the rem instruction -- the div should be DCE'ed.
; CHECK-LABEL: @rem_only(
define void @rem_only(i64 %a, i64 %b, ptr %retptr) {
  ; CHECK-NOT: div
  ; CHECK: urem i32
  ; CHECK-NOT: div
  ; CHECK: srem i64
  ; CHECK-NOT: div
  %r = srem i64 %a, %b
  store i64 %r, ptr %retptr
  ret void
}

; The dividend is a zero-extended i32 and the divisor is the small constant 50.
; The expected output is straight-line code: the operand is truncated back to
; i32, divided there, and the quotient is zero-extended to i64.
; CHECK-LABEL: @udiv_by_constant(
define i64 @udiv_by_constant(i32 %a) {
; CHECK-NEXT: [[A_ZEXT:%.*]] = zext i32 [[A:%.*]] to i64
; CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[A_ZEXT]] to i32
; CHECK-NEXT: [[TMP2:%.*]] = udiv i32 [[TMP1]], 50
; CHECK-NEXT: [[TMP3:%.*]] = zext i32 [[TMP2]] to i64
; CHECK-NEXT: ret i64 [[TMP3]]

  %a.zext = zext i32 %a to i64
  %wide.div = udiv i64 %a.zext, 50
  ret i64 %wide.div
}

; Remainder counterpart of udiv_by_constant: same operands, and the expected
; output has the same shape with a 32-bit urem in place of the udiv.
; CHECK-LABEL: @urem_by_constant(
define i64 @urem_by_constant(i32 %a) {
; CHECK-NEXT: [[A_ZEXT:%.*]] = zext i32 [[A:%.*]] to i64
; CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[A_ZEXT]] to i32
; CHECK-NEXT: [[TMP2:%.*]] = urem i32 [[TMP1]], 50
; CHECK-NEXT: [[TMP3:%.*]] = zext i32 [[TMP2]] to i64
; CHECK-NEXT: ret i64 [[TMP3]]

  %a.zext = zext i32 %a to i64
  %wide.rem = urem i64 %a.zext, 50
  ret i64 %wide.rem
}

; Negative test: instead of emitting a runtime check on %a, we prefer to let the
; DAGCombiner transform this division by constant into a multiplication (with a
; "magic constant").
;
; CHECK-LABEL: @udiv_by_constant_negative_0(
define i64 @udiv_by_constant_negative_0(i64 %a) {
; CHECK-NEXT: [[QUOT:%.*]] = udiv i64 [[A:%.*]], 50
; CHECK-NEXT: ret i64 [[QUOT]]

  ; The dividend is a full-width i64 argument; the division is expected to be
  ; left exactly as written.
  %quot = udiv i64 %a, 50
  ret i64 %quot
}

; Negative test: the dividend is known to be short (it is a zero-extended i32),
; but the constant divisor is not. Kept for completeness only -- instcombine
; would fold this division to a plain "return 0".
;
; CHECK-LABEL: @udiv_by_constant_negative_1(
define i64 @udiv_by_constant_negative_1(i32 %a) {
; CHECK-NEXT: [[A_WIDE:%.*]] = zext i32 [[A:%.*]] to i64
; CHECK-NEXT: [[QUOT:%.*]] = udiv i64 [[A_WIDE]], 8589934592
; CHECK-NEXT: ret i64 [[QUOT]]

  %a.wide = zext i32 %a to i64
  %quot = udiv i64 %a.wide, 8589934592 ;; == 1 << 33
  ret i64 %quot
}

; Remainder counterpart of udiv_by_constant_negative_0: full-width dividend,
; small constant divisor; the urem is expected to be left as written.
;
; CHECK-LABEL: @urem_by_constant_negative_0(
define i64 @urem_by_constant_negative_0(i64 %a) {
; CHECK-NEXT: [[REM:%.*]] = urem i64 [[A:%.*]], 50
; CHECK-NEXT: ret i64 [[REM]]

  %rem = urem i64 %a, 50
  ret i64 %rem
}

; Remainder counterpart of udiv_by_constant_negative_1: short dividend, but a
; constant divisor that does not fit in 32 bits; the urem is expected to be
; left as written.
;
; CHECK-LABEL: @urem_by_constant_negative_1(
define i64 @urem_by_constant_negative_1(i32 %a) {
; CHECK-NEXT: [[A_WIDE:%.*]] = zext i32 [[A:%.*]] to i64
; CHECK-NEXT: [[REM:%.*]] = urem i64 [[A_WIDE]], 8589934592
; CHECK-NEXT: ret i64 [[REM]]

  %a.wide = zext i32 %a to i64
  %rem = urem i64 %a.wide, 8589934592 ;; == 1 << 33
  ret i64 %rem
}