; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=aarch64-unknown-linux-gnu < %s | FileCheck %s

;------------------------------------------------------------------------------;
; Odd divisors
;------------------------------------------------------------------------------;

; (X srem 5) == 0, zero-extended to i32. The expected output below contains a
; multiply-add and an unsigned compare, and no division instruction.
define i32 @test_srem_odd(i32 %X) nounwind {
; CHECK-LABEL: test_srem_odd:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov w8, #52429 // =0xcccd
; CHECK-NEXT:    mov w9, #39321 // =0x9999
; CHECK-NEXT:    movk w8, #52428, lsl #16
; CHECK-NEXT:    movk w9, #6553, lsl #16
; CHECK-NEXT:    madd w8, w0, w8, w9
; CHECK-NEXT:    mov w9, #858993459 // =0x33333333
; CHECK-NEXT:    cmp w8, w9
; CHECK-NEXT:    cset w0, lo
; CHECK-NEXT:    ret
  %srem = srem i32 %X, 5
  %cmp = icmp eq i32 %srem, 0
  %ret = zext i1 %cmp to i32
  ret i32 %ret
}

; Same shape as test_srem_odd with a larger odd divisor: (X srem 25) == 0.
define i32 @test_srem_odd_25(i32 %X) nounwind {
; CHECK-LABEL: test_srem_odd_25:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov w8, #23593 // =0x5c29
; CHECK-NEXT:    mov w9, #47185 // =0xb851
; CHECK-NEXT:    movk w8, #49807, lsl #16
; CHECK-NEXT:    movk w9, #1310, lsl #16
; CHECK-NEXT:    madd w8, w0, w8, w9
; CHECK-NEXT:    mov w9, #28835 // =0x70a3
; CHECK-NEXT:    movk w9, #2621, lsl #16
; CHECK-NEXT:    cmp w8, w9
; CHECK-NEXT:    cset w0, lo
; CHECK-NEXT:    ret
  %srem = srem i32 %X, 25
  %cmp = icmp eq i32 %srem, 0
  %ret = zext i1 %cmp to i32
  ret i32 %ret
}

; This is like test_srem_odd, except the divisor has bit 30 set.
define i32 @test_srem_odd_bit30(i32 %X) nounwind {
; CHECK-LABEL: test_srem_odd_bit30:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov w8, #43691 // =0xaaab
; CHECK-NEXT:    mov w9, #1 // =0x1
; CHECK-NEXT:    movk w8, #27306, lsl #16
; CHECK-NEXT:    madd w8, w0, w8, w9
; CHECK-NEXT:    cmp w8, #3
; CHECK-NEXT:    cset w0, lo
; CHECK-NEXT:    ret
  %srem = srem i32 %X, 1073741827
  %cmp = icmp eq i32 %srem, 0
  %ret = zext i1 %cmp to i32
  ret i32 %ret
}

; This is like test_srem_odd, except the divisor has bit 31 set.
define i32 @test_srem_odd_bit31(i32 %X) nounwind {
; CHECK-LABEL: test_srem_odd_bit31:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov w8, #21845 // =0x5555
; CHECK-NEXT:    mov w9, #1 // =0x1
; CHECK-NEXT:    movk w8, #54613, lsl #16
; CHECK-NEXT:    madd w8, w0, w8, w9
; CHECK-NEXT:    cmp w8, #3
; CHECK-NEXT:    cset w0, lo
; CHECK-NEXT:    ret
  %srem = srem i32 %X, 2147483651
  %cmp = icmp eq i32 %srem, 0
  %ret = zext i1 %cmp to i32
  ret i32 %ret
}

;------------------------------------------------------------------------------;
; Even divisors
;------------------------------------------------------------------------------;

; i16 variant using the 'ne' predicate: (X srem 14) != 0, zero-extended to i16.
define i16 @test_srem_even(i16 %X) nounwind {
; CHECK-LABEL: test_srem_even:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov w8, #28087 // =0x6db7
; CHECK-NEXT:    mov w9, #4680 // =0x1248
; CHECK-NEXT:    madd w8, w0, w8, w9
; CHECK-NEXT:    lsl w10, w8, #15
; CHECK-NEXT:    bfxil w10, w8, #1, #15
; CHECK-NEXT:    cmp w9, w10, uxth
; CHECK-NEXT:    cset w0, lo
; CHECK-NEXT:    ret
  %srem = srem i16 %X, 14
  %cmp = icmp ne i16 %srem, 0
  %ret = zext i1 %cmp to i16
  ret i16 %ret
}

; (X srem 100) == 0; the expected output adds a rotate-right by 2 before the
; compare.
define i32 @test_srem_even_100(i32 %X) nounwind {
; CHECK-LABEL: test_srem_even_100:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov w8, #23593 // =0x5c29
; CHECK-NEXT:    mov w9, #47184 // =0xb850
; CHECK-NEXT:    movk w8, #49807, lsl #16
; CHECK-NEXT:    movk w9, #1310, lsl #16
; CHECK-NEXT:    madd w8, w0, w8, w9
; CHECK-NEXT:    mov w9, #23593 // =0x5c29
; CHECK-NEXT:    movk w9, #655, lsl #16
; CHECK-NEXT:    ror w8, w8, #2
; CHECK-NEXT:    cmp w8, w9
; CHECK-NEXT:    cset w0, lo
; CHECK-NEXT:    ret
  %srem = srem i32 %X, 100
  %cmp = icmp eq i32 %srem, 0
  %ret = zext i1 %cmp to i32
  ret i32 %ret
}

; This is like test_srem_even, except the divisor has bit 30 set.
define i32 @test_srem_even_bit30(i32 %X) nounwind {
; CHECK-LABEL: test_srem_even_bit30:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov w8, #20165 // =0x4ec5
; CHECK-NEXT:    mov w9, #8 // =0x8
; CHECK-NEXT:    movk w8, #64748, lsl #16
; CHECK-NEXT:    madd w8, w0, w8, w9
; CHECK-NEXT:    ror w8, w8, #3
; CHECK-NEXT:    cmp w8, #3
; CHECK-NEXT:    cset w0, lo
; CHECK-NEXT:    ret
  %srem = srem i32 %X, 1073741928
  %cmp = icmp eq i32 %srem, 0
  %ret = zext i1 %cmp to i32
  ret i32 %ret
}

; This is like test_srem_even, except the divisor has bit 31 set.
define i32 @test_srem_even_bit31(i32 %X) nounwind {
; CHECK-LABEL: test_srem_even_bit31:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov w8, #1285 // =0x505
; CHECK-NEXT:    mov w9, #2 // =0x2
; CHECK-NEXT:    movk w8, #50437, lsl #16
; CHECK-NEXT:    madd w8, w0, w8, w9
; CHECK-NEXT:    ror w8, w8, #1
; CHECK-NEXT:    cmp w8, #3
; CHECK-NEXT:    cset w0, lo
; CHECK-NEXT:    ret
  %srem = srem i32 %X, 2147483750
  %cmp = icmp eq i32 %srem, 0
  %ret = zext i1 %cmp to i32
  ret i32 %ret
}

;------------------------------------------------------------------------------;
; Special case
;------------------------------------------------------------------------------;

; 'NE' predicate is fine too.
define i32 @test_srem_odd_setne(i32 %X) nounwind {
; CHECK-LABEL: test_srem_odd_setne:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov w8, #52429 // =0xcccd
; CHECK-NEXT:    mov w9, #39321 // =0x9999
; CHECK-NEXT:    movk w8, #52428, lsl #16
; CHECK-NEXT:    movk w9, #6553, lsl #16
; CHECK-NEXT:    madd w8, w0, w8, w9
; CHECK-NEXT:    mov w9, #13106 // =0x3332
; CHECK-NEXT:    movk w9, #13107, lsl #16
; CHECK-NEXT:    cmp w8, w9
; CHECK-NEXT:    cset w0, hi
; CHECK-NEXT:    ret
  %srem = srem i32 %X, 5
  %cmp = icmp ne i32 %srem, 0
  %ret = zext i1 %cmp to i32
  ret i32 %ret
}

; The fold is only valid for positive divisors; negative ones should be negated.
; The expected output for a divisor of -5 is identical to test_srem_odd_setne.
define i32 @test_srem_negative_odd(i32 %X) nounwind {
; CHECK-LABEL: test_srem_negative_odd:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov w8, #52429 // =0xcccd
; CHECK-NEXT:    mov w9, #39321 // =0x9999
; CHECK-NEXT:    movk w8, #52428, lsl #16
; CHECK-NEXT:    movk w9, #6553, lsl #16
; CHECK-NEXT:    madd w8, w0, w8, w9
; CHECK-NEXT:    mov w9, #13106 // =0x3332
; CHECK-NEXT:    movk w9, #13107, lsl #16
; CHECK-NEXT:    cmp w8, w9
; CHECK-NEXT:    cset w0, hi
; CHECK-NEXT:    ret
  %srem = srem i32 %X, -5
  %cmp = icmp ne i32 %srem, 0
  %ret = zext i1 %cmp to i32
  ret i32 %ret
}

; Negative even divisor with the 'ne' predicate: (X srem -14) != 0.
define i32 @test_srem_negative_even(i32 %X) nounwind {
; CHECK-LABEL: test_srem_negative_even:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov w8, #28087 // =0x6db7
; CHECK-NEXT:    mov w9, #9362 // =0x2492
; CHECK-NEXT:    movk w8, #46811, lsl #16
; CHECK-NEXT:    movk w9, #4681, lsl #16
; CHECK-NEXT:    madd w8, w0, w8, w9
; CHECK-NEXT:    ror w8, w8, #1
; CHECK-NEXT:    cmp w8, w9
; CHECK-NEXT:    cset w0, hi
; CHECK-NEXT:    ret
  %srem = srem i32 %X, -14
  %cmp = icmp ne i32 %srem, 0
  %ret = zext i1 %cmp to i32
  ret i32 %ret
}

;------------------------------------------------------------------------------;
; Negative tests
;------------------------------------------------------------------------------;

; We can lower remainder of division by one much better elsewhere.
; The expected output is simply the constant 1.
define i32 @test_srem_one(i32 %X) nounwind {
; CHECK-LABEL: test_srem_one:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov w0, #1 // =0x1
; CHECK-NEXT:    ret
  %srem = srem i32 %X, 1
  %cmp = icmp eq i32 %srem, 0
  %ret = zext i1 %cmp to i32
  ret i32 %ret
}

; We can lower remainder of division by powers of two much better elsewhere.
; The expected output uses masking with 0xf and a conditional negate, with no
; multiply.
define i32 @test_srem_pow2(i32 %X) nounwind {
; CHECK-LABEL: test_srem_pow2:
; CHECK:       // %bb.0:
; CHECK-NEXT:    negs w8, w0
; CHECK-NEXT:    and w9, w0, #0xf
; CHECK-NEXT:    and w8, w8, #0xf
; CHECK-NEXT:    csneg w8, w9, w8, mi
; CHECK-NEXT:    cmp w8, #0
; CHECK-NEXT:    cset w0, eq
; CHECK-NEXT:    ret
  %srem = srem i32 %X, 16
  %cmp = icmp eq i32 %srem, 0
  %ret = zext i1 %cmp to i32
  ret i32 %ret
}

; The fold is only valid for positive divisors, and we can't negate INT_MIN.
; The divisor 2147483648 is the i32 bit pattern 0x80000000.
define i32 @test_srem_int_min(i32 %X) nounwind {
; CHECK-LABEL: test_srem_int_min:
; CHECK:       // %bb.0:
; CHECK-NEXT:    negs w8, w0
; CHECK-NEXT:    and w9, w0, #0x7fffffff
; CHECK-NEXT:    and w8, w8, #0x7fffffff
; CHECK-NEXT:    csneg w8, w9, w8, mi
; CHECK-NEXT:    cmp w8, #0
; CHECK-NEXT:    cset w0, eq
; CHECK-NEXT:    ret
  %srem = srem i32 %X, 2147483648
  %cmp = icmp eq i32 %srem, 0
  %ret = zext i1 %cmp to i32
  ret i32 %ret
}

; We can lower remainder of division by all-ones much better elsewhere.
; The divisor 4294967295 is the i32 bit pattern 0xffffffff; the expected output
; is simply the constant 1.
define i32 @test_srem_allones(i32 %X) nounwind {
; CHECK-LABEL: test_srem_allones:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov w0, #1 // =0x1
; CHECK-NEXT:    ret
  %srem = srem i32 %X, 4294967295
  %cmp = icmp eq i32 %srem, 0
  %ret = zext i1 %cmp to i32
  ret i32 %ret
}