; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=aarch64-unknown-linux-gnu < %s | FileCheck %s

; Odd divisor
; (X urem 25) == 0 with a splat divisor, zero-extended to a 0/1 lane mask.
; The expected code contains no division: each lane is multiplied by
; 0xC28F5C29 (the multiplicative inverse of 25 modulo 2^32) and the product is
; compared, unsigned, against 0x0A3D70A3 (floor((2^32 - 1) / 25)).
define <4 x i32> @test_urem_odd_25(<4 x i32> %X) nounwind {
; CHECK-LABEL: test_urem_odd_25:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #23593 // =0x5c29
; CHECK-NEXT: movi v2.4s, #1
; CHECK-NEXT: movk w8, #49807, lsl #16
; CHECK-NEXT: dup v1.4s, w8
; CHECK-NEXT: mov w8, #28835 // =0x70a3
; CHECK-NEXT: movk w8, #2621, lsl #16
; CHECK-NEXT: mul v0.4s, v0.4s, v1.4s
; CHECK-NEXT: dup v1.4s, w8
; CHECK-NEXT: cmhs v0.4s, v1.4s, v0.4s
; CHECK-NEXT: and v0.16b, v0.16b, v2.16b
; CHECK-NEXT: ret
  %rem = urem <4 x i32> %X, <i32 25, i32 25, i32 25, i32 25>
  %iszero = icmp eq <4 x i32> %rem, zeroinitializer
  %res = zext <4 x i1> %iszero to <4 x i32>
  ret <4 x i32> %res
}

; Even divisors
; (X urem 100) == 0 with a splat divisor. 100 = 25 * 4, so the expected code
; multiplies by the same inverse of 25, rotates the product right by two bits
; (shl #30 accumulated with a logical shift right by 2) and compares, unsigned,
; against 0x028F5C28 (floor((2^32 - 1) / 100)).
define <4 x i32> @test_urem_even_100(<4 x i32> %X) nounwind {
; CHECK-LABEL: test_urem_even_100:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #23593 // =0x5c29
; CHECK-NEXT: movi v2.4s, #1
; CHECK-NEXT: movk w8, #49807, lsl #16
; CHECK-NEXT: dup v1.4s, w8
; CHECK-NEXT: mov w8, #23592 // =0x5c28
; CHECK-NEXT: movk w8, #655, lsl #16
; CHECK-NEXT: mul v0.4s, v0.4s, v1.4s
; CHECK-NEXT: shl v1.4s, v0.4s, #30
; CHECK-NEXT: usra v1.4s, v0.4s, #2
; CHECK-NEXT: dup v0.4s, w8
; CHECK-NEXT: cmhs v0.4s, v0.4s, v1.4s
; CHECK-NEXT: and v0.16b, v0.16b, v2.16b
; CHECK-NEXT: ret
  %rem = urem <4 x i32> %X, <i32 100, i32 100, i32 100, i32 100>
  %iszero = icmp eq <4 x i32> %rem, zeroinitializer
  %res = zext <4 x i1> %iszero to <4 x i32>
  ret <4 x i32> %res
}

; Negative divisor literals. urem treats its operands as unsigned, so a lane
; written as -25 is the value 4294967271, not 25. The divisor vectors in the
; following tests are therefore NOT splats, and the expected code loads the
; per-lane constants from the constant pool instead of materializing a splat.

; Odd divisor
; Divisor lanes are <25, -25, -25, 25>. The expected code loads two vector
; constants from the constant pool (.LCPI2_0 as the multiplier, .LCPI2_1 as the
; unsigned comparison limit), then multiplies and compares with no division.
define <4 x i32> @test_urem_odd_neg25(<4 x i32> %X) nounwind {
; CHECK-LABEL: test_urem_odd_neg25:
; CHECK: // %bb.0:
; CHECK-NEXT: adrp x8, .LCPI2_0
; CHECK-NEXT: movi v2.4s, #1
; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI2_0]
; CHECK-NEXT: adrp x8, .LCPI2_1
; CHECK-NEXT: mul v0.4s, v0.4s, v1.4s
; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI2_1]
; CHECK-NEXT: cmhs v0.4s, v1.4s, v0.4s
; CHECK-NEXT: and v0.16b, v0.16b, v2.16b
; CHECK-NEXT: ret
  %rem = urem <4 x i32> %X, <i32 25, i32 -25, i32 -25, i32 25>
  %iszero = icmp eq <4 x i32> %rem, zeroinitializer
  %res = zext <4 x i1> %iszero to <4 x i32>
  ret <4 x i32> %res
}

; Even divisors
; Divisor lanes are <-100, 100, -100, 100>. Same shape as the odd case, with an
; extra rotate right by two bits (shl #30 accumulated with a logical shift right
; by 2) applied to the product before the unsigned comparison.
define <4 x i32> @test_urem_even_neg100(<4 x i32> %X) nounwind {
; CHECK-LABEL: test_urem_even_neg100:
; CHECK: // %bb.0:
; CHECK-NEXT: adrp x8, .LCPI3_0
; CHECK-NEXT: movi v2.4s, #1
; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI3_0]
; CHECK-NEXT: adrp x8, .LCPI3_1
; CHECK-NEXT: mul v0.4s, v0.4s, v1.4s
; CHECK-NEXT: shl v1.4s, v0.4s, #30
; CHECK-NEXT: usra v1.4s, v0.4s, #2
; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI3_1]
; CHECK-NEXT: cmhs v0.4s, v0.4s, v1.4s
; CHECK-NEXT: and v0.16b, v0.16b, v2.16b
; CHECK-NEXT: ret
  %rem = urem <4 x i32> %X, <i32 -100, i32 100, i32 -100, i32 100>
  %iszero = icmp eq <4 x i32> %rem, zeroinitializer
  %res = zext <4 x i1> %iszero to <4 x i32>
  ret <4 x i32> %res
}

;------------------------------------------------------------------------------;
; Comparison constant has undef elements.
;------------------------------------------------------------------------------;

; Lane 2 of the comparison constant is undef. The expected code here computes
; the remainder explicitly (widening multiply by 0x51EB851F, take the high
; halves, shift right by 3, multiply-subtract by 25) and then compares it
; against zero, rather than using the multiply-and-compare form.
define <4 x i32> @test_urem_odd_undef1(<4 x i32> %X) nounwind {
; CHECK-LABEL: test_urem_odd_undef1:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #34079 // =0x851f
; CHECK-NEXT: movk w8, #20971, lsl #16
; CHECK-NEXT: dup v1.4s, w8
; CHECK-NEXT: umull2 v2.2d, v0.4s, v1.4s
; CHECK-NEXT: umull v1.2d, v0.2s, v1.2s
; CHECK-NEXT: uzp2 v1.4s, v1.4s, v2.4s
; CHECK-NEXT: movi v2.4s, #25
; CHECK-NEXT: ushr v1.4s, v1.4s, #3
; CHECK-NEXT: mls v0.4s, v1.4s, v2.4s
; CHECK-NEXT: movi v1.4s, #1
; CHECK-NEXT: cmeq v0.4s, v0.4s, #0
; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
; CHECK-NEXT: ret
  %rem = urem <4 x i32> %X, <i32 25, i32 25, i32 25, i32 25>
  %iszero = icmp eq <4 x i32> %rem, <i32 0, i32 0, i32 undef, i32 0>
  %res = zext <4 x i1> %iszero to <4 x i32>
  ret <4 x i32> %res
}

; Same as above with the even divisor 100: the expected code again computes the
; remainder explicitly (shift right by 5, multiply-subtract by 100) before the
; comparison against zero.
define <4 x i32> @test_urem_even_undef1(<4 x i32> %X) nounwind {
; CHECK-LABEL: test_urem_even_undef1:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #34079 // =0x851f
; CHECK-NEXT: movk w8, #20971, lsl #16
; CHECK-NEXT: dup v1.4s, w8
; CHECK-NEXT: umull2 v2.2d, v0.4s, v1.4s
; CHECK-NEXT: umull v1.2d, v0.2s, v1.2s
; CHECK-NEXT: uzp2 v1.4s, v1.4s, v2.4s
; CHECK-NEXT: movi v2.4s, #100
; CHECK-NEXT: ushr v1.4s, v1.4s, #5
; CHECK-NEXT: mls v0.4s, v1.4s, v2.4s
; CHECK-NEXT: movi v1.4s, #1
; CHECK-NEXT: cmeq v0.4s, v0.4s, #0
; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
; CHECK-NEXT: ret
  %rem = urem <4 x i32> %X, <i32 100, i32 100, i32 100, i32 100>
  %iszero = icmp eq <4 x i32> %rem, <i32 0, i32 0, i32 undef, i32 0>
  %res = zext <4 x i1> %iszero to <4 x i32>
  ret <4 x i32> %res
}

;------------------------------------------------------------------------------;
; Negative tests
;------------------------------------------------------------------------------;

; X urem 1 is always 0, so the "== 0" mask is expected to fold to a constant 1
; in every lane.
define <4 x i32> @test_urem_one_eq(<4 x i32> %X) nounwind {
; CHECK-LABEL: test_urem_one_eq:
; CHECK: // %bb.0:
; CHECK-NEXT: movi v0.4s, #1
; CHECK-NEXT: ret
  %rem = urem <4 x i32> %X, <i32 1, i32 1, i32 1, i32 1>
  %iszero = icmp eq <4 x i32> %rem, zeroinitializer
  %res = zext <4 x i1> %iszero to <4 x i32>
  ret <4 x i32> %res
}
; Counterpart of the test above with "!= 0": expected to fold to all-zero lanes.
define <4 x i32> @test_urem_one_ne(<4 x i32> %X) nounwind {
; CHECK-LABEL: test_urem_one_ne:
; CHECK: // %bb.0:
; CHECK-NEXT: movi v0.2d, #0000000000000000
; CHECK-NEXT: ret
  %rem = urem <4 x i32> %X, <i32 1, i32 1, i32 1, i32 1>
  %nonzero = icmp ne <4 x i32> %rem, zeroinitializer
  %res = zext <4 x i1> %nonzero to <4 x i32>
  ret <4 x i32> %res
}

; We can lower remainder of division by powers of two much better elsewhere.
; The expected code masks with 15 and compares the result against zero.
define <4 x i32> @test_urem_pow2(<4 x i32> %X) nounwind {
; CHECK-LABEL: test_urem_pow2:
; CHECK: // %bb.0:
; CHECK-NEXT: movi v1.4s, #15
; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
; CHECK-NEXT: movi v1.4s, #1
; CHECK-NEXT: cmeq v0.4s, v0.4s, #0
; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
; CHECK-NEXT: ret
  %rem = urem <4 x i32> %X, <i32 16, i32 16, i32 16, i32 16>
  %iszero = icmp eq <4 x i32> %rem, zeroinitializer
  %res = zext <4 x i1> %iszero to <4 x i32>
  ret <4 x i32> %res
}

; We could lower remainder of division by INT_MIN much better elsewhere.
; Divisor 2147483648 (0x80000000) is a power of two: the expected code clears
; the top bit of each lane (bic) and compares the result against zero.
define <4 x i32> @test_urem_int_min(<4 x i32> %X) nounwind {
; CHECK-LABEL: test_urem_int_min:
; CHECK: // %bb.0:
; CHECK-NEXT: bic v0.4s, #128, lsl #24
; CHECK-NEXT: movi v1.4s, #1
; CHECK-NEXT: cmeq v0.4s, v0.4s, #0
; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
; CHECK-NEXT: ret
  %rem = urem <4 x i32> %X, <i32 2147483648, i32 2147483648, i32 2147483648, i32 2147483648>
  %iszero = icmp eq <4 x i32> %rem, zeroinitializer
  %res = zext <4 x i1> %iszero to <4 x i32>
  ret <4 x i32> %res
}

; We could lower remainder of division by all-ones much better elsewhere.
; With divisor 4294967295 the remainder is zero only for X == 0 or
; X == 4294967295; the expected code tests this as (0 - X) <= 1, unsigned.
define <4 x i32> @test_urem_allones(<4 x i32> %X) nounwind {
; CHECK-LABEL: test_urem_allones:
; CHECK: // %bb.0:
; CHECK-NEXT: movi v1.4s, #1
; CHECK-NEXT: neg v0.4s, v0.4s
; CHECK-NEXT: cmhs v0.4s, v1.4s, v0.4s
; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
; CHECK-NEXT: ret
  %rem = urem <4 x i32> %X, <i32 4294967295, i32 4294967295, i32 4294967295, i32 4294967295>
  %iszero = icmp eq <4 x i32> %rem, zeroinitializer
  %res = zext <4 x i1> %iszero to <4 x i32>
  ret <4 x i32> %res
}