; Test 32-bit signed division and remainder.
;
; RUN: llc < %s -mtriple=s390x-linux-gnu -asm-verbose=0 | FileCheck %s

declare i32 @foo()

; Register-register division.  The quotient ends up in the second register
; of the pair (%r1 below).
define void @f1(ptr %dest, i32 %a, i32 %b) {
; CHECK-LABEL: f1:
; CHECK: lgfr %r1, %r3
; CHECK: dsgfr %r0, %r4
; CHECK: st %r1, 0(%r2)
; CHECK: br %r14
  %quot = sdiv i32 %a, %b
  store i32 %quot, ptr %dest
  ret void
}

; Register-register remainder.  The remainder ends up in the first register
; of the pair (%r0 below).
define void @f2(ptr %dest, i32 %a, i32 %b) {
; CHECK-LABEL: f2:
; CHECK: lgfr %r1, %r3
; CHECK: dsgfr %r0, %r4
; CHECK: st %r0, 0(%r2)
; CHECK: br %r14
  %remainder = srem i32 %a, %b
  store i32 %remainder, ptr %dest
  ret void
}

; Asking for both the quotient and the remainder of the same operands should
; cost only one divide instruction.
define i32 @f3(i32 %dummy, i32 %a, i32 %b) {
; CHECK-LABEL: f3:
; CHECK-NOT: %r2
; CHECK: lgfr %r3, %r3
; CHECK-NOT: %r2
; CHECK: dsgfr %r2, %r4
; CHECK-NOT: dsgfr
; CHECK: or %r2, %r3
; CHECK: br %r14
  %quot = sdiv i32 %a, %b
  %remainder = srem i32 %a, %b
  %combined = or i32 %remainder, %quot
  ret i32 %combined
}

; A dividend argument that is already marked signext should not be
; sign-extended a second time before the divide.
define i32 @f4(i32 %dummy, i32 signext %a, i32 %b) {
; CHECK-LABEL: f4:
; CHECK-NOT: {{%r[234]}}
; CHECK: dsgfr %r2, %r4
; CHECK-NOT: dsgfr
; CHECK: or %r3, %r2
; CHECK: lr %r2, %r3
; CHECK: br %r14
  %quot = sdiv i32 %a, %b
  %remainder = srem i32 %a, %b
  %combined = or i32 %remainder, %quot
  ret i32 %combined
}

; A dividend that comes from memory should be fetched with a sign-extending
; load (LGF).
define i32 @f5(i32 %dummy, ptr %src, i32 %b) {
; CHECK-LABEL: f5:
; CHECK-NOT: %r2
; CHECK: lgf %r3, 0(%r3)
; CHECK-NOT: %r2
; CHECK: dsgfr %r2, %r4
; CHECK-NOT: dsgfr
; CHECK: or %r2, %r3
; CHECK: br %r14
  %dividend = load i32, ptr %src
  %quot = sdiv i32 %dividend, %b
  %remainder = srem i32 %dividend, %b
  %combined = or i32 %remainder, %quot
  ret i32 %combined
}

; Division by a memory operand addressed with a zero displacement.
define void @f6(ptr %dest, i32 %a, ptr %src) {
; CHECK-LABEL: f6:
; CHECK: lgfr %r1, %r3
; CHECK: dsgf %r0, 0(%r4)
; CHECK: st %r1, 0(%r2)
; CHECK: br %r14
  %divisor = load i32, ptr %src
  %quot = sdiv i32 %a, %divisor
  store i32 %quot, ptr %dest
  ret void
}

; Remainder by a memory operand addressed with a zero displacement.
define void @f7(ptr %dest, i32 %a, ptr %src) {
; CHECK-LABEL: f7:
; CHECK: lgfr %r1, %r3
; CHECK: dsgf %r0, 0(%r4)
; CHECK: st %r0, 0(%r2)
; CHECK: br %r14
  %divisor = load i32, ptr %src
  %remainder = srem i32 %a, %divisor
  store i32 %remainder, ptr %dest
  ret void
}

; Quotient and remainder by the same memory operand should share one divide.
define i32 @f8(i32 %dummy, i32 %a, ptr %src) {
; CHECK-LABEL: f8:
; CHECK-NOT: %r2
; CHECK: lgfr %r3, %r3
; CHECK-NOT: %r2
; CHECK: dsgf %r2, 0(%r4)
; CHECK-NOT: {{dsgf|dsgfr}}
; CHECK: or %r2, %r3
; CHECK: br %r14
  %divisor = load i32, ptr %src
  %quot = sdiv i32 %a, %divisor
  %remainder = srem i32 %a, %divisor
  %combined = or i32 %remainder, %quot
  ret i32 %combined
}

; Top of the DSGF displacement range.
define i32 @f9(i32 %dummy, i32 %a, ptr %src) {
; CHECK-LABEL: f9:
; CHECK: dsgf %r2, 524284(%r4)
; CHECK: br %r14
  %addr = getelementptr i32, ptr %src, i64 131071
  %divisor = load i32, ptr %addr
  %remainder = srem i32 %a, %divisor
  ret i32 %remainder
}

; One word past the top of the range, where the address has to be formed
; separately.  Sequences other than the one checked here would also be fine.
define i32 @f10(i32 %dummy, i32 %a, ptr %src) {
; CHECK-LABEL: f10:
; CHECK: agfi %r4, 524288
; CHECK: dsgf %r2, 0(%r4)
; CHECK: br %r14
  %addr = getelementptr i32, ptr %src, i64 131072
  %divisor = load i32, ptr %addr
  %remainder = srem i32 %a, %divisor
  ret i32 %remainder
}

; Highest word-aligned negative displacement for DSGF.
define i32 @f11(i32 %dummy, i32 %a, ptr %src) {
; CHECK-LABEL: f11:
; CHECK: dsgf %r2, -4(%r4)
; CHECK: br %r14
  %addr = getelementptr i32, ptr %src, i64 -1
  %divisor = load i32, ptr %addr
  %remainder = srem i32 %a, %divisor
  ret i32 %remainder
}

; Bottom of the DSGF displacement range.
define i32 @f12(i32 %dummy, i32 %a, ptr %src) {
; CHECK-LABEL: f12:
; CHECK: dsgf %r2, -524288(%r4)
; CHECK: br %r14
  %addr = getelementptr i32, ptr %src, i64 -131072
  %divisor = load i32, ptr %addr
  %remainder = srem i32 %a, %divisor
  ret i32 %remainder
}

; One word below the bottom of the range, where the address has to be formed
; separately.  Sequences other than the one checked here would also be fine.
define i32 @f13(i32 %dummy, i32 %a, ptr %src) {
; CHECK-LABEL: f13:
; CHECK: agfi %r4, -524292
; CHECK: dsgf %r2, 0(%r4)
; CHECK: br %r14
  %addr = getelementptr i32, ptr %src, i64 -131073
  %divisor = load i32, ptr %addr
  %remainder = srem i32 %a, %divisor
  ret i32 %remainder
}

; DSGF should accept an index register alongside the base and displacement.
define i32 @f14(i32 %dummy, i32 %a, i64 %src, i64 %index) {
; CHECK-LABEL: f14:
; CHECK: dsgf %r2, 524287(%r5,%r4)
; CHECK: br %r14
  %indexed = add i64 %src, %index
  %offset = add i64 %indexed, 524287
  %addr = inttoptr i64 %offset to ptr
  %divisor = load i32, ptr %addr
  %remainder = srem i32 %a, %divisor
  ret i32 %remainder
}

; When the load cannot be folded into the division, DSGFR should still be
; chosen in preference to DSGR.
define void @f15(ptr %dest, ptr %src) {
; CHECK-LABEL: f15:
; CHECK: l [[B:%r[0-9]+]], 0(%r3)
; CHECK: brasl %r14, foo@PLT
; CHECK: lgfr %r1, %r2
; CHECK: dsgfr %r0, [[B]]
; CHECK: br %r14
  ; The divisor is loaded before the call and the dividend is the call result.
  %divisor = load i32, ptr %src
  %dividend = call i32 @foo()
  %quot = sdiv i32 %dividend, %divisor
  store i32 %quot, ptr %dest
  ret void
}

; Dividing by a value that has been spilled should use DSGF on the stack slot
; instead of DSGFR.
define i32 @f16(ptr %ptr0) {
; CHECK-LABEL: f16:
; CHECK: brasl %r14, foo@PLT
; CHECK: dsgf {{%r[0-9]+}}, 16{{[04]}}(%r15)
; CHECK: br %r14
  ; Addresses of every second i32 after %ptr0.
  %addr1 = getelementptr i32, ptr %ptr0, i64 2
  %addr2 = getelementptr i32, ptr %ptr0, i64 4
  %addr3 = getelementptr i32, ptr %ptr0, i64 6
  %addr4 = getelementptr i32, ptr %ptr0, i64 8
  %addr5 = getelementptr i32, ptr %ptr0, i64 10
  %addr6 = getelementptr i32, ptr %ptr0, i64 12
  %addr7 = getelementptr i32, ptr %ptr0, i64 14
  %addr8 = getelementptr i32, ptr %ptr0, i64 16
  %addr9 = getelementptr i32, ptr %ptr0, i64 18

  ; All ten divisors are loaded before the call, so they are live across it.
  %rhs0 = load i32, ptr %ptr0
  %rhs1 = load i32, ptr %addr1
  %rhs2 = load i32, ptr %addr2
  %rhs3 = load i32, ptr %addr3
  %rhs4 = load i32, ptr %addr4
  %rhs5 = load i32, ptr %addr5
  %rhs6 = load i32, ptr %addr6
  %rhs7 = load i32, ptr %addr7
  %rhs8 = load i32, ptr %addr8
  %rhs9 = load i32, ptr %addr9

  %start = call i32 @foo()

  ; Chain of divisions: each quotient is the dividend of the next one.
  %q0 = sdiv i32 %start, %rhs0
  %q1 = sdiv i32 %q0, %rhs1
  %q2 = sdiv i32 %q1, %rhs2
  %q3 = sdiv i32 %q2, %rhs3
  %q4 = sdiv i32 %q3, %rhs4
  %q5 = sdiv i32 %q4, %rhs5
  %q6 = sdiv i32 %q5, %rhs6
  %q7 = sdiv i32 %q6, %rhs7
  %q8 = sdiv i32 %q7, %rhs8
  %q9 = sdiv i32 %q8, %rhs9

  ret i32 %q9
}