; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \
; RUN: | FileCheck %s -check-prefix=RV64I

; Test that we turn (sra (shl X, 32), 32-C) into (slli (sext.w X), C)

define i64 @test1(i64 %a) nounwind {
; RV64I-LABEL: test1:
; RV64I:       # %bb.0:
; RV64I-NEXT:    sext.w a0, a0
; RV64I-NEXT:    slli a0, a0, 2
; RV64I-NEXT:    ret
  %1 = shl i64 %a, 32
  %2 = ashr i64 %1, 30
  ret i64 %2
}

define i64 @test2(i32 signext %a) nounwind {
; RV64I-LABEL: test2:
; RV64I:       # %bb.0:
; RV64I-NEXT:    slli a0, a0, 3
; RV64I-NEXT:    ret
  %1 = zext i32 %a to i64
  %2 = shl i64 %1, 32
  %3 = ashr i64 %2, 29
  ret i64 %3
}

define i64 @test3(ptr %a) nounwind {
; RV64I-LABEL: test3:
; RV64I:       # %bb.0:
; RV64I-NEXT:    lw a0, 0(a0)
; RV64I-NEXT:    slli a0, a0, 4
; RV64I-NEXT:    ret
  %1 = load i32, ptr %a
  %2 = zext i32 %1 to i64
  %3 = shl i64 %2, 32
  %4 = ashr i64 %3, 28
  ret i64 %4
}

define i64 @test4(i32 signext %a, i32 signext %b) nounwind {
; RV64I-LABEL: test4:
; RV64I:       # %bb.0:
; RV64I-NEXT:    addw a0, a0, a1
; RV64I-NEXT:    slli a0, a0, 30
; RV64I-NEXT:    ret
  %1 = add i32 %a, %b
  %2 = zext i32 %1 to i64
  %3 = shl i64 %2, 32
  %4 = ashr i64 %3, 2
  ret i64 %4
}

define i64 @test5(i32 signext %a, i32 signext %b) nounwind {
; RV64I-LABEL: test5:
; RV64I:       # %bb.0:
; RV64I-NEXT:    xor a0, a0, a1
; RV64I-NEXT:    slli a0, a0, 31
; RV64I-NEXT:    ret
  %1 = xor i32 %a, %b
  %2 = zext i32 %1 to i64
  %3 = shl i64 %2, 32
  %4 = ashr i64 %3, 1
  ret i64 %4
}

define i64 @test6(i32 signext %a, i32 signext %b) nounwind {
; RV64I-LABEL: test6:
; RV64I:       # %bb.0:
; RV64I-NEXT:    sllw a0, a0, a1
; RV64I-NEXT:    slli a0, a0, 16
; RV64I-NEXT:    ret
  %1 = shl i32 %a, %b
  %2 = zext i32 %1 to i64
  %3 = shl i64 %2, 32
  %4 = ashr i64 %3, 16
  ret i64 %4
}

; The shl+add+ashr is canonical IR from InstCombine for
; (sext (add (trunc X to i32), 1) to i64).
; That can be implemented as addiw; make sure we recover it.
define i64 @test7(ptr %0, i64 %1) {
; RV64I-LABEL: test7:
; RV64I:       # %bb.0:
; RV64I-NEXT:    addiw a0, a1, 1
; RV64I-NEXT:    ret
  %3 = shl i64 %1, 32
  %4 = add i64 %3, 4294967296
  %5 = ashr exact i64 %4, 32
  ret i64 %5
}

; The mul+add+ashr is canonical IR from InstCombine for
; (sext (sub 1, (trunc X to i32)) to i64).
; That can be implemented as (li 1)+subw; make sure we recover it.
define i64 @test8(ptr %0, i64 %1) {
; RV64I-LABEL: test8:
; RV64I:       # %bb.0:
; RV64I-NEXT:    li a0, 1
; RV64I-NEXT:    subw a0, a0, a1
; RV64I-NEXT:    ret
  %3 = mul i64 %1, -4294967296
  %4 = add i64 %3, 4294967296
  %5 = ashr exact i64 %4, 32
  ret i64 %5
}

; The gep is here to introduce a shl by 2 after the ashr that will get folded
; and make this harder to recover.
define signext i32 @test9(ptr %0, i64 %1) {
; RV64I-LABEL: test9:
; RV64I:       # %bb.0:
; RV64I-NEXT:    lui a2, 1
; RV64I-NEXT:    addi a2, a2, 1
; RV64I-NEXT:    addw a1, a1, a2
; RV64I-NEXT:    slli a1, a1, 2
; RV64I-NEXT:    add a0, a0, a1
; RV64I-NEXT:    lw a0, 0(a0)
; RV64I-NEXT:    ret
  %3 = shl i64 %1, 32
  %4 = add i64 %3, 17596481011712 ; 4097 << 32
  %5 = ashr exact i64 %4, 32
  %6 = getelementptr inbounds i32, ptr %0, i64 %5
  %7 = load i32, ptr %6, align 4
  ret i32 %7
}
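
; For reference, a worked instance of the shl+add+ashr identity that test7
; and test9 rely on (an explanatory sketch; X and C are placeholders):
; with X = 0x00000000fffffffe (so trunc X = -2) and C = 1,
;   X << 32             = 0xfffffffe00000000
;   + (C << 32)         = 0xffffffff00000000
;   ashr exact by 32    = 0xffffffffffffffff = sext (i32 -1) to i64
; which equals sext ((trunc X to i32) + C) to i64, i.e. what a single addiw
; (or addw ahead of the folded slli in test9) computes.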

; The gep is here to introduce a shl by 2 after the ashr that will get folded
; and make this harder to recover.
define signext i32 @test10(ptr %0, i64 %1) {
; RV64I-LABEL: test10:
; RV64I:       # %bb.0:
; RV64I-NEXT:    lui a2, 30141
; RV64I-NEXT:    addi a2, a2, -747
; RV64I-NEXT:    subw a2, a2, a1
; RV64I-NEXT:    slli a2, a2, 2
; RV64I-NEXT:    add a0, a0, a2
; RV64I-NEXT:    lw a0, 0(a0)
; RV64I-NEXT:    ret
  %3 = mul i64 %1, -4294967296
  %4 = add i64 %3, 530242871224172544 ; 123456789 << 32
  %5 = ashr exact i64 %4, 32
  %6 = getelementptr inbounds i32, ptr %0, i64 %5
  %7 = load i32, ptr %6, align 4
  ret i32 %7
}

define i64 @test11(ptr %0, i64 %1) {
; RV64I-LABEL: test11:
; RV64I:       # %bb.0:
; RV64I-NEXT:    lui a0, 524288
; RV64I-NEXT:    subw a0, a0, a1
; RV64I-NEXT:    ret
  %3 = mul i64 %1, -4294967296
  %4 = add i64 %3, 9223372036854775808 ; 0x8000'0000'0000'0000
  %5 = ashr exact i64 %4, 32
  ret i64 %5
}

; Make sure we use slli+srai to enable the possibility of compressed
; instructions.
define i32 @test12(i32 signext %0) {
; RV64I-LABEL: test12:
; RV64I:       # %bb.0:
; RV64I-NEXT:    slli a0, a0, 49
; RV64I-NEXT:    srai a0, a0, 47
; RV64I-NEXT:    ret
  %2 = shl i32 %0, 17
  %3 = ashr i32 %2, 15
  ret i32 %3
}

define i8 @test13(ptr %0, i64 %1) {
; RV64I-LABEL: test13:
; RV64I:       # %bb.0:
; RV64I-NEXT:    li a2, 1
; RV64I-NEXT:    li a3, 2
; RV64I-NEXT:    subw a2, a2, a1
; RV64I-NEXT:    subw a3, a3, a1
; RV64I-NEXT:    add a2, a0, a2
; RV64I-NEXT:    add a0, a0, a3
; RV64I-NEXT:    lbu a1, 0(a2)
; RV64I-NEXT:    lbu a0, 0(a0)
; RV64I-NEXT:    add a0, a1, a0
; RV64I-NEXT:    ret
  %3 = mul i64 %1, -4294967296
  %4 = add i64 %3, 4294967296 ; 1 << 32
  %5 = ashr exact i64 %4, 32
  %6 = getelementptr inbounds i8, ptr %0, i64 %5
  %7 = load i8, ptr %6, align 4
  %8 = add i64 %3, 8589934592 ; 2 << 32
  %9 = ashr exact i64 %8, 32
  %10 = getelementptr inbounds i8, ptr %0, i64 %9
  %11 = load i8, ptr %10, align 4
  %12 = add i8 %7, %11
  ret i8 %12
}

define signext i32 @test14(ptr %0, ptr %1, i64 %2) {
; RV64I-LABEL: test14:
; RV64I:       # %bb.0:
; RV64I-NEXT:    li a3, 1
; RV64I-NEXT:    subw a3, a3, a2
; RV64I-NEXT:    add a0, a0, a3
; RV64I-NEXT:    slli a3, a3, 2
; RV64I-NEXT:    lbu a0, 0(a0)
; RV64I-NEXT:    add a1, a1, a3
; RV64I-NEXT:    lw a1, 0(a1)
; RV64I-NEXT:    addw a0, a0, a1
; RV64I-NEXT:    ret
  %4 = mul i64 %2, -4294967296
  %5 = add i64 %4, 4294967296 ; 1 << 32
  %6 = ashr exact i64 %5, 32
  %7 = getelementptr inbounds i8, ptr %0, i64 %6
  %8 = load i8, ptr %7, align 4
  %9 = zext i8 %8 to i32
  %10 = getelementptr inbounds i32, ptr %1, i64 %6
  %11 = load i32, ptr %10, align 4
  %12 = add i32 %9, %11
  ret i32 %12
}
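
; For reference, a worked instance of the mul+add+ashr pattern used by test8,
; test10, test11, test13 and test14 (an explanatory sketch; X and C are
; placeholders): with X = 3 and C = 1,
;   X * -4294967296     = (-3) << 32 = 0xfffffffd00000000
;   + (C << 32)         = 0xfffffffe00000000
;   ashr exact by 32    = 0xfffffffffffffffe = -2
; which equals sext (C - (trunc X to i32)) to i64, i.e. what the li+subw
; sequences in the checks above compute.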