; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=riscv32 -mattr=+m | FileCheck %s --check-prefix=RV32I
; RUN: llc < %s -mtriple=riscv64 -mattr=+m | FileCheck %s --check-prefix=RV64I
; RUN: llc < %s -mtriple=riscv32 -mattr=+m,+zbb | FileCheck %s --check-prefix=RV32IZbb
; RUN: llc < %s -mtriple=riscv64 -mattr=+m,+zbb | FileCheck %s --check-prefix=RV64IZbb

; Test unsigned saturating subtraction (llvm.usub.sat.*) where the subtrahend
; is the product of two arguments, at types i4/i8/i16/i32/i64, on RV32/RV64
; with and without the Zbb extension (which provides maxu).

declare i4 @llvm.usub.sat.i4(i4, i4)
declare i8 @llvm.usub.sat.i8(i8, i8)
declare i16 @llvm.usub.sat.i16(i16, i16)
declare i32 @llvm.usub.sat.i32(i32, i32)
declare i64 @llvm.usub.sat.i64(i64, i64)

define i32 @func32(i32 %x, i32 %y, i32 %z) nounwind {
; RV32I-LABEL: func32:
; RV32I: # %bb.0:
; RV32I-NEXT: mul a1, a1, a2
; RV32I-NEXT: sub a1, a0, a1
; RV32I-NEXT: sltu a0, a0, a1
; RV32I-NEXT: addi a0, a0, -1
; RV32I-NEXT: and a0, a0, a1
; RV32I-NEXT: ret
;
; RV64I-LABEL: func32:
; RV64I: # %bb.0:
; RV64I-NEXT: mul a1, a1, a2
; RV64I-NEXT: subw a1, a0, a1
; RV64I-NEXT: sext.w a0, a0
; RV64I-NEXT: sltu a0, a0, a1
; RV64I-NEXT: addi a0, a0, -1
; RV64I-NEXT: and a0, a0, a1
; RV64I-NEXT: ret
;
; RV32IZbb-LABEL: func32:
; RV32IZbb: # %bb.0:
; RV32IZbb-NEXT: mul a1, a1, a2
; RV32IZbb-NEXT: maxu a0, a0, a1
; RV32IZbb-NEXT: sub a0, a0, a1
; RV32IZbb-NEXT: ret
;
; RV64IZbb-LABEL: func32:
; RV64IZbb: # %bb.0:
; RV64IZbb-NEXT: sext.w a0, a0
; RV64IZbb-NEXT: mulw a1, a1, a2
; RV64IZbb-NEXT: maxu a0, a0, a1
; RV64IZbb-NEXT: sub a0, a0, a1
; RV64IZbb-NEXT: ret
  %a = mul i32 %y, %z
  %tmp = call i32 @llvm.usub.sat.i32(i32 %x, i32 %a)
  ret i32 %tmp
}

define i64 @func64(i64 %x, i64 %y, i64 %z) nounwind {
; RV32I-LABEL: func64:
; RV32I: # %bb.0:
; RV32I-NEXT: sltu a2, a0, a4
; RV32I-NEXT: sub a3, a1, a5
; RV32I-NEXT: sub a2, a3, a2
; RV32I-NEXT: sub a3, a0, a4
; RV32I-NEXT: beq a2, a1, .LBB1_2
; RV32I-NEXT: # %bb.1:
; RV32I-NEXT: sltu a0, a1, a2
; RV32I-NEXT: j .LBB1_3
; RV32I-NEXT: .LBB1_2:
; RV32I-NEXT: sltu a0, a0, a3
; RV32I-NEXT: .LBB1_3:
; RV32I-NEXT: addi a1, a0, -1
; RV32I-NEXT: and a0, a1, a3
; RV32I-NEXT: and a1, a1, a2
; RV32I-NEXT: ret
;
; RV64I-LABEL: func64:
; RV64I: # %bb.0:
; RV64I-NEXT: sub a1, a0, a2
; RV64I-NEXT: sltu a0, a0, a1
; RV64I-NEXT: addi a0, a0, -1
; RV64I-NEXT: and a0, a0, a1
; RV64I-NEXT: ret
;
; RV32IZbb-LABEL: func64:
; RV32IZbb: # %bb.0:
; RV32IZbb-NEXT: sltu a2, a0, a4
; RV32IZbb-NEXT: sub a3, a1, a5
; RV32IZbb-NEXT: sub a2, a3, a2
; RV32IZbb-NEXT: sub a3, a0, a4
; RV32IZbb-NEXT: beq a2, a1, .LBB1_2
; RV32IZbb-NEXT: # %bb.1:
; RV32IZbb-NEXT: sltu a0, a1, a2
; RV32IZbb-NEXT: j .LBB1_3
; RV32IZbb-NEXT: .LBB1_2:
; RV32IZbb-NEXT: sltu a0, a0, a3
; RV32IZbb-NEXT: .LBB1_3:
; RV32IZbb-NEXT: addi a1, a0, -1
; RV32IZbb-NEXT: and a0, a1, a3
; RV32IZbb-NEXT: and a1, a1, a2
; RV32IZbb-NEXT: ret
;
; RV64IZbb-LABEL: func64:
; RV64IZbb: # %bb.0:
; RV64IZbb-NEXT: maxu a0, a0, a2
; RV64IZbb-NEXT: sub a0, a0, a2
; RV64IZbb-NEXT: ret
; NOTE(review): unlike the other widths, the intrinsic here is passed %z
; rather than the multiply result %a, so the mul is dead code -- and indeed
; none of the 64-bit check blocks above contain a multiply. Confirm this is
; intentional before "fixing" it, since changing %z to %a would require
; regenerating all four check prefixes.
  %a = mul i64 %y, %z
  %tmp = call i64 @llvm.usub.sat.i64(i64 %x, i64 %z)
  ret i64 %tmp
}

define i16 @func16(i16 %x, i16 %y, i16 %z) nounwind {
; RV32I-LABEL: func16:
; RV32I: # %bb.0:
; RV32I-NEXT: lui a3, 16
; RV32I-NEXT: mul a1, a1, a2
; RV32I-NEXT: addi a3, a3, -1
; RV32I-NEXT: and a0, a0, a3
; RV32I-NEXT: and a1, a1, a3
; RV32I-NEXT: sub a1, a0, a1
; RV32I-NEXT: sltu a0, a0, a1
; RV32I-NEXT: addi a0, a0, -1
; RV32I-NEXT: and a0, a0, a1
; RV32I-NEXT: ret
;
; RV64I-LABEL: func16:
; RV64I: # %bb.0:
; RV64I-NEXT: lui a3, 16
; RV64I-NEXT: mul a1, a1, a2
; RV64I-NEXT: addiw a3, a3, -1
; RV64I-NEXT: and a0, a0, a3
; RV64I-NEXT: and a1, a1, a3
; RV64I-NEXT: sub a1, a0, a1
; RV64I-NEXT: sltu a0, a0, a1
; RV64I-NEXT: addi a0, a0, -1
; RV64I-NEXT: and a0, a0, a1
; RV64I-NEXT: ret
;
; RV32IZbb-LABEL: func16:
; RV32IZbb: # %bb.0:
; RV32IZbb-NEXT: zext.h a0, a0
; RV32IZbb-NEXT: mul a1, a1, a2
; RV32IZbb-NEXT: zext.h a1, a1
; RV32IZbb-NEXT: maxu a0, a0, a1
; RV32IZbb-NEXT: sub a0, a0, a1
; RV32IZbb-NEXT: ret
;
; RV64IZbb-LABEL: func16:
; RV64IZbb: # %bb.0:
; RV64IZbb-NEXT: zext.h a0, a0
; RV64IZbb-NEXT: mul a1, a1, a2
; RV64IZbb-NEXT: zext.h a1, a1
; RV64IZbb-NEXT: maxu a0, a0, a1
; RV64IZbb-NEXT: sub a0, a0, a1
; RV64IZbb-NEXT: ret
  %a = mul i16 %y, %z
  %tmp = call i16 @llvm.usub.sat.i16(i16 %x, i16 %a)
  ret i16 %tmp
}

define i8 @func8(i8 %x, i8 %y, i8 %z) nounwind {
; RV32I-LABEL: func8:
; RV32I: # %bb.0:
; RV32I-NEXT: andi a0, a0, 255
; RV32I-NEXT: mul a1, a1, a2
; RV32I-NEXT: andi a1, a1, 255
; RV32I-NEXT: sub a1, a0, a1
; RV32I-NEXT: sltu a0, a0, a1
; RV32I-NEXT: addi a0, a0, -1
; RV32I-NEXT: and a0, a0, a1
; RV32I-NEXT: ret
;
; RV64I-LABEL: func8:
; RV64I: # %bb.0:
; RV64I-NEXT: andi a0, a0, 255
; RV64I-NEXT: mul a1, a1, a2
; RV64I-NEXT: andi a1, a1, 255
; RV64I-NEXT: sub a1, a0, a1
; RV64I-NEXT: sltu a0, a0, a1
; RV64I-NEXT: addi a0, a0, -1
; RV64I-NEXT: and a0, a0, a1
; RV64I-NEXT: ret
;
; RV32IZbb-LABEL: func8:
; RV32IZbb: # %bb.0:
; RV32IZbb-NEXT: andi a0, a0, 255
; RV32IZbb-NEXT: mul a1, a1, a2
; RV32IZbb-NEXT: andi a1, a1, 255
; RV32IZbb-NEXT: maxu a0, a0, a1
; RV32IZbb-NEXT: sub a0, a0, a1
; RV32IZbb-NEXT: ret
;
; RV64IZbb-LABEL: func8:
; RV64IZbb: # %bb.0:
; RV64IZbb-NEXT: andi a0, a0, 255
; RV64IZbb-NEXT: mul a1, a1, a2
; RV64IZbb-NEXT: andi a1, a1, 255
; RV64IZbb-NEXT: maxu a0, a0, a1
; RV64IZbb-NEXT: sub a0, a0, a1
; RV64IZbb-NEXT: ret
  %a = mul i8 %y, %z
  %tmp = call i8 @llvm.usub.sat.i8(i8 %x, i8 %a)
  ret i8 %tmp
}

define i4 @func4(i4 %x, i4 %y, i4 %z) nounwind {
; RV32I-LABEL: func4:
; RV32I: # %bb.0:
; RV32I-NEXT: andi a0, a0, 15
; RV32I-NEXT: mul a1, a1, a2
; RV32I-NEXT: andi a1, a1, 15
; RV32I-NEXT: sub a1, a0, a1
; RV32I-NEXT: sltu a0, a0, a1
; RV32I-NEXT: addi a0, a0, -1
; RV32I-NEXT: and a0, a0, a1
; RV32I-NEXT: ret
;
; RV64I-LABEL: func4:
; RV64I: # %bb.0:
; RV64I-NEXT: andi a0, a0, 15
; RV64I-NEXT: mul a1, a1, a2
; RV64I-NEXT: andi a1, a1, 15
; RV64I-NEXT: sub a1, a0, a1
; RV64I-NEXT: sltu a0, a0, a1
; RV64I-NEXT: addi a0, a0, -1
; RV64I-NEXT: and a0, a0, a1
; RV64I-NEXT: ret
;
; RV32IZbb-LABEL: func4:
; RV32IZbb: # %bb.0:
; RV32IZbb-NEXT: andi a0, a0, 15
; RV32IZbb-NEXT: mul a1, a1, a2
; RV32IZbb-NEXT: andi a1, a1, 15
; RV32IZbb-NEXT: maxu a0, a0, a1
; RV32IZbb-NEXT: sub a0, a0, a1
; RV32IZbb-NEXT: ret
;
; RV64IZbb-LABEL: func4:
; RV64IZbb: # %bb.0:
; RV64IZbb-NEXT: andi a0, a0, 15
; RV64IZbb-NEXT: mul a1, a1, a2
; RV64IZbb-NEXT: andi a1, a1, 15
; RV64IZbb-NEXT: maxu a0, a0, a1
; RV64IZbb-NEXT: sub a0, a0, a1
; RV64IZbb-NEXT: ret
  %a = mul i4 %y, %z
  %tmp = call i4 @llvm.usub.sat.i4(i4 %x, i4 %a)
  ret i4 %tmp
}