; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv64 -mattr=+zbb -O3 < %s \
; RUN: | FileCheck %s --check-prefixes=CHECK,CHECK-RV64I
; RUN: llc -mtriple=riscv64 -mattr=+zbb,+f -target-abi=lp64f -O3 < %s \
; RUN: | FileCheck %s --check-prefixes=CHECK,CHECK-RV64IF

; Tests aimed at checking the optimization that combines two comparison
; operations and a logic operation into one select (min/max) operation
; and one comparison operation.
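; For illustration only (a sketch, not a checked test), the fold rewrites
;   or  (icmp ult %a, %c), (icmp ult %b, %c) --> icmp ult (umin %a, %b), %c
;   and (icmp ult %a, %c), (icmp ult %b, %c) --> icmp ult (umax %a, %b), %c
; so a single Zbb minu/maxu plus one sltu replaces two sltu instructions
; and a logic instruction.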

; 4 patterns below will be converted to umin+less.
define i1 @ulo(i64 %c, i64 %a, i64 %b) {
; CHECK-LABEL: ulo:
; CHECK:       # %bb.0:
; CHECK-NEXT:    minu a1, a1, a2
; CHECK-NEXT:    sltu a0, a1, a0
; CHECK-NEXT:    ret
  %l0 = icmp ult i64 %a, %c
  %l1 = icmp ult i64 %b, %c
  %res = or i1 %l0, %l1
  ret i1 %res
}

define i1 @ulo_swap1(i64 %c, i64 %a, i64 %b) {
; CHECK-LABEL: ulo_swap1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    minu a1, a1, a2
; CHECK-NEXT:    sltu a0, a1, a0
; CHECK-NEXT:    ret
  %l0 = icmp ugt i64 %c, %a
  %l1 = icmp ult i64 %b, %c
  %res = or i1 %l0, %l1
  ret i1 %res
}

define i1 @ulo_swap2(i64 %c, i64 %a, i64 %b) {
; CHECK-LABEL: ulo_swap2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    minu a1, a1, a2
; CHECK-NEXT:    sltu a0, a1, a0
; CHECK-NEXT:    ret
  %l0 = icmp ult i64 %a, %c
  %l1 = icmp ugt i64 %c, %b
  %res = or i1 %l0, %l1
  ret i1 %res
}

define i1 @ulo_swap12(i64 %c, i64 %a, i64 %b) {
; CHECK-LABEL: ulo_swap12:
; CHECK:       # %bb.0:
; CHECK-NEXT:    minu a1, a1, a2
; CHECK-NEXT:    sltu a0, a1, a0
; CHECK-NEXT:    ret
  %l0 = icmp ugt i64 %c, %a
  %l1 = icmp ugt i64 %c, %b
  %res = or i1 %l0, %l1
  ret i1 %res
}

; 4 patterns below will be converted to umax+less.
define i1 @ula(i64 %c, i64 %a, i64 %b) {
; CHECK-LABEL: ula:
; CHECK:       # %bb.0:
; CHECK-NEXT:    maxu a1, a1, a2
; CHECK-NEXT:    sltu a0, a1, a0
; CHECK-NEXT:    ret
  %l0 = icmp ult i64 %a, %c
  %l1 = icmp ult i64 %b, %c
  %res = and i1 %l0, %l1
  ret i1 %res
}

define i1 @ula_swap1(i64 %c, i64 %a, i64 %b) {
; CHECK-LABEL: ula_swap1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    maxu a1, a1, a2
; CHECK-NEXT:    sltu a0, a1, a0
; CHECK-NEXT:    ret
  %l0 = icmp ugt i64 %c, %a
  %l1 = icmp ult i64 %b, %c
  %res = and i1 %l0, %l1
  ret i1 %res
}

define i1 @ula_swap2(i64 %c, i64 %a, i64 %b) {
; CHECK-LABEL: ula_swap2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    maxu a1, a1, a2
; CHECK-NEXT:    sltu a0, a1, a0
; CHECK-NEXT:    ret
  %l0 = icmp ult i64 %a, %c
  %l1 = icmp ugt i64 %c, %b
  %res = and i1 %l0, %l1
  ret i1 %res
}

define i1 @ula_swap12(i64 %c, i64 %a, i64 %b) {
; CHECK-LABEL: ula_swap12:
; CHECK:       # %bb.0:
; CHECK-NEXT:    maxu a1, a1, a2
; CHECK-NEXT:    sltu a0, a1, a0
; CHECK-NEXT:    ret
  %l0 = icmp ugt i64 %c, %a
  %l1 = icmp ugt i64 %c, %b
  %res = and i1 %l0, %l1
  ret i1 %res
}
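
; RISC-V has no "set if greater than" instruction, so for the greater-than
; patterns below llc emits sltu with the operands swapped. For example
; (a sketch of the expected lowering), the pair
;   maxu a1, a1, a2
;   sltu a0, a0, a1
; computes c < max(a, b), i.e. max(a, b) > c.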
; 4 patterns below will be converted to umax+greater.
define i1 @ugo(i64 %c, i64 %a, i64 %b) {
; CHECK-LABEL: ugo:
; CHECK:       # %bb.0:
; CHECK-NEXT:    maxu a1, a1, a2
; CHECK-NEXT:    sltu a0, a0, a1
; CHECK-NEXT:    ret
  %l0 = icmp ugt i64 %a, %c
  %l1 = icmp ugt i64 %b, %c
  %res = or i1 %l0, %l1
  ret i1 %res
}

define i1 @ugo_swap1(i64 %c, i64 %a, i64 %b) {
; CHECK-LABEL: ugo_swap1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    maxu a1, a1, a2
; CHECK-NEXT:    sltu a0, a0, a1
; CHECK-NEXT:    ret
  %l0 = icmp ult i64 %c, %a
  %l1 = icmp ugt i64 %b, %c
  %res = or i1 %l0, %l1
  ret i1 %res
}

define i1 @ugo_swap2(i64 %c, i64 %a, i64 %b) {
; CHECK-LABEL: ugo_swap2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    maxu a1, a1, a2
; CHECK-NEXT:    sltu a0, a0, a1
; CHECK-NEXT:    ret
  %l0 = icmp ugt i64 %a, %c
  %l1 = icmp ult i64 %c, %b
  %res = or i1 %l0, %l1
  ret i1 %res
}

define i1 @ugo_swap12(i64 %c, i64 %a, i64 %b) {
; CHECK-LABEL: ugo_swap12:
; CHECK:       # %bb.0:
; CHECK-NEXT:    maxu a1, a1, a2
; CHECK-NEXT:    sltu a0, a0, a1
; CHECK-NEXT:    ret
  %l0 = icmp ult i64 %c, %a
  %l1 = icmp ult i64 %c, %b
  %res = or i1 %l0, %l1
  ret i1 %res
}

; Pattern below will be converted to umin+greater-or-equal
; (lowered as sltu followed by xori to invert the result).
define i1 @ugea(i64 %c, i64 %a, i64 %b) {
; CHECK-LABEL: ugea:
; CHECK:       # %bb.0:
; CHECK-NEXT:    minu a1, a1, a2
; CHECK-NEXT:    sltu a0, a1, a0
; CHECK-NEXT:    xori a0, a0, 1
; CHECK-NEXT:    ret
  %l0 = icmp uge i64 %a, %c
  %l1 = icmp uge i64 %b, %c
  %res = and i1 %l0, %l1
  ret i1 %res
}

; Pattern below will be converted to umin+greater
; (the greater-than again lowers to sltu with swapped operands).
define i1 @uga(i64 %c, i64 %a, i64 %b) {
; CHECK-LABEL: uga:
; CHECK:       # %bb.0:
; CHECK-NEXT:    minu a1, a1, a2
; CHECK-NEXT:    sltu a0, a0, a1
; CHECK-NEXT:    ret
  %l0 = icmp ugt i64 %a, %c
  %l1 = icmp ugt i64 %b, %c
  %res = and i1 %l0, %l1
  ret i1 %res
}

; Patterns below will be converted to smax+less.
; Sign check.
define i1 @sla(i64 %c, i64 %a, i64 %b) {
; CHECK-LABEL: sla:
; CHECK:       # %bb.0:
; CHECK-NEXT:    max a1, a1, a2
; CHECK-NEXT:    slt a0, a1, a0
; CHECK-NEXT:    ret
  %l0 = icmp slt i64 %a, %c
  %l1 = icmp slt i64 %b, %c
  %res = and i1 %l0, %l1
  ret i1 %res
}

; Negative test
; Float check.
define i1 @flo(float %c, float %a, float %b) {
; CHECK-RV64I-LABEL: flo:
; CHECK-RV64I:       # %bb.0:
; CHECK-RV64I-NEXT:    addi sp, sp, -32
; CHECK-RV64I-NEXT:    .cfi_def_cfa_offset 32
; CHECK-RV64I-NEXT:    sd ra, 24(sp) # 8-byte Folded Spill
; CHECK-RV64I-NEXT:    sd s0, 16(sp) # 8-byte Folded Spill
; CHECK-RV64I-NEXT:    sd s1, 8(sp) # 8-byte Folded Spill
; CHECK-RV64I-NEXT:    sd s2, 0(sp) # 8-byte Folded Spill
; CHECK-RV64I-NEXT:    .cfi_offset ra, -8
; CHECK-RV64I-NEXT:    .cfi_offset s0, -16
; CHECK-RV64I-NEXT:    .cfi_offset s1, -24
; CHECK-RV64I-NEXT:    .cfi_offset s2, -32
; CHECK-RV64I-NEXT:    mv s0, a2
; CHECK-RV64I-NEXT:    mv s1, a0
; CHECK-RV64I-NEXT:    mv a0, a1
; CHECK-RV64I-NEXT:    mv a1, s1
; CHECK-RV64I-NEXT:    call __gesf2
; CHECK-RV64I-NEXT:    mv s2, a0
; CHECK-RV64I-NEXT:    mv a0, s0
; CHECK-RV64I-NEXT:    mv a1, s1
; CHECK-RV64I-NEXT:    call __gesf2
; CHECK-RV64I-NEXT:    or a0, s2, a0
; CHECK-RV64I-NEXT:    slti a0, a0, 0
; CHECK-RV64I-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
; CHECK-RV64I-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
; CHECK-RV64I-NEXT:    ld s1, 8(sp) # 8-byte Folded Reload
; CHECK-RV64I-NEXT:    ld s2, 0(sp) # 8-byte Folded Reload
; CHECK-RV64I-NEXT:    .cfi_restore ra
; CHECK-RV64I-NEXT:    .cfi_restore s0
; CHECK-RV64I-NEXT:    .cfi_restore s1
; CHECK-RV64I-NEXT:    .cfi_restore s2
; CHECK-RV64I-NEXT:    addi sp, sp, 32
; CHECK-RV64I-NEXT:    .cfi_def_cfa_offset 0
; CHECK-RV64I-NEXT:    ret
;
; CHECK-RV64IF-LABEL: flo:
; CHECK-RV64IF:       # %bb.0:
; CHECK-RV64IF-NEXT:    fle.s a0, fa0, fa1
; CHECK-RV64IF-NEXT:    fle.s a1, fa0, fa2
; CHECK-RV64IF-NEXT:    and a0, a0, a1
; CHECK-RV64IF-NEXT:    xori a0, a0, 1
; CHECK-RV64IF-NEXT:    ret
  %l0 = fcmp ult float %a, %c
  %l1 = fcmp ult float %b, %c
  %res = or i1 %l0, %l1
  ret i1 %res
}

; Negative test
; Double check.
define i1 @dlo(double %c, double %a, double %b) {
; CHECK-LABEL: dlo:
; CHECK:       # %bb.0:
; CHECK-NEXT:    addi sp, sp, -32
; CHECK-NEXT:    .cfi_def_cfa_offset 32
; CHECK-NEXT:    sd ra, 24(sp) # 8-byte Folded Spill
; CHECK-NEXT:    sd s0, 16(sp) # 8-byte Folded Spill
; CHECK-NEXT:    sd s1, 8(sp) # 8-byte Folded Spill
; CHECK-NEXT:    sd s2, 0(sp) # 8-byte Folded Spill
; CHECK-NEXT:    .cfi_offset ra, -8
; CHECK-NEXT:    .cfi_offset s0, -16
; CHECK-NEXT:    .cfi_offset s1, -24
; CHECK-NEXT:    .cfi_offset s2, -32
; CHECK-NEXT:    mv s0, a2
; CHECK-NEXT:    mv s1, a0
; CHECK-NEXT:    mv a0, a1
; CHECK-NEXT:    mv a1, s1
; CHECK-NEXT:    call __gedf2
; CHECK-NEXT:    mv s2, a0
; CHECK-NEXT:    mv a0, s0
; CHECK-NEXT:    mv a1, s1
; CHECK-NEXT:    call __gedf2
; CHECK-NEXT:    or a0, s2, a0
; CHECK-NEXT:    slti a0, a0, 0
; CHECK-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
; CHECK-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
; CHECK-NEXT:    ld s1, 8(sp) # 8-byte Folded Reload
; CHECK-NEXT:    ld s2, 0(sp) # 8-byte Folded Reload
; CHECK-NEXT:    .cfi_restore ra
; CHECK-NEXT:    .cfi_restore s0
; CHECK-NEXT:    .cfi_restore s1
; CHECK-NEXT:    .cfi_restore s2
; CHECK-NEXT:    addi sp, sp, 32
; CHECK-NEXT:    .cfi_def_cfa_offset 0
; CHECK-NEXT:    ret
  %l0 = fcmp ult double %a, %c
  %l1 = fcmp ult double %b, %c
  %res = or i1 %l0, %l1
  ret i1 %res
}

; Negative test
; One of the compares has more than one user, so the fold does not apply.
define i1 @multi_user(i64 %c, i64 %a, i64 %b) {
; CHECK-LABEL: multi_user:
; CHECK:       # %bb.0:
; CHECK-NEXT:    sltu a1, a1, a0
; CHECK-NEXT:    sltu a0, a2, a0
; CHECK-NEXT:    or a0, a1, a0
; CHECK-NEXT:    and a0, a1, a0
; CHECK-NEXT:    ret
  %l0 = icmp ugt i64 %c, %a
  %l1 = icmp ult i64 %b, %c
  %res = or i1 %l0, %l1

  %out = and i1 %l0, %res
  ret i1 %out
}

; Negative test
; The two comparisons are not of the same kind, so the fold does not apply.
define i1 @no_same_ops(i64 %c, i64 %a, i64 %b) {
; CHECK-LABEL: no_same_ops:
; CHECK:       # %bb.0:
; CHECK-NEXT:    sltu a1, a0, a1
; CHECK-NEXT:    sltu a0, a2, a0
; CHECK-NEXT:    or a0, a1, a0
; CHECK-NEXT:    ret
  %l0 = icmp ult i64 %c, %a
  %l1 = icmp ugt i64 %c, %b
  %res = or i1 %l0, %l1
  ret i1 %res
}
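
; To regenerate the CHECK lines above after editing the IR, rerun the update
; script from an LLVM checkout (the llc path below is an example and may
; differ in your build tree):
;   llvm/utils/update_llc_test_checks.py --llc-binary=build/bin/llc <this file>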