; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -passes=instcombine -S | FileCheck %s

; These tests exercise InstCombine on the bitwise-select idiom
;   (Mask & X) | (~Mask & Y)
; where Mask is an all-ones/all-zeros value (for example a sign-extended i1).
; Positive tests expect the and/and/or sequence to become a 'select'.

; The mask is a sext of an icmp and the inverted mask is formed with 'xor -1'.
define i32 @foo(i32 %a, i32 %b, i32 %c, i32 %d) {
; CHECK-LABEL: @foo(
; CHECK-NEXT:    [[E_NOT:%.*]] = icmp slt i32 [[A:%.*]], [[B:%.*]]
; CHECK-NEXT:    [[J:%.*]] = select i1 [[E_NOT]], i32 [[C:%.*]], i32 [[D:%.*]]
; CHECK-NEXT:    ret i32 [[J]]
;
  %e = icmp slt i32 %a, %b
  %f = sext i1 %e to i32
  %g = and i32 %c, %f
  %h = xor i32 %f, -1
  %i = and i32 %d, %h
  %j = or i32 %g, %i
  ret i32 %j
}

; Same as @foo with the operands of the final 'or' commuted.
define i32 @bar(i32 %a, i32 %b, i32 %c, i32 %d) {
; CHECK-LABEL: @bar(
; CHECK-NEXT:    [[E_NOT:%.*]] = icmp slt i32 [[A:%.*]], [[B:%.*]]
; CHECK-NEXT:    [[J:%.*]] = select i1 [[E_NOT]], i32 [[C:%.*]], i32 [[D:%.*]]
; CHECK-NEXT:    ret i32 [[J]]
;
  %e = icmp slt i32 %a, %b
  %f = sext i1 %e to i32
  %g = and i32 %c, %f
  %h = xor i32 %f, -1
  %i = and i32 %d, %h
  %j = or i32 %i, %g
  ret i32 %j
}

; The mask is written as 'select i1 %t0, i32 -1, i32 0' instead of a sext.
define i32 @goo(i32 %a, i32 %b, i32 %c, i32 %d) {
; CHECK-LABEL: @goo(
; CHECK-NEXT:    [[T0_NOT:%.*]] = icmp slt i32 [[A:%.*]], [[B:%.*]]
; CHECK-NEXT:    [[T3:%.*]] = select i1 [[T0_NOT]], i32 [[C:%.*]], i32 [[D:%.*]]
; CHECK-NEXT:    ret i32 [[T3]]
;
  %t0 = icmp slt i32 %a, %b
  %iftmp.0.0 = select i1 %t0, i32 -1, i32 0
  %t1 = and i32 %iftmp.0.0, %c
  %not = xor i32 %iftmp.0.0, -1
  %t2 = and i32 %not, %d
  %t3 = or i32 %t1, %t2
  ret i32 %t3
}

; The inverted mask is a second select on the same condition with swapped
; arms (0, -1) rather than an xor of the first mask.
define i32 @poo(i32 %a, i32 %b, i32 %c, i32 %d) {
; CHECK-LABEL: @poo(
; CHECK-NEXT:    [[T0_NOT:%.*]] = icmp slt i32 [[A:%.*]], [[B:%.*]]
; CHECK-NEXT:    [[T3:%.*]] = select i1 [[T0_NOT]], i32 [[C:%.*]], i32 [[D:%.*]]
; CHECK-NEXT:    ret i32 [[T3]]
;
  %t0 = icmp slt i32 %a, %b
  %iftmp.0.0 = select i1 %t0, i32 -1, i32 0
  %t1 = and i32 %iftmp.0.0, %c
  %iftmp = select i1 %t0, i32 0, i32 -1
  %t2 = and i32 %iftmp, %d
  %t3 = or i32 %t1, %t2
  ret i32 %t3
}

; PR32791 - https://bugs.llvm.org/show_bug.cgi?id=32791
; The 2nd compare/select are canonicalized, so CSE and another round of instcombine or some other pass will fold this.
; The two masked values are select-with-zero ops on inverse predicates
; (slt vs. sge) of the same operands; the expected output still contains both
; selects and the 'or'.

define i32 @fold_inverted_icmp_preds(i32 %a, i32 %b, i32 %c, i32 %d) {
; CHECK-LABEL: @fold_inverted_icmp_preds(
; CHECK-NEXT:    [[CMP1:%.*]] = icmp slt i32 [[A:%.*]], [[B:%.*]]
; CHECK-NEXT:    [[SEL1:%.*]] = select i1 [[CMP1]], i32 [[C:%.*]], i32 0
; CHECK-NEXT:    [[CMP2_NOT:%.*]] = icmp slt i32 [[A]], [[B]]
; CHECK-NEXT:    [[SEL2:%.*]] = select i1 [[CMP2_NOT]], i32 0, i32 [[D:%.*]]
; CHECK-NEXT:    [[OR:%.*]] = or i32 [[SEL1]], [[SEL2]]
; CHECK-NEXT:    ret i32 [[OR]]
;
  %cmp1 = icmp slt i32 %a, %b
  %sel1 = select i1 %cmp1, i32 %c, i32 0
  %cmp2 = icmp sge i32 %a, %b
  %sel2 = select i1 %cmp2, i32 %d, i32 0
  %or = or i32 %sel1, %sel2
  ret i32 %or
}

; The 2nd compare/select are canonicalized, so CSE and another round of instcombine or some other pass will fold this.
; Same as @fold_inverted_icmp_preds, but the zero is in the true arm of each
; input select.

define i32 @fold_inverted_icmp_preds_reverse(i32 %a, i32 %b, i32 %c, i32 %d) {
; CHECK-LABEL: @fold_inverted_icmp_preds_reverse(
; CHECK-NEXT:    [[CMP1:%.*]] = icmp slt i32 [[A:%.*]], [[B:%.*]]
; CHECK-NEXT:    [[SEL1:%.*]] = select i1 [[CMP1]], i32 0, i32 [[C:%.*]]
; CHECK-NEXT:    [[CMP2_NOT:%.*]] = icmp slt i32 [[A]], [[B]]
; CHECK-NEXT:    [[SEL2:%.*]] = select i1 [[CMP2_NOT]], i32 [[D:%.*]], i32 0
; CHECK-NEXT:    [[OR:%.*]] = or i32 [[SEL1]], [[SEL2]]
; CHECK-NEXT:    ret i32 [[OR]]
;
  %cmp1 = icmp slt i32 %a, %b
  %sel1 = select i1 %cmp1, i32 0, i32 %c
  %cmp2 = icmp sge i32 %a, %b
  %sel2 = select i1 %cmp2, i32 0, i32 %d
  %or = or i32 %sel1, %sel2
  ret i32 %or
}

; TODO: Should fcmp have the same sort of predicate canonicalization as icmp?

; 'fcmp olt' and 'fcmp uge' are inverse predicates on the same operands.
; The expected output is identical to the input (no fold, no canonicalization).
define i32 @fold_inverted_fcmp_preds(float %a, float %b, i32 %c, i32 %d) {
; CHECK-LABEL: @fold_inverted_fcmp_preds(
; CHECK-NEXT:    [[CMP1:%.*]] = fcmp olt float [[A:%.*]], [[B:%.*]]
; CHECK-NEXT:    [[SEL1:%.*]] = select i1 [[CMP1]], i32 [[C:%.*]], i32 0
; CHECK-NEXT:    [[CMP2:%.*]] = fcmp uge float [[A]], [[B]]
; CHECK-NEXT:    [[SEL2:%.*]] = select i1 [[CMP2]], i32 [[D:%.*]], i32 0
; CHECK-NEXT:    [[OR:%.*]] = or i32 [[SEL1]], [[SEL2]]
; CHECK-NEXT:    ret i32 [[OR]]
;
  %cmp1 = fcmp olt float %a, %b
  %sel1 = select i1 %cmp1, i32 %c, i32 0
  %cmp2 = fcmp uge float %a, %b
  %sel2 = select i1 %cmp2, i32 %d, i32 0
  %or = or i32 %sel1, %sel2
  ret i32 %or
}

; One compare/select pair is canonicalized (here the 1st: 'icmp ne' becomes
; 'icmp eq' with swapped select arms), so CSE and another round of instcombine
; or some other pass will fold this.

define <2 x i32> @fold_inverted_icmp_vector_preds(<2 x i32> %a, <2 x i32> %b, <2 x i32> %c, <2 x i32> %d) {
; CHECK-LABEL: @fold_inverted_icmp_vector_preds(
; CHECK-NEXT:    [[CMP1_NOT:%.*]] = icmp eq <2 x i32> [[A:%.*]], [[B:%.*]]
; CHECK-NEXT:    [[SEL1:%.*]] = select <2 x i1> [[CMP1_NOT]], <2 x i32> zeroinitializer, <2 x i32> [[C:%.*]]
; CHECK-NEXT:    [[CMP2:%.*]] = icmp eq <2 x i32> [[A]], [[B]]
; CHECK-NEXT:    [[SEL2:%.*]] = select <2 x i1> [[CMP2]], <2 x i32> [[D:%.*]], <2 x i32> zeroinitializer
; CHECK-NEXT:    [[OR:%.*]] = or <2 x i32> [[SEL1]], [[SEL2]]
; CHECK-NEXT:    ret <2 x i32> [[OR]]
;
  %cmp1 = icmp ne <2 x i32> %a, %b
  %sel1 = select <2 x i1> %cmp1, <2 x i32> %c, <2 x i32> <i32 0, i32 0>
  %cmp2 = icmp eq <2 x i32> %a, %b
  %sel2 = select <2 x i1> %cmp2, <2 x i32> %d, <2 x i32> <i32 0, i32 0>
  %or = or <2 x i32> %sel1, %sel2
  ret <2 x i32> %or
}

; Same instruction sequence as @goo (select-of-constants mask, xor-inverted
; mask); only the name of the mask temporary differs.
define i32 @par(i32 %a, i32 %b, i32 %c, i32 %d) {
; CHECK-LABEL: @par(
; CHECK-NEXT:    [[T0_NOT:%.*]] = icmp slt i32 [[A:%.*]], [[B:%.*]]
; CHECK-NEXT:    [[T3:%.*]] = select i1 [[T0_NOT]], i32 [[C:%.*]], i32 [[D:%.*]]
; CHECK-NEXT:    ret i32 [[T3]]
;
  %t0 = icmp slt i32 %a, %b
  %iftmp.1.0 = select i1 %t0, i32 -1, i32 0
  %t1 = and i32 %iftmp.1.0, %c
  %not = xor i32 %iftmp.1.0, -1
  %t2 = and i32 %not, %d
  %t3 = or i32 %t1, %t2
  ret i32 %t3
}

; In the following tests (8 commutation variants), verify that a bitcast doesn't get
; in the way of a select transform. These bitcasts are common in SSE/AVX and possibly
; other vector code because of canonicalization to i64 elements for vectors.

; The fptosi instructions are included to avoid commutation canonicalization based on
; operator weight. Using another cast operator ensures that both operands of all logic
; ops are equally weighted, and this ensures that we're testing all commutation
; possibilities.

; Variant 0: and1 = (mask, a), and2 = (~mask, b), or = (and1, and2).
define <2 x i64> @bitcast_select_swap0(<4 x i1> %cmp, <2 x double> %a, <2 x double> %b) {
; CHECK-LABEL: @bitcast_select_swap0(
; CHECK-NEXT:    [[SIA:%.*]] = fptosi <2 x double> [[A:%.*]] to <2 x i64>
; CHECK-NEXT:    [[SIB:%.*]] = fptosi <2 x double> [[B:%.*]] to <2 x i64>
; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <2 x i64> [[SIA]] to <4 x i32>
; CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i64> [[SIB]] to <4 x i32>
; CHECK-NEXT:    [[TMP3:%.*]] = select <4 x i1> [[CMP:%.*]], <4 x i32> [[TMP1]], <4 x i32> [[TMP2]]
; CHECK-NEXT:    [[OR:%.*]] = bitcast <4 x i32> [[TMP3]] to <2 x i64>
; CHECK-NEXT:    ret <2 x i64> [[OR]]
;
  %sia = fptosi <2 x double> %a to <2 x i64>
  %sib = fptosi <2 x double> %b to <2 x i64>
  %sext = sext <4 x i1> %cmp to <4 x i32>
  %bc1 = bitcast <4 x i32> %sext to <2 x i64>
  %and1 = and <2 x i64> %bc1, %sia
  %neg = xor <4 x i32> %sext, <i32 -1, i32 -1, i32 -1, i32 -1>
  %bc2 = bitcast <4 x i32> %neg to <2 x i64>
  %and2 = and <2 x i64> %bc2, %sib
  %or = or <2 x i64> %and1, %and2
  ret <2 x i64> %or
}

; Variant 1: and1 = (mask, a), and2 = (~mask, b), or = (and2, and1).
define <2 x i64> @bitcast_select_swap1(<4 x i1> %cmp, <2 x double> %a, <2 x double> %b) {
; CHECK-LABEL: @bitcast_select_swap1(
; CHECK-NEXT:    [[SIA:%.*]] = fptosi <2 x double> [[A:%.*]] to <2 x i64>
; CHECK-NEXT:    [[SIB:%.*]] = fptosi <2 x double> [[B:%.*]] to <2 x i64>
; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <2 x i64> [[SIA]] to <4 x i32>
; CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i64> [[SIB]] to <4 x i32>
; CHECK-NEXT:    [[TMP3:%.*]] = select <4 x i1> [[CMP:%.*]], <4 x i32> [[TMP1]], <4 x i32> [[TMP2]]
; CHECK-NEXT:    [[OR:%.*]] = bitcast <4 x i32> [[TMP3]] to <2 x i64>
; CHECK-NEXT:    ret <2 x i64> [[OR]]
;
  %sia = fptosi <2 x double> %a to <2 x i64>
  %sib = fptosi <2 x double> %b to <2 x i64>
  %sext = sext <4 x i1> %cmp to <4 x i32>
  %bc1 = bitcast <4 x i32> %sext to <2 x i64>
  %and1 = and <2 x i64> %bc1, %sia
  %neg = xor <4 x i32> %sext, <i32 -1, i32 -1, i32 -1, i32 -1>
  %bc2 = bitcast <4 x i32> %neg to <2 x i64>
  %and2 = and <2 x i64> %bc2, %sib
  %or = or <2 x i64> %and2, %and1
  ret <2 x i64> %or
}

; Variant 2: and1 = (mask, a), and2 = (b, ~mask), or = (and1, and2).
define <2 x i64> @bitcast_select_swap2(<4 x i1> %cmp, <2 x double> %a, <2 x double> %b) {
; CHECK-LABEL: @bitcast_select_swap2(
; CHECK-NEXT:    [[SIA:%.*]] = fptosi <2 x double> [[A:%.*]] to <2 x i64>
; CHECK-NEXT:    [[SIB:%.*]] = fptosi <2 x double> [[B:%.*]] to <2 x i64>
; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <2 x i64> [[SIA]] to <4 x i32>
; CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i64> [[SIB]] to <4 x i32>
; CHECK-NEXT:    [[TMP3:%.*]] = select <4 x i1> [[CMP:%.*]], <4 x i32> [[TMP1]], <4 x i32> [[TMP2]]
; CHECK-NEXT:    [[OR:%.*]] = bitcast <4 x i32> [[TMP3]] to <2 x i64>
; CHECK-NEXT:    ret <2 x i64> [[OR]]
;
  %sia = fptosi <2 x double> %a to <2 x i64>
  %sib = fptosi <2 x double> %b to <2 x i64>
  %sext = sext <4 x i1> %cmp to <4 x i32>
  %bc1 = bitcast <4 x i32> %sext to <2 x i64>
  %and1 = and <2 x i64> %bc1, %sia
  %neg = xor <4 x i32> %sext, <i32 -1, i32 -1, i32 -1, i32 -1>
  %bc2 = bitcast <4 x i32> %neg to <2 x i64>
  %and2 = and <2 x i64> %sib, %bc2
  %or = or <2 x i64> %and1, %and2
  ret <2 x i64> %or
}

; Variant 3: and1 = (mask, a), and2 = (b, ~mask), or = (and2, and1).
define <2 x i64> @bitcast_select_swap3(<4 x i1> %cmp, <2 x double> %a, <2 x double> %b) {
; CHECK-LABEL: @bitcast_select_swap3(
; CHECK-NEXT:    [[SIA:%.*]] = fptosi <2 x double> [[A:%.*]] to <2 x i64>
; CHECK-NEXT:    [[SIB:%.*]] = fptosi <2 x double> [[B:%.*]] to <2 x i64>
; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <2 x i64> [[SIA]] to <4 x i32>
; CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i64> [[SIB]] to <4 x i32>
; CHECK-NEXT:    [[TMP3:%.*]] = select <4 x i1> [[CMP:%.*]], <4 x i32> [[TMP1]], <4 x i32> [[TMP2]]
; CHECK-NEXT:    [[OR:%.*]] = bitcast <4 x i32> [[TMP3]] to <2 x i64>
; CHECK-NEXT:    ret <2 x i64> [[OR]]
;
  %sia = fptosi <2 x double> %a to <2 x i64>
  %sib = fptosi <2 x double> %b to <2 x i64>
  %sext = sext <4 x i1> %cmp to <4 x i32>
  %bc1 = bitcast <4 x i32> %sext to <2 x i64>
  %and1 = and <2 x i64> %bc1, %sia
  %neg = xor <4 x i32> %sext, <i32 -1, i32 -1, i32 -1, i32 -1>
  %bc2 = bitcast <4 x i32> %neg to <2 x i64>
  %and2 = and <2 x i64> %sib, %bc2
  %or = or <2 x i64> %and2, %and1
  ret <2 x i64> %or
}

; Variant 4: and1 = (a, mask), and2 = (~mask, b), or = (and1, and2).
define <2 x i64> @bitcast_select_swap4(<4 x i1> %cmp, <2 x double> %a, <2 x double> %b) {
; CHECK-LABEL: @bitcast_select_swap4(
; CHECK-NEXT:    [[SIA:%.*]] = fptosi <2 x double> [[A:%.*]] to <2 x i64>
; CHECK-NEXT:    [[SIB:%.*]] = fptosi <2 x double> [[B:%.*]] to <2 x i64>
; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <2 x i64> [[SIA]] to <4 x i32>
; CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i64> [[SIB]] to <4 x i32>
; CHECK-NEXT:    [[TMP3:%.*]] = select <4 x i1> [[CMP:%.*]], <4 x i32> [[TMP1]], <4 x i32> [[TMP2]]
; CHECK-NEXT:    [[OR:%.*]] = bitcast <4 x i32> [[TMP3]] to <2 x i64>
; CHECK-NEXT:    ret <2 x i64> [[OR]]
;
  %sia = fptosi <2 x double> %a to <2 x i64>
  %sib = fptosi <2 x double> %b to <2 x i64>
  %sext = sext <4 x i1> %cmp to <4 x i32>
  %bc1 = bitcast <4 x i32> %sext to <2 x i64>
  %and1 = and <2 x i64> %sia, %bc1
  %neg = xor <4 x i32> %sext, <i32 -1, i32 -1, i32 -1, i32 -1>
  %bc2 = bitcast <4 x i32> %neg to <2 x i64>
  %and2 = and <2 x i64> %bc2, %sib
  %or = or <2 x i64> %and1, %and2
  ret <2 x i64> %or
}

; Variant 5: and1 = (a, mask), and2 = (~mask, b), or = (and2, and1).
define <2 x i64> @bitcast_select_swap5(<4 x i1> %cmp, <2 x double> %a, <2 x double> %b) {
; CHECK-LABEL: @bitcast_select_swap5(
; CHECK-NEXT:    [[SIA:%.*]] = fptosi <2 x double> [[A:%.*]] to <2 x i64>
; CHECK-NEXT:    [[SIB:%.*]] = fptosi <2 x double> [[B:%.*]] to <2 x i64>
; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <2 x i64> [[SIA]] to <4 x i32>
; CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i64> [[SIB]] to <4 x i32>
; CHECK-NEXT:    [[TMP3:%.*]] = select <4 x i1> [[CMP:%.*]], <4 x i32> [[TMP1]], <4 x i32> [[TMP2]]
; CHECK-NEXT:    [[OR:%.*]] = bitcast <4 x i32> [[TMP3]] to <2 x i64>
; CHECK-NEXT:    ret <2 x i64> [[OR]]
;
  %sia = fptosi <2 x double> %a to <2 x i64>
  %sib = fptosi <2 x double> %b to <2 x i64>
  %sext = sext <4 x i1> %cmp to <4 x i32>
  %bc1 = bitcast <4 x i32> %sext to <2 x i64>
  %and1 = and <2 x i64> %sia, %bc1
  %neg = xor <4 x i32> %sext, <i32 -1, i32 -1, i32 -1, i32 -1>
  %bc2 = bitcast <4 x i32> %neg to <2 x i64>
  %and2 = and <2 x i64> %bc2, %sib
  %or = or <2 x i64> %and2, %and1
  ret <2 x i64> %or
}

; Variant 6: and1 = (a, mask), and2 = (b, ~mask), or = (and1, and2).
define <2 x i64> @bitcast_select_swap6(<4 x i1> %cmp, <2 x double> %a, <2 x double> %b) {
; CHECK-LABEL: @bitcast_select_swap6(
; CHECK-NEXT:    [[SIA:%.*]] = fptosi <2 x double> [[A:%.*]] to <2 x i64>
; CHECK-NEXT:    [[SIB:%.*]] = fptosi <2 x double> [[B:%.*]] to <2 x i64>
; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <2 x i64> [[SIA]] to <4 x i32>
; CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i64> [[SIB]] to <4 x i32>
; CHECK-NEXT:    [[TMP3:%.*]] = select <4 x i1> [[CMP:%.*]], <4 x i32> [[TMP1]], <4 x i32> [[TMP2]]
; CHECK-NEXT:    [[OR:%.*]] = bitcast <4 x i32> [[TMP3]] to <2 x i64>
; CHECK-NEXT:    ret <2 x i64> [[OR]]
;
  %sia = fptosi <2 x double> %a to <2 x i64>
  %sib = fptosi <2 x double> %b to <2 x i64>
  %sext = sext <4 x i1> %cmp to <4 x i32>
  %bc1 = bitcast <4 x i32> %sext to <2 x i64>
  %and1 = and <2 x i64> %sia, %bc1
  %neg = xor <4 x i32> %sext, <i32 -1, i32 -1, i32 -1, i32 -1>
  %bc2 = bitcast <4 x i32> %neg to <2 x i64>
  %and2 = and <2 x i64> %sib, %bc2
  %or = or <2 x i64> %and1, %and2
  ret <2 x i64> %or
}

; Variant 7: and1 = (a, mask), and2 = (b, ~mask), or = (and2, and1).
define <2 x i64> @bitcast_select_swap7(<4 x i1> %cmp, <2 x double> %a, <2 x double> %b) {
; CHECK-LABEL: @bitcast_select_swap7(
; CHECK-NEXT:    [[SIA:%.*]] = fptosi <2 x double> [[A:%.*]] to <2 x i64>
; CHECK-NEXT:    [[SIB:%.*]] = fptosi <2 x double> [[B:%.*]] to <2 x i64>
; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <2 x i64> [[SIA]] to <4 x i32>
; CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i64> [[SIB]] to <4 x i32>
; CHECK-NEXT:    [[TMP3:%.*]] = select <4 x i1> [[CMP:%.*]], <4 x i32> [[TMP1]], <4 x i32> [[TMP2]]
; CHECK-NEXT:    [[OR:%.*]] = bitcast <4 x i32> [[TMP3]] to <2 x i64>
; CHECK-NEXT:    ret <2 x i64> [[OR]]
;
  %sia = fptosi <2 x double> %a to <2 x i64>
  %sib = fptosi <2 x double> %b to <2 x i64>
  %sext = sext <4 x i1> %cmp to <4 x i32>
  %bc1 = bitcast <4 x i32> %sext to <2 x i64>
  %and1 = and <2 x i64> %sia, %bc1
  %neg = xor <4 x i32> %sext, <i32 -1, i32 -1, i32 -1, i32 -1>
  %bc2 = bitcast <4 x i32> %neg to <2 x i64>
  %and2 = and <2 x i64> %sib, %bc2
  %or = or <2 x i64> %and2, %and1
  ret <2 x i64> %or
}

; %and2 and %bc2 each have an extra use (in %add). The expected output keeps
; the and/and/or sequence and does not form a select.
define <2 x i64> @bitcast_select_multi_uses(<4 x i1> %cmp, <2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: @bitcast_select_multi_uses(
; CHECK-NEXT:    [[SEXT:%.*]] = sext <4 x i1> [[CMP:%.*]] to <4 x i32>
; CHECK-NEXT:    [[BC1:%.*]] = bitcast <4 x i32> [[SEXT]] to <2 x i64>
; CHECK-NEXT:    [[AND1:%.*]] = and <2 x i64> [[A:%.*]], [[BC1]]
; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[SEXT]] to <2 x i64>
; CHECK-NEXT:    [[BC2:%.*]] = xor <2 x i64> [[TMP1]], splat (i64 -1)
; CHECK-NEXT:    [[AND2:%.*]] = and <2 x i64> [[B:%.*]], [[BC2]]
; CHECK-NEXT:    [[OR:%.*]] = or <2 x i64> [[AND2]], [[AND1]]
; CHECK-NEXT:    [[ADD:%.*]] = add <2 x i64> [[AND2]], [[BC2]]
; CHECK-NEXT:    [[SUB:%.*]] = sub <2 x i64> [[OR]], [[ADD]]
; CHECK-NEXT:    ret <2 x i64> [[SUB]]
;
  %sext = sext <4 x i1> %cmp to <4 x i32>
  %bc1 = bitcast <4 x i32> %sext to <2 x i64>
  %and1 = and <2 x i64> %a, %bc1
  %neg = xor <4 x i32> %sext, <i32 -1, i32 -1, i32 -1, i32 -1>
  %bc2 = bitcast <4 x i32> %neg to <2 x i64>
  %and2 = and <2 x i64> %b, %bc2
  %or = or <2 x i64> %and2, %and1
  %add = add <2 x i64> %and2, %bc2
  %sub = sub <2 x i64> %or, %add
  ret <2 x i64> %sub
}

; i1 version of the idiom: (~c & a) | (c & b) is expected to become
; 'select c, b, a'.
define i1 @bools(i1 %a, i1 %b, i1 %c) {
; CHECK-LABEL: @bools(
; CHECK-NEXT:    [[OR:%.*]] = select i1 [[C:%.*]], i1 [[B:%.*]], i1 [[A:%.*]]
; CHECK-NEXT:    ret i1 [[OR]]
;
  %not = xor i1 %c, -1
  %and1 = and i1 %not, %a
  %and2 = and i1 %c, %b
  %or = or i1 %and1, %and2
  ret i1 %or
}

; Same as @bools, with the and/or written in their select-based (logical) form.
define i1 @bools_logical(i1 %a, i1 %b, i1 %c) {
; CHECK-LABEL: @bools_logical(
; CHECK-NEXT:    [[OR:%.*]] = select i1 [[C:%.*]], i1 [[B:%.*]], i1 [[A:%.*]]
; CHECK-NEXT:    ret i1 [[OR]]
;
  %not = xor i1 %c, -1
  %and1 = select i1 %not, i1 %a, i1 false
  %and2 = select i1 %c, i1 %b, i1 false
  %or = select i1 %and1, i1 true, i1 %and2
  ret i1 %or
}

; Form a select if we know we can replace 2 simple logic ops.

; %and1 has a second use (in %xor). The expected output still forms the select
; and keeps %not and %and1 for the remaining use.
define i1 @bools_multi_uses1(i1 %a, i1 %b, i1 %c) {
; CHECK-LABEL: @bools_multi_uses1(
; CHECK-NEXT:    [[NOT:%.*]] = xor i1 [[C:%.*]], true
; CHECK-NEXT:    [[AND1:%.*]] = and i1 [[A:%.*]], [[NOT]]
; CHECK-NEXT:    [[OR:%.*]] = select i1 [[C]], i1 [[B:%.*]], i1 [[A]]
; CHECK-NEXT:    [[XOR:%.*]] = xor i1 [[OR]], [[AND1]]
; CHECK-NEXT:    ret i1 [[XOR]]
;
  %not = xor i1 %c, -1
  %and1 = and i1 %not, %a
  %and2 = and i1 %c, %b
  %or = or i1 %and1, %and2
  %xor = xor i1 %or, %and1
  ret i1 %xor
}

; Same as @bools_multi_uses1, with the and/or written in their select-based
; (logical) form.
define i1 @bools_multi_uses1_logical(i1 %a, i1 %b, i1 %c) {
; CHECK-LABEL: @bools_multi_uses1_logical(
; CHECK-NEXT:    [[NOT:%.*]] = xor i1 [[C:%.*]], true
; CHECK-NEXT:    [[AND1:%.*]] = select i1 [[NOT]], i1 [[A:%.*]], i1 false
; CHECK-NEXT:    [[OR:%.*]] = select i1 [[C]], i1 [[B:%.*]], i1 [[A]]
; CHECK-NEXT:    [[XOR:%.*]] = xor i1 [[OR]], [[AND1]]
; CHECK-NEXT:    ret i1 [[XOR]]
;
  %not = xor i1 %c, -1
  %and1 = select i1 %not, i1 %a, i1 false
  %and2 = select i1 %c, i1 %b, i1 false
  %or = select i1 %and1, i1 true, i1 %and2
  %xor = xor i1 %or, %and1
  ret i1 %xor
}

; Don't replace a cheap logic op with a potentially expensive select
; unless we can also eliminate one of the other original ops.

; Both %and1 and %and2 have an extra use (in %add). With the current expected
; output the whole function still reduces to a single select.
define i1 @bools_multi_uses2(i1 %a, i1 %b, i1 %c) {
; CHECK-LABEL: @bools_multi_uses2(
; CHECK-NEXT:    [[OR:%.*]] = select i1 [[C:%.*]], i1 [[B:%.*]], i1 [[A:%.*]]
; CHECK-NEXT:    ret i1 [[OR]]
;
  %not = xor i1 %c, -1
  %and1 = and i1 %not, %a
  %and2 = and i1 %c, %b
  %or = or i1 %and1, %and2
  %add = add i1 %and1, %and2
  %and3 = and i1 %or, %add
  ret i1 %and3
}

; Select-based (logical) form of @bools_multi_uses2. Here the expected output
; keeps %and1, %and2 and the add (printed as xor) next to the newly formed
; select.
define i1 @bools_multi_uses2_logical(i1 %a, i1 %b, i1 %c) {
; CHECK-LABEL: @bools_multi_uses2_logical(
; CHECK-NEXT:    [[NOT:%.*]] = xor i1 [[C:%.*]], true
; CHECK-NEXT:    [[AND1:%.*]] = select i1 [[NOT]], i1 [[A:%.*]], i1 false
; CHECK-NEXT:    [[AND2:%.*]] = select i1 [[C]], i1 [[B:%.*]], i1 false
; CHECK-NEXT:    [[OR:%.*]] = select i1 [[C]], i1 [[B]], i1 [[A]]
; CHECK-NEXT:    [[ADD:%.*]] = xor i1 [[AND1]], [[AND2]]
; CHECK-NEXT:    [[AND3:%.*]] = select i1 [[OR]], i1 [[ADD]], i1 false
; CHECK-NEXT:    ret i1 [[AND3]]
;
  %not = xor i1 %c, -1
  %and1 = select i1 %not, i1 %a, i1 false
  %and2 = select i1 %c, i1 %b, i1 false
  %or = select i1 %and1, i1 true, i1 %and2
  %add = add i1 %and1, %and2
  %and3 = select i1 %or, i1 %add, i1 false
  ret i1 %and3
}

; Vector-of-i1 version of @bools, with the operands of %and2 and %or commuted.
define <4 x i1> @vec_of_bools(<4 x i1> %a, <4 x i1> %b, <4 x i1> %c) {
; CHECK-LABEL: @vec_of_bools(
; CHECK-NEXT:    [[OR:%.*]] = select <4 x i1> [[C:%.*]], <4 x i1> [[B:%.*]], <4 x i1> [[A:%.*]]
; CHECK-NEXT:    ret <4 x i1> [[OR]]
;
  %not = xor <4 x i1> %c, <i1 true, i1 true, i1 true, i1 true>
  %and1 = and <4 x i1> %not, %a
  %and2 = and <4 x i1> %b, %c
  %or = or <4 x i1> %and2, %and1
  ret <4 x i1> %or
}

; The <4 x i1> mask and its inverse are bitcast to i4 before the logic ops.
; The expected output bitcasts the i4 operands to <4 x i1>, selects on the
; vector type, and bitcasts the result back to i4.
define i4 @vec_of_casted_bools(i4 %a, i4 %b, <4 x i1> %c) {
; CHECK-LABEL: @vec_of_casted_bools(
; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i4 [[B:%.*]] to <4 x i1>
; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i4 [[A:%.*]] to <4 x i1>
; CHECK-NEXT:    [[TMP3:%.*]] = select <4 x i1> [[C:%.*]], <4 x i1> [[TMP1]], <4 x i1> [[TMP2]]
; CHECK-NEXT:    [[OR:%.*]] = bitcast <4 x i1> [[TMP3]] to i4
; CHECK-NEXT:    ret i4 [[OR]]
;
  %not = xor <4 x i1> %c, <i1 true, i1 true, i1 true, i1 true>
  %bc1 = bitcast <4 x i1> %not to i4
  %bc2 = bitcast <4 x i1> %c to i4
  %and1 = and i4 %a, %bc1
  %and2 = and i4 %bc2, %b
  %or = or i4 %and1, %and2
  ret i4 %or
}

; Inverted 'and' constants mean this is a select which is canonicalized to a shuffle.

define <4 x i32> @vec_sel_consts(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: @vec_sel_consts(
; CHECK-NEXT:    [[OR:%.*]] = shufflevector <4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], <4 x i32> <i32 0, i32 5, i32 6, i32 3>
; CHECK-NEXT:    ret <4 x i32> [[OR]]
;
  %and1 = and <4 x i32> %a, <i32 -1, i32 0, i32 0, i32 -1>
  %and2 = and <4 x i32> %b, <i32 0, i32 -1, i32 -1, i32 0>
  %or = or <4 x i32> %and1, %and2
  ret <4 x i32> %or
}

; Same idea as @vec_sel_consts with 3 elements of type i129 and the operands of
; the 'or' commuted.
define <3 x i129> @vec_sel_consts_weird(<3 x i129> %a, <3 x i129> %b) {
; CHECK-LABEL: @vec_sel_consts_weird(
; CHECK-NEXT:    [[OR:%.*]] = shufflevector <3 x i129> [[A:%.*]], <3 x i129> [[B:%.*]], <3 x i32> <i32 0, i32 4, i32 2>
; CHECK-NEXT:    ret <3 x i129> [[OR]]
;
  %and1 = and <3 x i129> %a, <i129 -1, i129 0, i129 -1>
  %and2 = and <3 x i129> %b, <i129 0, i129 -1, i129 0>
  %or = or <3 x i129> %and2, %and1
  ret <3 x i129> %or
}

; The mask elements must be inverted for this to be a select.

; Element 2 (0-based) is zero in both masks, so the two constants are not
; inverses of each other. The expected output is identical to the input.
define <4 x i32> @vec_not_sel_consts(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: @vec_not_sel_consts(
; CHECK-NEXT:    [[AND1:%.*]] = and <4 x i32> [[A:%.*]], <i32 -1, i32 0, i32 0, i32 0>
; CHECK-NEXT:    [[AND2:%.*]] = and <4 x i32> [[B:%.*]], <i32 0, i32 -1, i32 0, i32 -1>
; CHECK-NEXT:    [[OR:%.*]] = or <4 x i32> [[AND1]], [[AND2]]
; CHECK-NEXT:    ret <4 x i32> [[OR]]
;
  %and1 = and <4 x i32> %a, <i32 -1, i32 0, i32 0, i32 0>
  %and2 = and <4 x i32> %b, <i32 0, i32 -1, i32 0, i32 -1>
  %or = or <4 x i32> %and1, %and2
  ret <4 x i32> %or
}

; Variant of @vec_not_sel_consts where some mask elements are undef (element 2
; is still zero in both masks). The expected output is identical to the input.
define <4 x i32> @vec_not_sel_consts_undef_elts(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: @vec_not_sel_consts_undef_elts(
; CHECK-NEXT:    [[AND1:%.*]] = and <4 x i32> [[A:%.*]], <i32 -1, i32 undef, i32 0, i32 0>
; CHECK-NEXT:    [[AND2:%.*]] = and <4 x i32> [[B:%.*]], <i32 0, i32 -1, i32 0, i32 undef>
; CHECK-NEXT:    [[OR:%.*]] = or <4 x i32> [[AND1]], [[AND2]]
; CHECK-NEXT:    ret <4 x i32> [[OR]]
;
  %and1 = and <4 x i32> %a, <i32 -1, i32 undef, i32 0, i32 0>
  %and2 = and <4 x i32> %b, <i32 0, i32 -1, i32 0, i32 undef>
  %or = or <4 x i32> %and1, %and2
  ret <4 x i32> %or
}

; The inverted constants may be operands of xor instructions.

; %mask_flip1 and %not_mask_flip1 xor the same sext'd mask with constants that
; are bitwise inverses of each other (<-1,0,0,0> and <0,-1,-1,-1>). The expected
; output applies the xor to the i1 condition and feeds it to a single select.
define <4 x i32> @vec_sel_xor(<4 x i32> %a, <4 x i32> %b, <4 x i1> %c) {
; CHECK-LABEL: @vec_sel_xor(
; CHECK-NEXT:    [[TMP1:%.*]] = xor <4 x i1> [[C:%.*]], <i1 false, i1 true, i1 true, i1 true>
; CHECK-NEXT:    [[OR:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]]
; CHECK-NEXT:    ret <4 x i32> [[OR]]
;
  %mask = sext <4 x i1> %c to <4 x i32>
  %mask_flip1 = xor <4 x i32> %mask, <i32 -1, i32 0, i32 0, i32 0>
  %not_mask_flip1 = xor <4 x i32> %mask, <i32 0, i32 -1, i32 -1, i32 -1>
  %and1 = and <4 x i32> %not_mask_flip1, %a
  %and2 = and <4 x i32> %mask_flip1, %b
  %or = or <4 x i32> %and1, %and2
  ret <4 x i32> %or
}

; Allow the transform even if the mask values have multiple uses because
; there's still a net reduction of instructions from removing the and/and/or.
; Here %mask_flip1 is also used by %add.

define <4 x i32> @vec_sel_xor_multi_use(<4 x i32> %a, <4 x i32> %b, <4 x i1> %c) {
; CHECK-LABEL: @vec_sel_xor_multi_use(
; CHECK-NEXT:    [[TMP1:%.*]] = xor <4 x i1> [[C:%.*]], <i1 true, i1 false, i1 false, i1 false>
; CHECK-NEXT:    [[MASK_FLIP1:%.*]] = sext <4 x i1> [[TMP1]] to <4 x i32>
; CHECK-NEXT:    [[TMP2:%.*]] = xor <4 x i1> [[C]], <i1 false, i1 true, i1 true, i1 true>
; CHECK-NEXT:    [[OR:%.*]] = select <4 x i1> [[TMP2]], <4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]]
; CHECK-NEXT:    [[ADD:%.*]] = add <4 x i32> [[OR]], [[MASK_FLIP1]]
; CHECK-NEXT:    ret <4 x i32> [[ADD]]
;
  %mask = sext <4 x i1> %c to <4 x i32>
  %mask_flip1 = xor <4 x i32> %mask, <i32 -1, i32 0, i32 0, i32 0>
  %not_mask_flip1 = xor <4 x i32> %mask, <i32 0, i32 -1, i32 -1, i32 -1>
  %and1 = and <4 x i32> %not_mask_flip1, %a
  %and2 = and <4 x i32> %mask_flip1, %b
  %or = or <4 x i32> %and1, %and2
  %add = add <4 x i32> %or, %mask_flip1
  ret <4 x i32> %add
}

; The 'ashr' guarantees that we have a bitmask, so this is select with truncated condition.

; The mask is 'ashr %cond, 31' (every bit equals the sign bit of %cond).
; NOTE(review): the current expected output is two select-with-zero ops on
; 'icmp slt %cond, 0' joined by an 'or', not a single select.
define i32 @allSignBits(i32 %cond, i32 %tval, i32 %fval) {
; CHECK-LABEL: @allSignBits(
; CHECK-NEXT:    [[ISNEG1:%.*]] = icmp slt i32 [[COND:%.*]], 0
; CHECK-NEXT:    [[A1:%.*]] = select i1 [[ISNEG1]], i32 [[TVAL:%.*]], i32 0
; CHECK-NEXT:    [[ISNEG:%.*]] = icmp slt i32 [[COND]], 0
; CHECK-NEXT:    [[A2:%.*]] = select i1 [[ISNEG]], i32 0, i32 [[FVAL:%.*]]
; CHECK-NEXT:    [[SEL:%.*]] = or i32 [[A1]], [[A2]]
; CHECK-NEXT:    ret i32 [[SEL]]
;
  %bitmask = ashr i32 %cond, 31
  %not_bitmask = xor i32 %bitmask, -1
  %a1 = and i32 %tval, %bitmask
  %a2 = and i32 %not_bitmask, %fval
  %sel = or i32 %a1, %a2
  ret i32 %sel
}

; <4 x i8> version of @allSignBits (shift amount 7), with the operands of %a2
; and of the final 'or' commuted.
define <4 x i8> @allSignBits_vec(<4 x i8> %cond, <4 x i8> %tval, <4 x i8> %fval) {
; CHECK-LABEL: @allSignBits_vec(
; CHECK-NEXT:    [[ISNEG1:%.*]] = icmp slt <4 x i8> [[COND:%.*]], zeroinitializer
; CHECK-NEXT:    [[A1:%.*]] = select <4 x i1> [[ISNEG1]], <4 x i8> [[TVAL:%.*]], <4 x i8> zeroinitializer
; CHECK-NEXT:    [[ISNEG:%.*]] = icmp slt <4 x i8> [[COND]], zeroinitializer
; CHECK-NEXT:    [[A2:%.*]] = select <4 x i1> [[ISNEG]], <4 x i8> zeroinitializer, <4 x i8> [[FVAL:%.*]]
; CHECK-NEXT:    [[SEL:%.*]] = or <4 x i8> [[A2]], [[A1]]
; CHECK-NEXT:    ret <4 x i8> [[SEL]]
;
  %bitmask = ashr <4 x i8> %cond, <i8 7, i8 7, i8 7, i8 7>
  %not_bitmask = xor <4 x i8> %bitmask, <i8 -1, i8 -1, i8 -1, i8 -1>
  %a1 = and <4 x i8> %tval, %bitmask
  %a2 = and <4 x i8> %fval, %not_bitmask
  %sel = or <4 x i8> %a2, %a1
  ret <4 x i8> %sel
}

; Negative test - make sure that bitcasts from FP do not cause a crash.

; The 'and' operands are bitcasts of the FP arguments themselves, not of a
; sign-extended bool mask (%cmp is unused). The expected output is identical to
; the input.
define <2 x i64> @fp_bitcast(<4 x i1> %cmp, <2 x double> %a, <2 x double> %b) {
; CHECK-LABEL: @fp_bitcast(
; CHECK-NEXT:    [[SIA:%.*]] = fptosi <2 x double> [[A:%.*]] to <2 x i64>
; CHECK-NEXT:    [[SIB:%.*]] = fptosi <2 x double> [[B:%.*]] to <2 x i64>
; CHECK-NEXT:    [[BC1:%.*]] = bitcast <2 x double> [[A]] to <2 x i64>
; CHECK-NEXT:    [[AND1:%.*]] = and <2 x i64> [[SIA]], [[BC1]]
; CHECK-NEXT:    [[BC2:%.*]] = bitcast <2 x double> [[B]] to <2 x i64>
; CHECK-NEXT:    [[AND2:%.*]] = and <2 x i64> [[SIB]], [[BC2]]
; CHECK-NEXT:    [[OR:%.*]] = or <2 x i64> [[AND2]], [[AND1]]
; CHECK-NEXT:    ret <2 x i64> [[OR]]
;
  %sia = fptosi <2 x double> %a to <2 x i64>
  %sib = fptosi <2 x double> %b to <2 x i64>
  %bc1 = bitcast <2 x double> %a to <2 x i64>
  %and1 = and <2 x i64> %sia, %bc1
  %bc2 = bitcast <2 x double> %b to <2 x i64>
  %and2 = and <2 x i64> %sib, %bc2
  %or = or <2 x i64> %and2, %and1
  ret <2 x i64> %or
}

; The mask is a sext'd fcmp result that passes through two rounds of
; single-input shuffles and 'or's before it is used. The expected output keeps
; that chain, truncates the result to <4 x i1>, and selects between the float
; operands directly (%z when the mask is set, %x otherwise).
define <4 x i32> @computesignbits_through_shuffles(<4 x float> %x, <4 x float> %y, <4 x float> %z) {
; CHECK-LABEL: @computesignbits_through_shuffles(
; CHECK-NEXT:    [[CMP:%.*]] = fcmp ole <4 x float> [[X:%.*]], [[Y:%.*]]
; CHECK-NEXT:    [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i32>
; CHECK-NEXT:    [[S1:%.*]] = shufflevector <4 x i32> [[SEXT]], <4 x i32> poison, <4 x i32> <i32 0, i32 0, i32 1, i32 1>
; CHECK-NEXT:    [[S2:%.*]] = shufflevector <4 x i32> [[SEXT]], <4 x i32> poison, <4 x i32> <i32 2, i32 2, i32 3, i32 3>
; CHECK-NEXT:    [[SHUF_OR1:%.*]] = or <4 x i32> [[S1]], [[S2]]
; CHECK-NEXT:    [[S3:%.*]] = shufflevector <4 x i32> [[SHUF_OR1]], <4 x i32> poison, <4 x i32> <i32 0, i32 0, i32 1, i32 1>
; CHECK-NEXT:    [[S4:%.*]] = shufflevector <4 x i32> [[SHUF_OR1]], <4 x i32> poison, <4 x i32> <i32 2, i32 2, i32 3, i32 3>
; CHECK-NEXT:    [[SHUF_OR2:%.*]] = or <4 x i32> [[S3]], [[S4]]
; CHECK-NEXT:    [[TMP1:%.*]] = trunc nsw <4 x i32> [[SHUF_OR2]] to <4 x i1>
; CHECK-NEXT:    [[SEL_V:%.*]] = select <4 x i1> [[TMP1]], <4 x float> [[Z:%.*]], <4 x float> [[X]]
; CHECK-NEXT:    [[SEL:%.*]] = bitcast <4 x float> [[SEL_V]] to <4 x i32>
; CHECK-NEXT:    ret <4 x i32> [[SEL]]
;
  %cmp = fcmp ole <4 x float> %x, %y
  %sext = sext <4 x i1> %cmp to <4 x i32>
  %s1 = shufflevector <4 x i32> %sext, <4 x i32> poison, <4 x i32> <i32 0, i32 0, i32 1, i32 1>
  %s2 = shufflevector <4 x i32> %sext, <4 x i32> poison, <4 x i32> <i32 2, i32 2, i32 3, i32 3>
  %shuf_or1 = or <4 x i32> %s1, %s2
  %s3 = shufflevector <4 x i32> %shuf_or1, <4 x i32> poison, <4 x i32> <i32 0, i32 0, i32 1, i32 1>
  %s4 = shufflevector <4 x i32> %shuf_or1, <4 x i32> poison, <4 x i32> <i32 2, i32 2, i32 3, i32 3>
  %shuf_or2 = or <4 x i32> %s3, %s4
  %not_or2 = xor <4 x i32> %shuf_or2, <i32 -1, i32 -1, i32 -1, i32 -1>
  %xbc = bitcast <4 x float> %x to <4 x i32>
  %zbc = bitcast <4 x float> %z to <4 x i32>
  %and1 = and <4 x i32> %not_or2, %xbc
  %and2 = and <4 x i32> %shuf_or2, %zbc
  %sel = or <4 x i32> %and1, %and2
  ret <4 x i32> %sel
}

; The mask is a two-input shuffle of two sext'd <4 x i1> conditions. The
; expected output shuffles the i1 vectors directly and uses the result as the
; select condition.
define <4 x i32> @computesignbits_through_two_input_shuffle(<4 x i32> %x, <4 x i32> %y, <4 x i1> %cond1, <4 x i1> %cond2) {
; CHECK-LABEL: @computesignbits_through_two_input_shuffle(
; CHECK-NEXT:    [[COND:%.*]] = shufflevector <4 x i1> [[COND1:%.*]], <4 x i1> [[COND2:%.*]], <4 x i32> <i32 0, i32 2, i32 4, i32 6>
; CHECK-NEXT:    [[SEL:%.*]] = select <4 x i1> [[COND]], <4 x i32> [[Y:%.*]], <4 x i32> [[X:%.*]]
; CHECK-NEXT:    ret <4 x i32> [[SEL]]
;
  %sext1 = sext <4 x i1> %cond1 to <4 x i32>
  %sext2 = sext <4 x i1> %cond2 to <4 x i32>
  %cond = shufflevector <4 x i32> %sext1, <4 x i32> %sext2, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
  %notcond = xor <4 x i32> %cond, <i32 -1, i32 -1, i32 -1, i32 -1>
  %and1 = and <4 x i32> %notcond, %x
  %and2 = and <4 x i32> %cond, %y
  %sel = or <4 x i32> %and1, %and2
  ret <4 x i32> %sel
}
