; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefixes=CHECK,SSE
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=CHECK,AVX,AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=CHECK,AVX,AVX2

; fold (add x, 0) -> x
define <4 x i32> @combine_vec_add_to_zero(<4 x i32> %a) {
; CHECK-LABEL: combine_vec_add_to_zero:
; CHECK:       # %bb.0:
; CHECK-NEXT:    retq
  %1 = add <4 x i32> %a, zeroinitializer
  ret <4 x i32> %1
}

; fold ((c1-A)+c2) -> (c1+c2)-A
define <4 x i32> @combine_vec_add_constant_sub(<4 x i32> %a) {
; SSE-LABEL: combine_vec_add_constant_sub:
; SSE:       # %bb.0:
; SSE-NEXT:    pmovsxbd {{.*#+}} xmm1 = [0,2,4,6]
; SSE-NEXT:    psubd %xmm0, %xmm1
; SSE-NEXT:    movdqa %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: combine_vec_add_constant_sub:
; AVX:       # %bb.0:
; AVX-NEXT:    vpmovsxbd {{.*#+}} xmm1 = [0,2,4,6]
; AVX-NEXT:    vpsubd %xmm0, %xmm1, %xmm0
; AVX-NEXT:    retq
  %1 = sub <4 x i32> <i32 0, i32 1, i32 2, i32 3>, %a
  %2 = add <4 x i32> <i32 0, i32 1, i32 2, i32 3>, %1
  ret <4 x i32> %2
}

; fold ((0-A) + B) -> B-A
define <4 x i32> @combine_vec_add_neg0(<4 x i32> %a, <4 x i32> %b) {
; SSE-LABEL: combine_vec_add_neg0:
; SSE:       # %bb.0:
; SSE-NEXT:    psubd %xmm0, %xmm1
; SSE-NEXT:    movdqa %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: combine_vec_add_neg0:
; AVX:       # %bb.0:
; AVX-NEXT:    vpsubd %xmm0, %xmm1, %xmm0
; AVX-NEXT:    retq
  %1 = sub <4 x i32> zeroinitializer, %a
  %2 = add <4 x i32> %1, %b
  ret <4 x i32> %2
}

; fold (A + (0-B)) -> A-B
define <4 x i32> @combine_vec_add_neg1(<4 x i32> %a, <4 x i32> %b) {
; SSE-LABEL: combine_vec_add_neg1:
; SSE:       # %bb.0:
; SSE-NEXT:    psubd %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: combine_vec_add_neg1:
; AVX:       # %bb.0:
; AVX-NEXT:    vpsubd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = sub <4 x i32> zeroinitializer, %b
  %2 = add <4 x i32> %a, %1
  ret <4 x i32> %2
}

; fold (A+(B-A)) -> B
define <4 x i32> @combine_vec_add_sub0(<4 x i32> %a, <4 x i32> %b) {
; SSE-LABEL: combine_vec_add_sub0:
; SSE:       # %bb.0:
; SSE-NEXT:    movaps %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: combine_vec_add_sub0:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovaps %xmm1, %xmm0
; AVX-NEXT:    retq
  %1 = sub <4 x i32> %b, %a
  %2 = add <4 x i32> %a, %1
  ret <4 x i32> %2
}

; fold ((B-A)+A) -> B
define <4 x i32> @combine_vec_add_sub1(<4 x i32> %a, <4 x i32> %b) {
; SSE-LABEL: combine_vec_add_sub1:
; SSE:       # %bb.0:
; SSE-NEXT:    movaps %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: combine_vec_add_sub1:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovaps %xmm1, %xmm0
; AVX-NEXT:    retq
  %1 = sub <4 x i32> %b, %a
  %2 = add <4 x i32> %1, %a
  ret <4 x i32> %2
}

; fold ((A-B)+(C-A)) -> (C-B)
define <4 x i32> @combine_vec_add_sub_sub0(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
; SSE-LABEL: combine_vec_add_sub_sub0:
; SSE:       # %bb.0:
; SSE-NEXT:    movdqa %xmm2, %xmm0
; SSE-NEXT:    psubd %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: combine_vec_add_sub_sub0:
; AVX:       # %bb.0:
; AVX-NEXT:    vpsubd %xmm1, %xmm2, %xmm0
; AVX-NEXT:    retq
  %1 = sub <4 x i32> %a, %b
  %2 = sub <4 x i32> %c, %a
  %3 = add <4 x i32> %1, %2
  ret <4 x i32> %3
}

; fold ((A-B)+(B-C)) -> (A-C)
define <4 x i32> @combine_vec_add_sub_sub1(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
; SSE-LABEL: combine_vec_add_sub_sub1:
; SSE:       # %bb.0:
; SSE-NEXT:    psubd %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: combine_vec_add_sub_sub1:
; AVX:       # %bb.0:
; AVX-NEXT:    vpsubd %xmm2, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = sub <4 x i32> %a, %b
  %2 = sub <4 x i32> %b, %c
  %3 = add <4 x i32> %1, %2
  ret <4 x i32> %3
}

; fold (A+(B-(A+C))) to (B-C)
define <4 x i32> @combine_vec_add_sub_add0(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
; SSE-LABEL: combine_vec_add_sub_add0:
; SSE:       # %bb.0:
; SSE-NEXT:    movdqa %xmm1, %xmm0
; SSE-NEXT:    psubd %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: combine_vec_add_sub_add0:
; AVX:       # %bb.0:
; AVX-NEXT:    vpsubd %xmm2, %xmm1, %xmm0
; AVX-NEXT:    retq
  %1 = add <4 x i32> %a, %c
  %2 = sub <4 x i32> %b, %1
  %3 = add <4 x i32> %a, %2
  ret <4 x i32> %3
}

; fold (A+(B-(C+A))) to (B-C)
define <4 x i32> @combine_vec_add_sub_add1(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
; SSE-LABEL: combine_vec_add_sub_add1:
; SSE:       # %bb.0:
; SSE-NEXT:    movdqa %xmm1, %xmm0
; SSE-NEXT:    psubd %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: combine_vec_add_sub_add1:
; AVX:       # %bb.0:
; AVX-NEXT:    vpsubd %xmm2, %xmm1, %xmm0
; AVX-NEXT:    retq
  %1 = add <4 x i32> %c, %a
  %2 = sub <4 x i32> %b, %1
  %3 = add <4 x i32> %a, %2
  ret <4 x i32> %3
}

; fold (A+((B-A)+C)) to (B+C)
define <4 x i32> @combine_vec_add_sub_add2(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
; SSE-LABEL: combine_vec_add_sub_add2:
; SSE:       # %bb.0:
; SSE-NEXT:    movdqa %xmm1, %xmm0
; SSE-NEXT:    paddd %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: combine_vec_add_sub_add2:
; AVX:       # %bb.0:
; AVX-NEXT:    vpaddd %xmm2, %xmm1, %xmm0
; AVX-NEXT:    retq
  %1 = sub <4 x i32> %b, %a
  %2 = add <4 x i32> %1, %c
  %3 = add <4 x i32> %a, %2
  ret <4 x i32> %3
}

; fold (A+((B-A)-C)) to (B-C)
define <4 x i32> @combine_vec_add_sub_add3(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
; SSE-LABEL: combine_vec_add_sub_add3:
; SSE:       # %bb.0:
; SSE-NEXT:    movdqa %xmm1, %xmm0
; SSE-NEXT:    psubd %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: combine_vec_add_sub_add3:
; AVX:       # %bb.0:
; AVX-NEXT:    vpsubd %xmm2, %xmm1, %xmm0
; AVX-NEXT:    retq
  %1 = sub <4 x i32> %b, %a
  %2 = sub <4 x i32> %1, %c
  %3 = add <4 x i32> %a, %2
  ret <4 x i32> %3
}

; fold (A-B)+(C-D) to (A+C)-(B+D) when A or C is constant
define <4 x i32> @combine_vec_add_sub_sub(<4 x i32> %a, <4 x i32> %b, <4 x i32> %d) {
; SSE-LABEL: combine_vec_add_sub_sub:
; SSE:       # %bb.0:
; SSE-NEXT:    paddd %xmm2, %xmm1
; SSE-NEXT:    psubd %xmm1, %xmm0
; SSE-NEXT:    paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: combine_vec_add_sub_sub:
; AVX:       # %bb.0:
; AVX-NEXT:    vpaddd %xmm2, %xmm1, %xmm1
; AVX-NEXT:    vpsubd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = sub <4 x i32> %a, %b
  %2 = sub <4 x i32> <i32 0, i32 1, i32 2, i32 3>, %d
  %3 = add <4 x i32> %1, %2
  ret <4 x i32> %3
}

; Check for oneuse limit on fold
define void @PR52039(ptr %pa, ptr %pb) {
; SSE-LABEL: PR52039:
; SSE:       # %bb.0:
; SSE-NEXT:    movdqu (%rdi), %xmm0
; SSE-NEXT:    movdqu 16(%rdi), %xmm1
; SSE-NEXT:    pmovsxbd {{.*#+}} xmm2 = [10,10,10,10]
; SSE-NEXT:    movdqa %xmm2, %xmm3
; SSE-NEXT:    psubd %xmm1, %xmm3
; SSE-NEXT:    psubd %xmm0, %xmm2
; SSE-NEXT:    movdqa %xmm2, %xmm0
; SSE-NEXT:    paddd %xmm2, %xmm0
; SSE-NEXT:    paddd %xmm2, %xmm0
; SSE-NEXT:    movdqa %xmm3, %xmm1
; SSE-NEXT:    paddd %xmm3, %xmm1
; SSE-NEXT:    paddd %xmm3, %xmm1
; SSE-NEXT:    movdqu %xmm3, 16(%rsi)
; SSE-NEXT:    movdqu %xmm2, (%rsi)
; SSE-NEXT:    movdqu %xmm1, 16(%rdi)
; SSE-NEXT:    movdqu %xmm0, (%rdi)
; SSE-NEXT:    retq
;
; AVX1-LABEL: PR52039:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vbroadcastss {{.*#+}} xmm0 = [10,10,10,10]
; AVX1-NEXT:    vpsubd 16(%rdi), %xmm0, %xmm1
; AVX1-NEXT:    vpsubd (%rdi), %xmm0, %xmm0
; AVX1-NEXT:    vpaddd %xmm0, %xmm0, %xmm2
; AVX1-NEXT:    vpaddd %xmm0, %xmm2, %xmm2
; AVX1-NEXT:    vpaddd %xmm1, %xmm1, %xmm3
; AVX1-NEXT:    vpaddd %xmm1, %xmm3, %xmm3
; AVX1-NEXT:    vmovdqu %xmm1, 16(%rsi)
; AVX1-NEXT:    vmovdqu %xmm0, (%rsi)
; AVX1-NEXT:    vmovdqu %xmm3, 16(%rdi)
; AVX1-NEXT:    vmovdqu %xmm2, (%rdi)
; AVX1-NEXT:    retq
;
; AVX2-LABEL: PR52039:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpbroadcastd {{.*#+}} ymm0 = [10,10,10,10,10,10,10,10]
; AVX2-NEXT:    vpsubd (%rdi), %ymm0, %ymm0
; AVX2-NEXT:    vpaddd %ymm0, %ymm0, %ymm1
; AVX2-NEXT:    vpaddd %ymm1, %ymm0, %ymm1
; AVX2-NEXT:    vmovdqu %ymm0, (%rsi)
; AVX2-NEXT:    vmovdqu %ymm1, (%rdi)
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
  %load = load <8 x i32>, ptr %pa, align 4
  %sub = sub nsw <8 x i32> <i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10>, %load
  %mul = mul nsw <8 x i32> %sub, <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
  store <8 x i32> %sub, ptr %pb, align 4
  store <8 x i32> %mul, ptr %pa, align 4
  ret void
}

; fold (a+b) -> (a|b) iff a and b share no bits.
define <4 x i32> @combine_vec_add_uniquebits(<4 x i32> %a, <4 x i32> %b) {
; SSE-LABEL: combine_vec_add_uniquebits:
; SSE:       # %bb.0:
; SSE-NEXT:    andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE-NEXT:    andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE-NEXT:    orps %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX1-LABEL: combine_vec_add_uniquebits:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX1-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX1-NEXT:    vorps %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: combine_vec_add_uniquebits:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vbroadcastss {{.*#+}} xmm2 = [61680,61680,61680,61680]
; AVX2-NEXT:    vandps %xmm2, %xmm0, %xmm0
; AVX2-NEXT:    vbroadcastss {{.*#+}} xmm2 = [3855,3855,3855,3855]
; AVX2-NEXT:    vandps %xmm2, %xmm1, %xmm1
; AVX2-NEXT:    vorps %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    retq
  %1 = and <4 x i32> %a, <i32 61680, i32 61680, i32 61680, i32 61680>
  %2 = and <4 x i32> %b, <i32 3855, i32 3855, i32 3855, i32 3855>
  %3 = add <4 x i32> %1, %2
  ret <4 x i32> %3
}

; fold (add x, shl(0 - y, n)) -> sub(x, shl(y, n))
define <4 x i32> @combine_vec_add_shl_neg0(<4 x i32> %x, <4 x i32> %y) {
; SSE-LABEL: combine_vec_add_shl_neg0:
; SSE:       # %bb.0:
; SSE-NEXT:    pslld $5, %xmm1
; SSE-NEXT:    psubd %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: combine_vec_add_shl_neg0:
; AVX:       # %bb.0:
; AVX-NEXT:    vpslld $5, %xmm1, %xmm1
; AVX-NEXT:    vpsubd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = sub <4 x i32> zeroinitializer, %y
  %2 = shl <4 x i32> %1, <i32 5, i32 5, i32 5, i32 5>
  %3 = add <4 x i32> %x, %2
  ret <4 x i32> %3
}

; fold (add shl(0 - y, n), x) -> sub(x, shl(y, n))
define <4 x i32> @combine_vec_add_shl_neg1(<4 x i32> %x, <4 x i32> %y) {
; SSE-LABEL: combine_vec_add_shl_neg1:
; SSE:       # %bb.0:
; SSE-NEXT:    pslld $5, %xmm1
; SSE-NEXT:    psubd %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: combine_vec_add_shl_neg1:
; AVX:       # %bb.0:
; AVX-NEXT:    vpslld $5, %xmm1, %xmm1
; AVX-NEXT:    vpsubd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = sub <4 x i32> zeroinitializer, %y
  %2 = shl <4 x i32> %1, <i32 5, i32 5, i32 5, i32 5>
  %3 = add <4 x i32> %2, %x
  ret <4 x i32> %3
}

; (add z, (and (sbbl x, x), 1)) -> (sub z, (sbbl x, x))
; and similar xforms where the inner op is either ~0 or 0.
define <4 x i32> @combine_vec_add_and_compare(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> %a2) {
; SSE-LABEL: combine_vec_add_and_compare:
; SSE:       # %bb.0:
; SSE-NEXT:    pcmpeqd %xmm2, %xmm1
; SSE-NEXT:    psubd %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: combine_vec_add_and_compare:
; AVX:       # %bb.0:
; AVX-NEXT:    vpcmpeqd %xmm2, %xmm1, %xmm1
; AVX-NEXT:    vpsubd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = icmp eq <4 x i32> %a1, %a2
  %2 = sext <4 x i1> %1 to <4 x i32>
  %3 = and <4 x i32> %2, <i32 1, i32 1, i32 1, i32 1>
  %4 = add <4 x i32> %a0, %3
  ret <4 x i32> %4
}

; add (sext i1), X -> sub X, (zext i1)
define <4 x i32> @combine_vec_add_sext(<4 x i1> %a0, <4 x i32> %a1) {
; SSE-LABEL: combine_vec_add_sext:
; SSE:       # %bb.0:
; SSE-NEXT:    pslld $31, %xmm0
; SSE-NEXT:    psrad $31, %xmm0
; SSE-NEXT:    paddd %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: combine_vec_add_sext:
; AVX:       # %bb.0:
; AVX-NEXT:    vpslld $31, %xmm0, %xmm0
; AVX-NEXT:    vpsrad $31, %xmm0, %xmm0
; AVX-NEXT:    vpaddd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = sext <4 x i1> %a0 to <4 x i32>
  %2 = add <4 x i32> %1, %a1
  ret <4 x i32> %2
}

; add (sext i1), X -> sub X, (zext i1)
define <4 x i32> @combine_vec_add_sextinreg(<4 x i32> %a0, <4 x i32> %a1) {
; SSE-LABEL: combine_vec_add_sextinreg:
; SSE:       # %bb.0:
; SSE-NEXT:    pslld $31, %xmm0
; SSE-NEXT:    psrad $31, %xmm0
; SSE-NEXT:    paddd %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: combine_vec_add_sextinreg:
; AVX:       # %bb.0:
; AVX-NEXT:    vpslld $31, %xmm0, %xmm0
; AVX-NEXT:    vpsrad $31, %xmm0, %xmm0
; AVX-NEXT:    vpaddd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = shl <4 x i32> %a0, <i32 31, i32 31, i32 31, i32 31>
  %2 = ashr <4 x i32> %1, <i32 31, i32 31, i32 31, i32 31>
  %3 = add <4 x i32> %2, %a1
  ret <4 x i32> %3
}

; (add (add (xor a, -1), b), 1) -> (sub b, a)
define i32 @combine_add_add_not(i32 %a, i32 %b) {
; CHECK-LABEL: combine_add_add_not:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movl %esi, %eax
; CHECK-NEXT:    subl %edi, %eax
; CHECK-NEXT:    retq
  %nota = xor i32 %a, -1
  %add = add i32 %nota, %b
  %r = add i32 %add, 1
  ret i32 %r
}

define <4 x i32> @combine_vec_add_add_not(<4 x i32> %a, <4 x i32> %b) {
; SSE-LABEL: combine_vec_add_add_not:
; SSE:       # %bb.0:
; SSE-NEXT:    psubd %xmm0, %xmm1
; SSE-NEXT:    movdqa %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: combine_vec_add_add_not:
; AVX:       # %bb.0:
; AVX-NEXT:    vpsubd %xmm0, %xmm1, %xmm0
; AVX-NEXT:    retq
  %nota = xor <4 x i32> %a, <i32 -1, i32 -1, i32 -1, i32 -1>
  %add = add <4 x i32> %nota, %b
  %r = add <4 x i32> %add, <i32 1, i32 1, i32 1, i32 1>
  ret <4 x i32> %r
}

define i32 @combine_add_adc_constant(i32 %x, i32 %y, i32 %z) {
; CHECK-LABEL: combine_add_adc_constant:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movl %edi, %eax
; CHECK-NEXT:    btl $7, %edx
; CHECK-NEXT:    adcl $32, %eax
; CHECK-NEXT:    retq
  %and = lshr i32 %z, 7
  %bit = and i32 %and, 1
  %add = add i32 %x, 32
  %r = add i32 %add, %bit
  ret i32 %r
}

declare {i32, i1} @llvm.sadd.with.overflow.i32(i32 %a, i32 %b)

define i1 @sadd_add(i32 %a, i32 %b, ptr %p) {
; CHECK-LABEL: sadd_add:
; CHECK:       # %bb.0:
; CHECK-NEXT:    # kill: def $edi killed $edi def $rdi
; CHECK-NEXT:    notl %edi
; CHECK-NEXT:    addl %esi, %edi
; CHECK-NEXT:    seto %al
; CHECK-NEXT:    leal 1(%rdi), %ecx
; CHECK-NEXT:    movl %ecx, (%rdx)
; CHECK-NEXT:    retq
  %nota = xor i32 %a, -1
  %a0 = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %nota, i32 %b)
  %e0 = extractvalue {i32, i1} %a0, 0
  %e1 = extractvalue {i32, i1} %a0, 1
  %res = add i32 %e0, 1
  store i32 %res, ptr %p
  ret i1 %e1
}

declare {i8, i1} @llvm.uadd.with.overflow.i8(i8 %a, i8 %b)

define i1 @uadd_add(i8 %a, i8 %b, ptr %p) {
; CHECK-LABEL: uadd_add:
; CHECK:       # %bb.0:
; CHECK-NEXT:    notb %dil
; CHECK-NEXT:    addb %sil, %dil
; CHECK-NEXT:    setb %al
; CHECK-NEXT:    incb %dil
; CHECK-NEXT:    movb %dil, (%rdx)
; CHECK-NEXT:    retq
  %nota = xor i8 %a, -1
  %a0 = call {i8, i1} @llvm.uadd.with.overflow.i8(i8 %nota, i8 %b)
  %e0 = extractvalue {i8, i1} %a0, 0
  %e1 = extractvalue {i8, i1} %a0, 1
  %res = add i8 %e0, 1
  store i8 %res, ptr %p
  ret i1 %e1
}

; This would crash because we tried to transform an add-with-overflow
; based on the wrong result value.

define i1 @PR51238(i1 %b, i8 %x, i8 %y, i8 %z) {
; CHECK-LABEL: PR51238:
; CHECK:       # %bb.0:
; CHECK-NEXT:    notb %cl
; CHECK-NEXT:    xorl %eax, %eax
; CHECK-NEXT:    addb %dl, %cl
; CHECK-NEXT:    adcb $1, %al
; CHECK-NEXT:    # kill: def $al killed $al killed $eax
; CHECK-NEXT:    retq
  %ny = xor i8 %y, -1
  %nz = xor i8 %z, -1
  %minxz = select i1 %b, i8 %x, i8 %nz
  %cmpyz = icmp ult i8 %ny, %nz
  %r = add i1 %cmpyz, true
  ret i1 %r
}

define <2 x i64> @add_vec_x_notx(<2 x i64> %v0) nounwind {
; SSE-LABEL: add_vec_x_notx:
; SSE:       # %bb.0:
; SSE-NEXT:    pcmpeqd %xmm0, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: add_vec_x_notx:
; AVX:       # %bb.0:
; AVX-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX-NEXT:    retq
  %x = xor <2 x i64> %v0, <i64 -1, i64 -1>
  %y = add <2 x i64> %v0, %x
  ret <2 x i64> %y
}

define <2 x i64> @add_vec_notx_x(<2 x i64> %v0) nounwind {
; SSE-LABEL: add_vec_notx_x:
; SSE:       # %bb.0:
; SSE-NEXT:    pcmpeqd %xmm0, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: add_vec_notx_x:
; AVX:       # %bb.0:
; AVX-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX-NEXT:    retq
  %x = xor <2 x i64> %v0, <i64 -1, i64 -1>
  %y = add <2 x i64> %x, %v0
  ret <2 x i64> %y
}

define i64 @add_x_notx(i64 %v0) nounwind {
; CHECK-LABEL: add_x_notx:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movq $-1, %rax
; CHECK-NEXT:    retq
  %x = xor i64 %v0, -1
  %y = add i64 %v0, %x
  ret i64 %y
}

define i64 @add_notx_x(i64 %v0) nounwind {
; CHECK-LABEL: add_notx_x:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movq $-1, %rax
; CHECK-NEXT:    retq
  %x = xor i64 %v0, -1
  %y = add i64 %x, %v0
  ret i64 %y
}