; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=aarch64-gnu-linux -mcpu=neoverse-n2 < %s | FileCheck %s --check-prefixes=CHECK,CHECK-STD
; RUN: llc -mtriple=aarch64-gnu-linux -mcpu=neoverse-n2 -enable-unsafe-fp-math < %s | FileCheck %s --check-prefixes=CHECK,CHECK-UNSAFE

; Incremental updates of the instruction depths should be enough for this test
; case.
; RUN: llc -mtriple=aarch64-gnu-linux -mcpu=neoverse-n2 -enable-unsafe-fp-math \
; RUN:   -machine-combiner-inc-threshold=0 -machine-combiner-verify-pattern-order=true < %s | FileCheck %s --check-prefixes=CHECK,CHECK-UNSAFE

; Verify that the first two adds are independent regardless of how the inputs are
; commuted. The destination registers are used as source registers for the third add.

define float @reassociate_adds1(float %x0, float %x1, float %x2, float %x3) {
; CHECK-STD-LABEL: reassociate_adds1:
; CHECK-STD:       // %bb.0:
; CHECK-STD-NEXT:    fadd s0, s0, s1
; CHECK-STD-NEXT:    fadd s0, s0, s2
; CHECK-STD-NEXT:    fadd s0, s0, s3
; CHECK-STD-NEXT:    ret
;
; CHECK-UNSAFE-LABEL: reassociate_adds1:
; CHECK-UNSAFE:       // %bb.0:
; CHECK-UNSAFE-NEXT:    fadd s0, s0, s1
; CHECK-UNSAFE-NEXT:    fadd s1, s2, s3
; CHECK-UNSAFE-NEXT:    fadd s0, s0, s1
; CHECK-UNSAFE-NEXT:    ret
  %t0 = fadd float %x0, %x1
  %t1 = fadd float %t0, %x2
  %t2 = fadd float %t1, %x3
  ret float %t2
}

define float @reassociate_adds1_fast(float %x0, float %x1, float %x2, float %x3) {
; CHECK-LABEL: reassociate_adds1_fast:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fadd s0, s0, s1
; CHECK-NEXT:    fadd s1, s2, s3
; CHECK-NEXT:    fadd s0, s0, s1
; CHECK-NEXT:    ret
  %t0 = fadd fast float %x0, %x1
  %t1 = fadd fast float %t0, %x2
  %t2 = fadd fast float %t1, %x3
  ret float %t2
}

define float @reassociate_adds1_reassoc(float %x0, float %x1, float %x2, float %x3) {
; CHECK-STD-LABEL: reassociate_adds1_reassoc:
; CHECK-STD:       // %bb.0:
; CHECK-STD-NEXT:    fadd s0, s0, s1
; CHECK-STD-NEXT:    fadd s0, s0, s2
; CHECK-STD-NEXT:    fadd s0, s0, s3
; CHECK-STD-NEXT:    ret
;
; CHECK-UNSAFE-LABEL: reassociate_adds1_reassoc:
; CHECK-UNSAFE:       // %bb.0:
; CHECK-UNSAFE-NEXT:    fadd s0, s0, s1
; CHECK-UNSAFE-NEXT:    fadd s1, s2, s3
; CHECK-UNSAFE-NEXT:    fadd s0, s0, s1
; CHECK-UNSAFE-NEXT:    ret
  %t0 = fadd reassoc float %x0, %x1
  %t1 = fadd reassoc float %t0, %x2
  %t2 = fadd reassoc float %t1, %x3
  ret float %t2
}

define float @reassociate_adds2(float %x0, float %x1, float %x2, float %x3) {
; CHECK-STD-LABEL: reassociate_adds2:
; CHECK-STD:       // %bb.0:
; CHECK-STD-NEXT:    fadd s0, s0, s1
; CHECK-STD-NEXT:    fadd s0, s2, s0
; CHECK-STD-NEXT:    fadd s0, s0, s3
; CHECK-STD-NEXT:    ret
;
; CHECK-UNSAFE-LABEL: reassociate_adds2:
; CHECK-UNSAFE:       // %bb.0:
; CHECK-UNSAFE-NEXT:    fadd s0, s0, s1
; CHECK-UNSAFE-NEXT:    fadd s1, s2, s3
; CHECK-UNSAFE-NEXT:    fadd s0, s1, s0
; CHECK-UNSAFE-NEXT:    ret
  %t0 = fadd float %x0, %x1
  %t1 = fadd float %x2, %t0
  %t2 = fadd float %t1, %x3
  ret float %t2
}

define float @reassociate_adds3(float %x0, float %x1, float %x2, float %x3) {
; CHECK-STD-LABEL: reassociate_adds3:
; CHECK-STD:       // %bb.0:
; CHECK-STD-NEXT:    fadd s0, s0, s1
; CHECK-STD-NEXT:    fadd s0, s0, s2
; CHECK-STD-NEXT:    fadd s0, s3, s0
; CHECK-STD-NEXT:    ret
;
; CHECK-UNSAFE-LABEL: reassociate_adds3:
; CHECK-UNSAFE:       // %bb.0:
; CHECK-UNSAFE-NEXT:    fadd s0, s0, s1
; CHECK-UNSAFE-NEXT:    fadd s1, s3, s2
; CHECK-UNSAFE-NEXT:    fadd s0, s1, s0
; CHECK-UNSAFE-NEXT:    ret
  %t0 = fadd float %x0, %x1
  %t1 = fadd float %t0, %x2
  %t2 = fadd float %x3, %t1
  ret float %t2
}

define float @reassociate_adds4(float %x0, float %x1, float %x2, float %x3) {
; CHECK-STD-LABEL: reassociate_adds4:
; CHECK-STD:       // %bb.0:
; CHECK-STD-NEXT:    fadd s0, s0, s1
; CHECK-STD-NEXT:    fadd s0, s2, s0
; CHECK-STD-NEXT:    fadd s0, s3, s0
; CHECK-STD-NEXT:    ret
;
; CHECK-UNSAFE-LABEL: reassociate_adds4:
; CHECK-UNSAFE:       // %bb.0:
; CHECK-UNSAFE-NEXT:    fadd s0, s0, s1
; CHECK-UNSAFE-NEXT:    fadd s1, s3, s2
; CHECK-UNSAFE-NEXT:    fadd s0, s1, s0
; CHECK-UNSAFE-NEXT:    ret
  %t0 = fadd float %x0, %x1
  %t1 = fadd float %x2, %t0
  %t2 = fadd float %x3, %t1
  ret float %t2
}

; Verify that we reassociate some of these ops. The optimal balanced tree of adds is not
; produced because that would cost more compile time.

define float @reassociate_adds5(float %x0, float %x1, float %x2, float %x3, float %x4, float %x5, float %x6, float %x7) {
; CHECK-STD-LABEL: reassociate_adds5:
; CHECK-STD:       // %bb.0:
; CHECK-STD-NEXT:    fadd s0, s0, s1
; CHECK-STD-NEXT:    fadd s0, s0, s2
; CHECK-STD-NEXT:    fadd s0, s0, s3
; CHECK-STD-NEXT:    fadd s0, s0, s4
; CHECK-STD-NEXT:    fadd s0, s0, s5
; CHECK-STD-NEXT:    fadd s0, s0, s6
; CHECK-STD-NEXT:    fadd s0, s0, s7
; CHECK-STD-NEXT:    ret
;
; CHECK-UNSAFE-LABEL: reassociate_adds5:
; CHECK-UNSAFE:       // %bb.0:
; CHECK-UNSAFE-NEXT:    fadd s0, s0, s1
; CHECK-UNSAFE-NEXT:    fadd s1, s2, s3
; CHECK-UNSAFE-NEXT:    fadd s0, s0, s1
; CHECK-UNSAFE-NEXT:    fadd s1, s4, s5
; CHECK-UNSAFE-NEXT:    fadd s1, s1, s6
; CHECK-UNSAFE-NEXT:    fadd s0, s0, s1
; CHECK-UNSAFE-NEXT:    fadd s0, s0, s7
; CHECK-UNSAFE-NEXT:    ret
  %t0 = fadd float %x0, %x1
  %t1 = fadd float %t0, %x2
  %t2 = fadd float %t1, %x3
  %t3 = fadd float %t2, %x4
  %t4 = fadd float %t3, %x5
  %t5 = fadd float %t4, %x6
  %t6 = fadd float %t5, %x7
  ret float %t6
}

; Verify that we only need two associative operations to reassociate the operands.
; Also, we should reassociate such that the result of the high latency division
; is used by the final 'add' rather than reassociating the %x3 operand with the
; division. The latter reassociation would not improve anything.

define float @reassociate_adds6(float %x0, float %x1, float %x2, float %x3) {
; CHECK-STD-LABEL: reassociate_adds6:
; CHECK-STD:       // %bb.0:
; CHECK-STD-NEXT:    fdiv s0, s0, s1
; CHECK-STD-NEXT:    fadd s0, s2, s0
; CHECK-STD-NEXT:    fadd s0, s3, s0
; CHECK-STD-NEXT:    ret
;
; CHECK-UNSAFE-LABEL: reassociate_adds6:
; CHECK-UNSAFE:       // %bb.0:
; CHECK-UNSAFE-NEXT:    fdiv s0, s0, s1
; CHECK-UNSAFE-NEXT:    fadd s1, s3, s2
; CHECK-UNSAFE-NEXT:    fadd s0, s1, s0
; CHECK-UNSAFE-NEXT:    ret
  %t0 = fdiv float %x0, %x1
  %t1 = fadd float %x2, %t0
  %t2 = fadd float %x3, %t1
  ret float %t2
}

; Verify that scalar single-precision multiplies are reassociated.

define float @reassociate_muls1(float %x0, float %x1, float %x2, float %x3) {
; CHECK-STD-LABEL: reassociate_muls1:
; CHECK-STD:       // %bb.0:
; CHECK-STD-NEXT:    fdiv s0, s0, s1
; CHECK-STD-NEXT:    fmul s0, s2, s0
; CHECK-STD-NEXT:    fmul s0, s3, s0
; CHECK-STD-NEXT:    ret
;
; CHECK-UNSAFE-LABEL: reassociate_muls1:
; CHECK-UNSAFE:       // %bb.0:
; CHECK-UNSAFE-NEXT:    fdiv s0, s0, s1
; CHECK-UNSAFE-NEXT:    fmul s1, s3, s2
; CHECK-UNSAFE-NEXT:    fmul s0, s1, s0
; CHECK-UNSAFE-NEXT:    ret
  %t0 = fdiv float %x0, %x1
  %t1 = fmul float %x2, %t0
  %t2 = fmul float %x3, %t1
  ret float %t2
}

; Verify that scalar double-precision adds are reassociated.

define double @reassociate_adds_double(double %x0, double %x1, double %x2, double %x3) {
; CHECK-STD-LABEL: reassociate_adds_double:
; CHECK-STD:       // %bb.0:
; CHECK-STD-NEXT:    fdiv d0, d0, d1
; CHECK-STD-NEXT:    fadd d0, d2, d0
; CHECK-STD-NEXT:    fadd d0, d3, d0
; CHECK-STD-NEXT:    ret
;
; CHECK-UNSAFE-LABEL: reassociate_adds_double:
; CHECK-UNSAFE:       // %bb.0:
; CHECK-UNSAFE-NEXT:    fdiv d0, d0, d1
; CHECK-UNSAFE-NEXT:    fadd d1, d3, d2
; CHECK-UNSAFE-NEXT:    fadd d0, d1, d0
; CHECK-UNSAFE-NEXT:    ret
  %t0 = fdiv double %x0, %x1
  %t1 = fadd double %x2, %t0
  %t2 = fadd double %x3, %t1
  ret double %t2
}

; Verify that scalar double-precision multiplies are reassociated.

define double @reassociate_muls_double(double %x0, double %x1, double %x2, double %x3) {
; CHECK-STD-LABEL: reassociate_muls_double:
; CHECK-STD:       // %bb.0:
; CHECK-STD-NEXT:    fdiv d0, d0, d1
; CHECK-STD-NEXT:    fmul d0, d2, d0
; CHECK-STD-NEXT:    fmul d0, d3, d0
; CHECK-STD-NEXT:    ret
;
; CHECK-UNSAFE-LABEL: reassociate_muls_double:
; CHECK-UNSAFE:       // %bb.0:
; CHECK-UNSAFE-NEXT:    fdiv d0, d0, d1
; CHECK-UNSAFE-NEXT:    fmul d1, d3, d2
; CHECK-UNSAFE-NEXT:    fmul d0, d1, d0
; CHECK-UNSAFE-NEXT:    ret
  %t0 = fdiv double %x0, %x1
  %t1 = fmul double %x2, %t0
  %t2 = fmul double %x3, %t1
  ret double %t2
}

; Verify that scalar half-precision adds are reassociated.

define half @reassociate_adds_half(half %x0, half %x1, half %x2, half %x3) {
; CHECK-STD-LABEL: reassociate_adds_half:
; CHECK-STD:       // %bb.0:
; CHECK-STD-NEXT:    fdiv h0, h0, h1
; CHECK-STD-NEXT:    fadd h0, h2, h0
; CHECK-STD-NEXT:    fadd h0, h3, h0
; CHECK-STD-NEXT:    ret
;
; CHECK-UNSAFE-LABEL: reassociate_adds_half:
; CHECK-UNSAFE:       // %bb.0:
; CHECK-UNSAFE-NEXT:    fdiv h0, h0, h1
; CHECK-UNSAFE-NEXT:    fadd h2, h3, h2
; CHECK-UNSAFE-NEXT:    fadd h0, h2, h0
; CHECK-UNSAFE-NEXT:    ret
  %t0 = fdiv half %x0, %x1
  %t1 = fadd half %x2, %t0
  %t2 = fadd half %x3, %t1
  ret half %t2
}

; Verify that scalar half-precision multiplies are reassociated.

define half @reassociate_muls_half(half %x0, half %x1, half %x2, half %x3) {
; CHECK-STD-LABEL: reassociate_muls_half:
; CHECK-STD:       // %bb.0:
; CHECK-STD-NEXT:    fdiv h0, h0, h1
; CHECK-STD-NEXT:    fmul h0, h2, h0
; CHECK-STD-NEXT:    fmul h0, h3, h0
; CHECK-STD-NEXT:    ret
;
; CHECK-UNSAFE-LABEL: reassociate_muls_half:
; CHECK-UNSAFE:       // %bb.0:
; CHECK-UNSAFE-NEXT:    fdiv h0, h0, h1
; CHECK-UNSAFE-NEXT:    fmul h2, h3, h2
; CHECK-UNSAFE-NEXT:    fmul h0, h2, h0
; CHECK-UNSAFE-NEXT:    ret
  %t0 = fdiv half %x0, %x1
  %t1 = fmul half %x2, %t0
  %t2 = fmul half %x3, %t1
  ret half %t2
}

; Verify that scalar integer adds are reassociated.

define i32 @reassociate_adds_i32(i32 %x0, i32 %x1, i32 %x2, i32 %x3) {
; CHECK-LABEL: reassociate_adds_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    udiv w8, w0, w1
; CHECK-NEXT:    add w9, w3, w2
; CHECK-NEXT:    add w0, w9, w8
; CHECK-NEXT:    ret
  %t0 = udiv i32 %x0, %x1
  %t1 = add i32 %x2, %t0
  %t2 = add i32 %x3, %t1
  ret i32 %t2
}

define i64 @reassociate_adds_i64(i64 %x0, i64 %x1, i64 %x2, i64 %x3) {
; CHECK-LABEL: reassociate_adds_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    udiv x8, x0, x1
; CHECK-NEXT:    add x9, x3, x2
; CHECK-NEXT:    add x0, x9, x8
; CHECK-NEXT:    ret
  %t0 = udiv i64 %x0, %x1
  %t1 = add i64 %x2, %t0
  %t2 = add i64 %x3, %t1
  ret i64 %t2
}

; Verify that scalar bitwise operations are reassociated.

define i32 @reassociate_ands_i32(i32 %x0, i32 %x1, i32 %x2, i32 %x3) {
; CHECK-LABEL: reassociate_ands_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    and w8, w0, w1
; CHECK-NEXT:    and w9, w2, w3
; CHECK-NEXT:    and w0, w8, w9
; CHECK-NEXT:    ret
  %t0 = and i32 %x0, %x1
  %t1 = and i32 %t0, %x2
  %t2 = and i32 %t1, %x3
  ret i32 %t2
}

define i64 @reassociate_ors_i64(i64 %x0, i64 %x1, i64 %x2, i64 %x3) {
; CHECK-LABEL: reassociate_ors_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    orr x8, x0, x1
; CHECK-NEXT:    orr x9, x2, x3
; CHECK-NEXT:    orr x0, x8, x9
; CHECK-NEXT:    ret
  %t0 = or i64 %x0, %x1
  %t1 = or i64 %t0, %x2
  %t2 = or i64 %t1, %x3
  ret i64 %t2
}

define i32 @reassociate_xors_i32(i32 %x0, i32 %x1, i32 %x2, i32 %x3) {
; CHECK-LABEL: reassociate_xors_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    eor w8, w0, w1
; CHECK-NEXT:    eor w9, w2, w3
; CHECK-NEXT:    eor w0, w8, w9
; CHECK-NEXT:    ret
  %t0 = xor i32 %x0, %x1
  %t1 = xor i32 %t0, %x2
  %t2 = xor i32 %t1, %x3
  ret i32 %t2
}

; Verify that we reassociate vector instructions too.

define <4 x float> @vector_reassociate_adds1(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, <4 x float> %x3) {
; CHECK-STD-LABEL: vector_reassociate_adds1:
; CHECK-STD:       // %bb.0:
; CHECK-STD-NEXT:    fadd v0.4s, v0.4s, v1.4s
; CHECK-STD-NEXT:    fadd v0.4s, v0.4s, v2.4s
; CHECK-STD-NEXT:    fadd v0.4s, v0.4s, v3.4s
; CHECK-STD-NEXT:    ret
;
; CHECK-UNSAFE-LABEL: vector_reassociate_adds1:
; CHECK-UNSAFE:       // %bb.0:
; CHECK-UNSAFE-NEXT:    fadd v0.4s, v0.4s, v1.4s
; CHECK-UNSAFE-NEXT:    fadd v1.4s, v2.4s, v3.4s
; CHECK-UNSAFE-NEXT:    fadd v0.4s, v0.4s, v1.4s
; CHECK-UNSAFE-NEXT:    ret
  %t0 = fadd <4 x float> %x0, %x1
  %t1 = fadd <4 x float> %t0, %x2
  %t2 = fadd <4 x float> %t1, %x3
  ret <4 x float> %t2
}

define <4 x float> @vector_reassociate_adds2(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, <4 x float> %x3) {
; CHECK-STD-LABEL: vector_reassociate_adds2:
; CHECK-STD:       // %bb.0:
; CHECK-STD-NEXT:    fadd v0.4s, v0.4s, v1.4s
; CHECK-STD-NEXT:    fadd v0.4s, v2.4s, v0.4s
; CHECK-STD-NEXT:    fadd v0.4s, v0.4s, v3.4s
; CHECK-STD-NEXT:    ret
;
; CHECK-UNSAFE-LABEL: vector_reassociate_adds2:
; CHECK-UNSAFE:       // %bb.0:
; CHECK-UNSAFE-NEXT:    fadd v0.4s, v0.4s, v1.4s
; CHECK-UNSAFE-NEXT:    fadd v1.4s, v2.4s, v3.4s
; CHECK-UNSAFE-NEXT:    fadd v0.4s, v1.4s, v0.4s
; CHECK-UNSAFE-NEXT:    ret
  %t0 = fadd <4 x float> %x0, %x1
  %t1 = fadd <4 x float> %x2, %t0
  %t2 = fadd <4 x float> %t1, %x3
  ret <4 x float> %t2
}

define <4 x float> @vector_reassociate_adds3(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, <4 x float> %x3) {
; CHECK-STD-LABEL: vector_reassociate_adds3:
; CHECK-STD:       // %bb.0:
; CHECK-STD-NEXT:    fadd v0.4s, v0.4s, v1.4s
; CHECK-STD-NEXT:    fadd v0.4s, v0.4s, v2.4s
; CHECK-STD-NEXT:    fadd v0.4s, v3.4s, v0.4s
; CHECK-STD-NEXT:    ret
;
; CHECK-UNSAFE-LABEL: vector_reassociate_adds3:
; CHECK-UNSAFE:       // %bb.0:
; CHECK-UNSAFE-NEXT:    fadd v0.4s, v0.4s, v1.4s
; CHECK-UNSAFE-NEXT:    fadd v1.4s, v3.4s, v2.4s
; CHECK-UNSAFE-NEXT:    fadd v0.4s, v1.4s, v0.4s
; CHECK-UNSAFE-NEXT:    ret
  %t0 = fadd <4 x float> %x0, %x1
  %t1 = fadd <4 x float> %t0, %x2
  %t2 = fadd <4 x float> %x3, %t1
  ret <4 x float> %t2
}

define <4 x float> @vector_reassociate_adds4(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, <4 x float> %x3) {
; CHECK-STD-LABEL: vector_reassociate_adds4:
; CHECK-STD:       // %bb.0:
; CHECK-STD-NEXT:    fadd v0.4s, v0.4s, v1.4s
; CHECK-STD-NEXT:    fadd v0.4s, v2.4s, v0.4s
; CHECK-STD-NEXT:    fadd v0.4s, v3.4s, v0.4s
; CHECK-STD-NEXT:    ret
;
; CHECK-UNSAFE-LABEL: vector_reassociate_adds4:
; CHECK-UNSAFE:       // %bb.0:
; CHECK-UNSAFE-NEXT:    fadd v0.4s, v0.4s, v1.4s
; CHECK-UNSAFE-NEXT:    fadd v1.4s, v3.4s, v2.4s
; CHECK-UNSAFE-NEXT:    fadd v0.4s, v1.4s, v0.4s
; CHECK-UNSAFE-NEXT:    ret
  %t0 = fadd <4 x float> %x0, %x1
  %t1 = fadd <4 x float> %x2, %t0
  %t2 = fadd <4 x float> %x3, %t1
  ret <4 x float> %t2
}

; Verify that 64-bit vector half-precision adds are reassociated.

define <4 x half> @reassociate_adds_v4f16(<4 x half> %x0, <4 x half> %x1, <4 x half> %x2, <4 x half> %x3) {
; CHECK-STD-LABEL: reassociate_adds_v4f16:
; CHECK-STD:       // %bb.0:
; CHECK-STD-NEXT:    fadd v0.4h, v0.4h, v1.4h
; CHECK-STD-NEXT:    fadd v0.4h, v2.4h, v0.4h
; CHECK-STD-NEXT:    fadd v0.4h, v3.4h, v0.4h
; CHECK-STD-NEXT:    ret
;
; CHECK-UNSAFE-LABEL: reassociate_adds_v4f16:
; CHECK-UNSAFE:       // %bb.0:
; CHECK-UNSAFE-NEXT:    fadd v0.4h, v0.4h, v1.4h
; CHECK-UNSAFE-NEXT:    fadd v1.4h, v3.4h, v2.4h
; CHECK-UNSAFE-NEXT:    fadd v0.4h, v1.4h, v0.4h
; CHECK-UNSAFE-NEXT:    ret
  %t0 = fadd <4 x half> %x0, %x1
  %t1 = fadd <4 x half> %x2, %t0
  %t2 = fadd <4 x half> %x3, %t1
  ret <4 x half> %t2
}

; Verify that 128-bit vector half-precision multiplies are reassociated.

define <8 x half> @reassociate_muls_v8f16(<8 x half> %x0, <8 x half> %x1, <8 x half> %x2, <8 x half> %x3) {
; CHECK-STD-LABEL: reassociate_muls_v8f16:
; CHECK-STD:       // %bb.0:
; CHECK-STD-NEXT:    fadd v0.8h, v0.8h, v1.8h
; CHECK-STD-NEXT:    fmul v0.8h, v2.8h, v0.8h
; CHECK-STD-NEXT:    fmul v0.8h, v3.8h, v0.8h
; CHECK-STD-NEXT:    ret
;
; CHECK-UNSAFE-LABEL: reassociate_muls_v8f16:
; CHECK-UNSAFE:       // %bb.0:
; CHECK-UNSAFE-NEXT:    fadd v0.8h, v0.8h, v1.8h
; CHECK-UNSAFE-NEXT:    fmul v1.8h, v3.8h, v2.8h
; CHECK-UNSAFE-NEXT:    fmul v0.8h, v1.8h, v0.8h
; CHECK-UNSAFE-NEXT:    ret
  %t0 = fadd <8 x half> %x0, %x1
  %t1 = fmul <8 x half> %x2, %t0
  %t2 = fmul <8 x half> %x3, %t1
  ret <8 x half> %t2
}

; Verify that 128-bit vector single-precision multiplies are reassociated.

define <4 x float> @reassociate_muls_v4f32(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, <4 x float> %x3) {
; CHECK-STD-LABEL: reassociate_muls_v4f32:
; CHECK-STD:       // %bb.0:
; CHECK-STD-NEXT:    fadd v0.4s, v0.4s, v1.4s
; CHECK-STD-NEXT:    fmul v0.4s, v2.4s, v0.4s
; CHECK-STD-NEXT:    fmul v0.4s, v3.4s, v0.4s
; CHECK-STD-NEXT:    ret
;
; CHECK-UNSAFE-LABEL: reassociate_muls_v4f32:
; CHECK-UNSAFE:       // %bb.0:
; CHECK-UNSAFE-NEXT:    fadd v0.4s, v0.4s, v1.4s
; CHECK-UNSAFE-NEXT:    fmul v1.4s, v3.4s, v2.4s
; CHECK-UNSAFE-NEXT:    fmul v0.4s, v1.4s, v0.4s
; CHECK-UNSAFE-NEXT:    ret
  %t0 = fadd <4 x float> %x0, %x1
  %t1 = fmul <4 x float> %x2, %t0
  %t2 = fmul <4 x float> %x3, %t1
  ret <4 x float> %t2
}

; Verify that 128-bit vector double-precision multiplies are reassociated.

define <2 x double> @reassociate_muls_v2f64(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, <2 x double> %x3) {
; CHECK-STD-LABEL: reassociate_muls_v2f64:
; CHECK-STD:       // %bb.0:
; CHECK-STD-NEXT:    fadd v0.2d, v0.2d, v1.2d
; CHECK-STD-NEXT:    fmul v0.2d, v2.2d, v0.2d
; CHECK-STD-NEXT:    fmul v0.2d, v3.2d, v0.2d
; CHECK-STD-NEXT:    ret
;
; CHECK-UNSAFE-LABEL: reassociate_muls_v2f64:
; CHECK-UNSAFE:       // %bb.0:
; CHECK-UNSAFE-NEXT:    fadd v0.2d, v0.2d, v1.2d
; CHECK-UNSAFE-NEXT:    fmul v1.2d, v3.2d, v2.2d
; CHECK-UNSAFE-NEXT:    fmul v0.2d, v1.2d, v0.2d
; CHECK-UNSAFE-NEXT:    ret
  %t0 = fadd <2 x double> %x0, %x1
  %t1 = fmul <2 x double> %x2, %t0
  %t2 = fmul <2 x double> %x3, %t1
  ret <2 x double> %t2
}

; Verify that vector integer arithmetic operations are reassociated.

define <2 x i32> @reassociate_muls_v2i32(<2 x i32> %x0, <2 x i32> %x1, <2 x i32> %x2, <2 x i32> %x3) {
; CHECK-LABEL: reassociate_muls_v2i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mul v0.2s, v0.2s, v1.2s
; CHECK-NEXT:    mul v1.2s, v3.2s, v2.2s
; CHECK-NEXT:    mul v0.2s, v1.2s, v0.2s
; CHECK-NEXT:    ret
  %t0 = mul <2 x i32> %x0, %x1
  %t1 = mul <2 x i32> %x2, %t0
  %t2 = mul <2 x i32> %x3, %t1
  ret <2 x i32> %t2
}

define <2 x i64> @reassociate_adds_v2i64(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, <2 x i64> %x3) {
; CHECK-LABEL: reassociate_adds_v2i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    add v0.2d, v0.2d, v1.2d
; CHECK-NEXT:    add v1.2d, v3.2d, v2.2d
; CHECK-NEXT:    add v0.2d, v1.2d, v0.2d
; CHECK-NEXT:    ret
  %t0 = add <2 x i64> %x0, %x1
  %t1 = add <2 x i64> %x2, %t0
  %t2 = add <2 x i64> %x3, %t1
  ret <2 x i64> %t2
}

; Verify that vector bitwise operations are reassociated.

define <16 x i8> @reassociate_ands_v16i8(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, <16 x i8> %x3) {
; CHECK-LABEL: reassociate_ands_v16i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    orr v0.16b, v0.16b, v1.16b
; CHECK-NEXT:    and v1.16b, v2.16b, v3.16b
; CHECK-NEXT:    and v0.16b, v0.16b, v1.16b
; CHECK-NEXT:    ret
  %t0 = or <16 x i8> %x0, %x1
  %t1 = and <16 x i8> %t0, %x2
  %t2 = and <16 x i8> %t1, %x3
  ret <16 x i8> %t2
}

define <4 x i16> @reassociate_ors_v4i16(<4 x i16> %x0, <4 x i16> %x1, <4 x i16> %x2, <4 x i16> %x3) {
; CHECK-LABEL: reassociate_ors_v4i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    eor v0.8b, v0.8b, v1.8b
; CHECK-NEXT:    orr v1.8b, v2.8b, v3.8b
; CHECK-NEXT:    orr v0.8b, v0.8b, v1.8b
; CHECK-NEXT:    ret
  %t0 = xor <4 x i16> %x0, %x1
  %t1 = or <4 x i16> %t0, %x2
  %t2 = or <4 x i16> %t1, %x3
  ret <4 x i16> %t2
}

define <4 x i32> @reassociate_xors_v4i32(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, <4 x i32> %x3) {
; CHECK-LABEL: reassociate_xors_v4i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    and v0.16b, v0.16b, v1.16b
; CHECK-NEXT:    eor v1.16b, v2.16b, v3.16b
; CHECK-NEXT:    eor v0.16b, v0.16b, v1.16b
; CHECK-NEXT:    ret
  %t0 = and <4 x i32> %x0, %x1
  %t1 = xor <4 x i32> %t0, %x2
  %t2 = xor <4 x i32> %t1, %x3
  ret <4 x i32> %t2
}

; Verify that scalable vector FP arithmetic operations are reassociated.

define <vscale x 8 x half> @reassociate_adds_nxv4f16(<vscale x 8 x half> %x0, <vscale x 8 x half> %x1, <vscale x 8 x half> %x2, <vscale x 8 x half> %x3) {
; CHECK-STD-LABEL: reassociate_adds_nxv4f16:
; CHECK-STD:       // %bb.0:
; CHECK-STD-NEXT:    fadd z0.h, z0.h, z1.h
; CHECK-STD-NEXT:    fadd z0.h, z2.h, z0.h
; CHECK-STD-NEXT:    fadd z0.h, z3.h, z0.h
; CHECK-STD-NEXT:    ret
;
; CHECK-UNSAFE-LABEL: reassociate_adds_nxv4f16:
; CHECK-UNSAFE:       // %bb.0:
; CHECK-UNSAFE-NEXT:    fadd z0.h, z0.h, z1.h
; CHECK-UNSAFE-NEXT:    fadd z1.h, z3.h, z2.h
; CHECK-UNSAFE-NEXT:    fadd z0.h, z1.h, z0.h
; CHECK-UNSAFE-NEXT:    ret
  %t0 = fadd reassoc <vscale x 8 x half> %x0, %x1
  %t1 = fadd reassoc <vscale x 8 x half> %x2, %t0
  %t2 = fadd reassoc <vscale x 8 x half> %x3, %t1
  ret <vscale x 8 x half> %t2
}

define <vscale x 4 x float> @reassociate_adds_nxv4f32(<vscale x 4 x float> %x0, <vscale x 4 x float> %x1, <vscale x 4 x float> %x2, <vscale x 4 x float> %x3) {
; CHECK-STD-LABEL: reassociate_adds_nxv4f32:
; CHECK-STD:       // %bb.0:
; CHECK-STD-NEXT:    fadd z0.s, z0.s, z1.s
; CHECK-STD-NEXT:    fadd z0.s, z2.s, z0.s
; CHECK-STD-NEXT:    fadd z0.s, z3.s, z0.s
; CHECK-STD-NEXT:    ret
;
; CHECK-UNSAFE-LABEL: reassociate_adds_nxv4f32:
; CHECK-UNSAFE:       // %bb.0:
; CHECK-UNSAFE-NEXT:    fadd z0.s, z0.s, z1.s
; CHECK-UNSAFE-NEXT:    fadd z1.s, z3.s, z2.s
; CHECK-UNSAFE-NEXT:    fadd z0.s, z1.s, z0.s
; CHECK-UNSAFE-NEXT:    ret
  %t0 = fadd reassoc <vscale x 4 x float> %x0, %x1
  %t1 = fadd reassoc <vscale x 4 x float> %x2, %t0
  %t2 = fadd reassoc <vscale x 4 x float> %x3, %t1
  ret <vscale x 4 x float> %t2
}

define <vscale x 2 x double> @reassociate_muls_nxv2f64(<vscale x 2 x double> %x0, <vscale x 2 x double> %x1, <vscale x 2 x double> %x2, <vscale x 2 x double> %x3) {
; CHECK-STD-LABEL: reassociate_muls_nxv2f64:
; CHECK-STD:       // %bb.0:
; CHECK-STD-NEXT:    fmul z0.d, z0.d, z1.d
; CHECK-STD-NEXT:    fmul z0.d, z2.d, z0.d
; CHECK-STD-NEXT:    fmul z0.d, z3.d, z0.d
; CHECK-STD-NEXT:    ret
;
; CHECK-UNSAFE-LABEL: reassociate_muls_nxv2f64:
; CHECK-UNSAFE:       // %bb.0:
; CHECK-UNSAFE-NEXT:    fmul z0.d, z0.d, z1.d
; CHECK-UNSAFE-NEXT:    fmul z1.d, z3.d, z2.d
; CHECK-UNSAFE-NEXT:    fmul z0.d, z1.d, z0.d
; CHECK-UNSAFE-NEXT:    ret
  %t0 = fmul reassoc <vscale x 2 x double> %x0, %x1
  %t1 = fmul reassoc <vscale x 2 x double> %x2, %t0
  %t2 = fmul reassoc <vscale x 2 x double> %x3, %t1
  ret <vscale x 2 x double> %t2
}

; Verify that scalable vector integer arithmetic operations are reassociated.

define <vscale x 16 x i8> @reassociate_muls_nxv16i8(<vscale x 16 x i8> %x0, <vscale x 16 x i8> %x1, <vscale x 16 x i8> %x2, <vscale x 16 x i8> %x3) {
; CHECK-LABEL: reassociate_muls_nxv16i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mul z0.b, z0.b, z1.b
; CHECK-NEXT:    mul z1.b, z3.b, z2.b
; CHECK-NEXT:    mul z0.b, z1.b, z0.b
; CHECK-NEXT:    ret
  %t0 = mul <vscale x 16 x i8> %x0, %x1
  %t1 = mul <vscale x 16 x i8> %x2, %t0
  %t2 = mul <vscale x 16 x i8> %x3, %t1
  ret <vscale x 16 x i8> %t2
}

define <vscale x 8 x i16> @reassociate_adds_nxv8i16(<vscale x 8 x i16> %x0, <vscale x 8 x i16> %x1, <vscale x 8 x i16> %x2, <vscale x 8 x i16> %x3) {
; CHECK-LABEL: reassociate_adds_nxv8i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    add z0.h, z0.h, z1.h
; CHECK-NEXT:    add z1.h, z3.h, z2.h
; CHECK-NEXT:    add z0.h, z1.h, z0.h
; CHECK-NEXT:    ret
  %t0 = add <vscale x 8 x i16> %x0, %x1
  %t1 = add <vscale x 8 x i16> %x2, %t0
  %t2 = add <vscale x 8 x i16> %x3, %t1
  ret <vscale x 8 x i16> %t2
}

define <vscale x 4 x i32> @reassociate_muls_nxv4i32(<vscale x 4 x i32> %x0, <vscale x 4 x i32> %x1, <vscale x 4 x i32> %x2, <vscale x 4 x i32> %x3) {
; CHECK-LABEL: reassociate_muls_nxv4i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mul z0.s, z0.s, z1.s
; CHECK-NEXT:    mul z1.s, z3.s, z2.s
; CHECK-NEXT:    mul z0.s, z1.s, z0.s
; CHECK-NEXT:    ret
  %t0 = mul <vscale x 4 x i32> %x0, %x1
  %t1 = mul <vscale x 4 x i32> %x2, %t0
  %t2 = mul <vscale x 4 x i32> %x3, %t1
  ret <vscale x 4 x i32> %t2
}

define <vscale x 2 x i64> @reassociate_adds_nxv2i64(<vscale x 2 x i64> %x0, <vscale x 2 x i64> %x1, <vscale x 2 x i64> %x2, <vscale x 2 x i64> %x3) {
; CHECK-LABEL: reassociate_adds_nxv2i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    add z0.d, z0.d, z1.d
; CHECK-NEXT:    add z1.d, z3.d, z2.d
; CHECK-NEXT:    add z0.d, z1.d, z0.d
; CHECK-NEXT:    ret
  %t0 = add <vscale x 2 x i64> %x0, %x1
  %t1 = add <vscale x 2 x i64> %x2, %t0
  %t2 = add <vscale x 2 x i64> %x3, %t1
  ret <vscale x 2 x i64> %t2
}

; Verify that scalable vector bitwise operations are reassociated.

define <vscale x 16 x i8> @reassociate_ands_nxv16i8(<vscale x 16 x i8> %x0, <vscale x 16 x i8> %x1, <vscale x 16 x i8> %x2, <vscale x 16 x i8> %x3) {
; CHECK-LABEL: reassociate_ands_nxv16i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    orr z0.d, z0.d, z1.d
; CHECK-NEXT:    and z1.d, z2.d, z3.d
; CHECK-NEXT:    and z0.d, z0.d, z1.d
; CHECK-NEXT:    ret
  %t0 = or <vscale x 16 x i8> %x0, %x1
  %t1 = and <vscale x 16 x i8> %t0, %x2
  %t2 = and <vscale x 16 x i8> %t1, %x3
  ret <vscale x 16 x i8> %t2
}

define <vscale x 8 x i16> @reassociate_ors_nxv8i16(<vscale x 8 x i16> %x0, <vscale x 8 x i16> %x1, <vscale x 8 x i16> %x2, <vscale x 8 x i16> %x3) {
; CHECK-LABEL: reassociate_ors_nxv8i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    eor z0.d, z0.d, z1.d
; CHECK-NEXT:    orr z1.d, z2.d, z3.d
; CHECK-NEXT:    orr z0.d, z0.d, z1.d
; CHECK-NEXT:    ret
  %t0 = xor <vscale x 8 x i16> %x0, %x1
  %t1 = or <vscale x 8 x i16> %t0, %x2
  %t2 = or <vscale x 8 x i16> %t1, %x3
  ret <vscale x 8 x i16> %t2
}

; PR25016: https://llvm.org/bugs/show_bug.cgi?id=25016
; Verify that reassociation is not happening needlessly or wrongly.

declare double @bar()

define double @reassociate_adds_from_calls() {
; CHECK-STD-LABEL: reassociate_adds_from_calls:
; CHECK-STD:       // %bb.0:
; CHECK-STD-NEXT:    str d10, [sp, #-32]! // 8-byte Folded Spill
; CHECK-STD-NEXT:    stp d9, d8, [sp, #8] // 16-byte Folded Spill
; CHECK-STD-NEXT:    str x30, [sp, #24] // 8-byte Folded Spill
; CHECK-STD-NEXT:    .cfi_def_cfa_offset 32
; CHECK-STD-NEXT:    .cfi_offset w30, -8
; CHECK-STD-NEXT:    .cfi_offset b8, -16
; CHECK-STD-NEXT:    .cfi_offset b9, -24
; CHECK-STD-NEXT:    .cfi_offset b10, -32
; CHECK-STD-NEXT:    bl bar
; CHECK-STD-NEXT:    fmov d8, d0
; CHECK-STD-NEXT:    bl bar
; CHECK-STD-NEXT:    fmov d9, d0
; CHECK-STD-NEXT:    bl bar
; CHECK-STD-NEXT:    fmov d10, d0
; CHECK-STD-NEXT:    bl bar
; CHECK-STD-NEXT:    fadd d1, d8, d9
; CHECK-STD-NEXT:    ldp d9, d8, [sp, #8] // 16-byte Folded Reload
; CHECK-STD-NEXT:    ldr x30, [sp, #24] // 8-byte Folded Reload
; CHECK-STD-NEXT:    fadd d1, d1, d10
; CHECK-STD-NEXT:    fadd d0, d1, d0
; CHECK-STD-NEXT:    ldr d10, [sp], #32 // 8-byte Folded Reload
; CHECK-STD-NEXT:    ret
;
; CHECK-UNSAFE-LABEL: reassociate_adds_from_calls:
; CHECK-UNSAFE:       // %bb.0:
; CHECK-UNSAFE-NEXT:    str d10, [sp, #-32]! // 8-byte Folded Spill
; CHECK-UNSAFE-NEXT:    stp d9, d8, [sp, #8] // 16-byte Folded Spill
; CHECK-UNSAFE-NEXT:    str x30, [sp, #24] // 8-byte Folded Spill
; CHECK-UNSAFE-NEXT:    .cfi_def_cfa_offset 32
; CHECK-UNSAFE-NEXT:    .cfi_offset w30, -8
; CHECK-UNSAFE-NEXT:    .cfi_offset b8, -16
; CHECK-UNSAFE-NEXT:    .cfi_offset b9, -24
; CHECK-UNSAFE-NEXT:    .cfi_offset b10, -32
; CHECK-UNSAFE-NEXT:    bl bar
; CHECK-UNSAFE-NEXT:    fmov d8, d0
; CHECK-UNSAFE-NEXT:    bl bar
; CHECK-UNSAFE-NEXT:    fmov d9, d0
; CHECK-UNSAFE-NEXT:    bl bar
; CHECK-UNSAFE-NEXT:    fmov d10, d0
; CHECK-UNSAFE-NEXT:    bl bar
; CHECK-UNSAFE-NEXT:    fadd d1, d8, d9
; CHECK-UNSAFE-NEXT:    ldp d9, d8, [sp, #8] // 16-byte Folded Reload
; CHECK-UNSAFE-NEXT:    ldr x30, [sp, #24] // 8-byte Folded Reload
; CHECK-UNSAFE-NEXT:    fadd d0, d10, d0
; CHECK-UNSAFE-NEXT:    fadd d0, d1, d0
; CHECK-UNSAFE-NEXT:    ldr d10, [sp], #32 // 8-byte Folded Reload
; CHECK-UNSAFE-NEXT:    ret
  %x0 = call double @bar()
  %x1 = call double @bar()
  %x2 = call double @bar()
  %x3 = call double @bar()
  %t0 = fadd double %x0, %x1
  %t1 = fadd double %t0, %x2
  %t2 = fadd double %t1, %x3
  ret double %t2
}

define double @already_reassociated() {
; CHECK-LABEL: already_reassociated:
; CHECK:       // %bb.0:
; CHECK-NEXT:    str d10, [sp, #-32]! // 8-byte Folded Spill
; CHECK-NEXT:    stp d9, d8, [sp, #8] // 16-byte Folded Spill
; CHECK-NEXT:    str x30, [sp, #24] // 8-byte Folded Spill
; CHECK-NEXT:    .cfi_def_cfa_offset 32
; CHECK-NEXT:    .cfi_offset w30, -8
; CHECK-NEXT:    .cfi_offset b8, -16
; CHECK-NEXT:    .cfi_offset b9, -24
; CHECK-NEXT:    .cfi_offset b10, -32
; CHECK-NEXT:    bl bar
; CHECK-NEXT:    fmov d8, d0
; CHECK-NEXT:    bl bar
; CHECK-NEXT:    fmov d9, d0
; CHECK-NEXT:    bl bar
; CHECK-NEXT:    fmov d10, d0
; CHECK-NEXT:    bl bar
; CHECK-NEXT:    fadd d1, d8, d9
; CHECK-NEXT:    ldp d9, d8, [sp, #8] // 16-byte Folded Reload
; CHECK-NEXT:    ldr x30, [sp, #24] // 8-byte Folded Reload
; CHECK-NEXT:    fadd d0, d10, d0
; CHECK-NEXT:    fadd d0, d1, d0
; CHECK-NEXT:    ldr d10, [sp], #32 // 8-byte Folded Reload
; CHECK-NEXT:    ret
  %x0 = call double @bar()
  %x1 = call double @bar()
  %x2 = call double @bar()
  %x3 = call double @bar()
  %t0 = fadd double %x0, %x1
  %t1 = fadd double %x2, %x3
  %t2 = fadd double %t0, %t1
  ret double %t2
}