; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -O3 -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=+adx < %s | FileCheck %s --check-prefix=CHECK
; RUN: llc -O3 -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=-adx < %s | FileCheck %s --check-prefix=CHECK

target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-unknown"

; Stack reload folding tests.
;
; By including a nop call with side effects, we can force a partial register spill of the
; relevant registers and check that the reload is correctly folded into the instruction.
; (An illustrative C-level sketch of the intrinsics under test follows the declarations
; at the end of this file.)

define i8 @stack_fold_addcarry_u32(i8 %a0, i32 %a1, i32 %a2, ptr %a3) {
; CHECK-LABEL: stack_fold_addcarry_u32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    pushq %rbp
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    pushq %r15
; CHECK-NEXT:    .cfi_def_cfa_offset 24
; CHECK-NEXT:    pushq %r14
; CHECK-NEXT:    .cfi_def_cfa_offset 32
; CHECK-NEXT:    pushq %r13
; CHECK-NEXT:    .cfi_def_cfa_offset 40
; CHECK-NEXT:    pushq %r12
; CHECK-NEXT:    .cfi_def_cfa_offset 48
; CHECK-NEXT:    pushq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 56
; CHECK-NEXT:    .cfi_offset %rbx, -56
; CHECK-NEXT:    .cfi_offset %r12, -48
; CHECK-NEXT:    .cfi_offset %r13, -40
; CHECK-NEXT:    .cfi_offset %r14, -32
; CHECK-NEXT:    .cfi_offset %r15, -24
; CHECK-NEXT:    .cfi_offset %rbp, -16
; CHECK-NEXT:    movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT:    movl %edx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; CHECK-NEXT:    movl %esi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; CHECK-NEXT:    movl %edi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload
; CHECK-NEXT:    addb $-1, %al
; CHECK-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %edx # 4-byte Reload
; CHECK-NEXT:    adcl {{[-0-9]+}}(%r{{[sb]}}p), %edx # 4-byte Folded Reload
; CHECK-NEXT:    setb %al
; CHECK-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
; CHECK-NEXT:    movl %edx, (%rcx)
; CHECK-NEXT:    popq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 48
; CHECK-NEXT:    popq %r12
; CHECK-NEXT:    .cfi_def_cfa_offset 40
; CHECK-NEXT:    popq %r13
; CHECK-NEXT:    .cfi_def_cfa_offset 32
; CHECK-NEXT:    popq %r14
; CHECK-NEXT:    .cfi_def_cfa_offset 24
; CHECK-NEXT:    popq %r15
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    popq %rbp
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
  %1 = tail call i64 asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
  %2 = call { i8, i32 } @llvm.x86.addcarry.32(i8 %a0, i32 %a1, i32 %a2)
  %3 = extractvalue { i8, i32 } %2, 1
  store i32 %3, ptr %a3, align 1
  %4 = extractvalue { i8, i32 } %2, 0
  ret i8 %4
}

define i8 @stack_fold_addcarry_u64(i8 %a0, i64 %a1, i64 %a2, ptr %a3) {
; CHECK-LABEL: stack_fold_addcarry_u64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    pushq %rbp
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    pushq %r15
; CHECK-NEXT:    .cfi_def_cfa_offset 24
; CHECK-NEXT:    pushq %r14
; CHECK-NEXT:    .cfi_def_cfa_offset 32
; CHECK-NEXT:    pushq %r13
; CHECK-NEXT:    .cfi_def_cfa_offset 40
; CHECK-NEXT:    pushq %r12
; CHECK-NEXT:    .cfi_def_cfa_offset 48
; CHECK-NEXT:    pushq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 56
; CHECK-NEXT:    .cfi_offset %rbx, -56
; CHECK-NEXT:    .cfi_offset %r12, -48
; CHECK-NEXT:    .cfi_offset %r13, -40
; CHECK-NEXT:    .cfi_offset %r14, -32
; CHECK-NEXT:    .cfi_offset %r15, -24
; CHECK-NEXT:    .cfi_offset %rbp, -16
; CHECK-NEXT:    movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT:    movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT:    movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT:    movl %edi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload
; CHECK-NEXT:    addb $-1, %al
; CHECK-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload
; CHECK-NEXT:    adcq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Folded Reload
; CHECK-NEXT:    setb %al
; CHECK-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
; CHECK-NEXT:    movq %rdx, (%rcx)
; CHECK-NEXT:    popq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 48
; CHECK-NEXT:    popq %r12
; CHECK-NEXT:    .cfi_def_cfa_offset 40
; CHECK-NEXT:    popq %r13
; CHECK-NEXT:    .cfi_def_cfa_offset 32
; CHECK-NEXT:    popq %r14
; CHECK-NEXT:    .cfi_def_cfa_offset 24
; CHECK-NEXT:    popq %r15
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    popq %rbp
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
  %1 = tail call i64 asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
  %2 = call { i8, i64 } @llvm.x86.addcarry.64(i8 %a0, i64 %a1, i64 %a2)
  %3 = extractvalue { i8, i64 } %2, 1
  store i64 %3, ptr %a3, align 1
  %4 = extractvalue { i8, i64 } %2, 0
  ret i8 %4
}

define i8 @stack_fold_addcarryx_u32(i8 %a0, i32 %a1, i32 %a2, ptr %a3) {
; CHECK-LABEL: stack_fold_addcarryx_u32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    pushq %rbp
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    pushq %r15
; CHECK-NEXT:    .cfi_def_cfa_offset 24
; CHECK-NEXT:    pushq %r14
; CHECK-NEXT:    .cfi_def_cfa_offset 32
; CHECK-NEXT:    pushq %r13
; CHECK-NEXT:    .cfi_def_cfa_offset 40
; CHECK-NEXT:    pushq %r12
; CHECK-NEXT:    .cfi_def_cfa_offset 48
; CHECK-NEXT:    pushq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 56
; CHECK-NEXT:    .cfi_offset %rbx, -56
; CHECK-NEXT:    .cfi_offset %r12, -48
; CHECK-NEXT:    .cfi_offset %r13, -40
; CHECK-NEXT:    .cfi_offset %r14, -32
; CHECK-NEXT:    .cfi_offset %r15, -24
; CHECK-NEXT:    .cfi_offset %rbp, -16
; CHECK-NEXT:    movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT:    movl %edx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; CHECK-NEXT:    movl %esi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; CHECK-NEXT:    movl %edi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload
; CHECK-NEXT:    addb $-1, %al
; CHECK-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %edx # 4-byte Reload
; CHECK-NEXT:    adcl {{[-0-9]+}}(%r{{[sb]}}p), %edx # 4-byte Folded Reload
; CHECK-NEXT:    setb %al
; CHECK-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
; CHECK-NEXT:    movl %edx, (%rcx)
; CHECK-NEXT:    popq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 48
; CHECK-NEXT:    popq %r12
; CHECK-NEXT:    .cfi_def_cfa_offset 40
; CHECK-NEXT:    popq %r13
; CHECK-NEXT:    .cfi_def_cfa_offset 32
; CHECK-NEXT:    popq %r14
; CHECK-NEXT:    .cfi_def_cfa_offset 24
; CHECK-NEXT:    popq %r15
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    popq %rbp
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
  %1 = tail call i64 asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
  %2 = call { i8, i32 } @llvm.x86.addcarry.32(i8 %a0, i32 %a1, i32 %a2)
  %3 = extractvalue { i8, i32 } %2, 1
  store i32 %3, ptr %a3, align 1
  %4 = extractvalue { i8, i32 } %2, 0
  ret i8 %4
}

define i8 @stack_fold_addcarryx_u64(i8 %a0, i64 %a1, i64 %a2, ptr %a3) {
; CHECK-LABEL: stack_fold_addcarryx_u64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    pushq %rbp
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    pushq %r15
; CHECK-NEXT:    .cfi_def_cfa_offset 24
; CHECK-NEXT:    pushq %r14
; CHECK-NEXT:    .cfi_def_cfa_offset 32
; CHECK-NEXT:    pushq %r13
; CHECK-NEXT:    .cfi_def_cfa_offset 40
; CHECK-NEXT:    pushq %r12
; CHECK-NEXT:    .cfi_def_cfa_offset 48
; CHECK-NEXT:    pushq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 56
; CHECK-NEXT:    .cfi_offset %rbx, -56
; CHECK-NEXT:    .cfi_offset %r12, -48
; CHECK-NEXT:    .cfi_offset %r13, -40
; CHECK-NEXT:    .cfi_offset %r14, -32
; CHECK-NEXT:    .cfi_offset %r15, -24
; CHECK-NEXT:    .cfi_offset %rbp, -16
; CHECK-NEXT:    movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT:    movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT:    movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT:    movl %edi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload
; CHECK-NEXT:    addb $-1, %al
; CHECK-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload
; CHECK-NEXT:    adcq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Folded Reload
; CHECK-NEXT:    setb %al
; CHECK-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
; CHECK-NEXT:    movq %rdx, (%rcx)
; CHECK-NEXT:    popq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 48
; CHECK-NEXT:    popq %r12
; CHECK-NEXT:    .cfi_def_cfa_offset 40
; CHECK-NEXT:    popq %r13
; CHECK-NEXT:    .cfi_def_cfa_offset 32
; CHECK-NEXT:    popq %r14
; CHECK-NEXT:    .cfi_def_cfa_offset 24
; CHECK-NEXT:    popq %r15
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    popq %rbp
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
  %1 = tail call i64 asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
  %2 = call { i8, i64 } @llvm.x86.addcarry.64(i8 %a0, i64 %a1, i64 %a2)
  %3 = extractvalue { i8, i64 } %2, 1
  store i64 %3, ptr %a3, align 1
  %4 = extractvalue { i8, i64 } %2, 0
  ret i8 %4
}

define i8 @stack_fold_subborrow_u32(i8 %a0, i32 %a1, i32 %a2, ptr %a3) {
; CHECK-LABEL: stack_fold_subborrow_u32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    pushq %rbp
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    pushq %r15
; CHECK-NEXT:    .cfi_def_cfa_offset 24
; CHECK-NEXT:    pushq %r14
; CHECK-NEXT:    .cfi_def_cfa_offset 32
; CHECK-NEXT:    pushq %r13
; CHECK-NEXT:    .cfi_def_cfa_offset 40
; CHECK-NEXT:    pushq %r12
; CHECK-NEXT:    .cfi_def_cfa_offset 48
; CHECK-NEXT:    pushq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 56
; CHECK-NEXT:    .cfi_offset %rbx, -56
; CHECK-NEXT:    .cfi_offset %r12, -48
; CHECK-NEXT:    .cfi_offset %r13, -40
; CHECK-NEXT:    .cfi_offset %r14, -32
; CHECK-NEXT:    .cfi_offset %r15, -24
; CHECK-NEXT:    .cfi_offset %rbp, -16
; CHECK-NEXT:    movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT:    movl %edx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; CHECK-NEXT:    movl %esi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; CHECK-NEXT:    movl %edi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload
; CHECK-NEXT:    addb $-1, %al
; CHECK-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %edx # 4-byte Reload
; CHECK-NEXT:    sbbl {{[-0-9]+}}(%r{{[sb]}}p), %edx # 4-byte Folded Reload
; CHECK-NEXT:    setb %al
; CHECK-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
; CHECK-NEXT:    movl %edx, (%rcx)
; CHECK-NEXT:    popq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 48
; CHECK-NEXT:    popq %r12
; CHECK-NEXT:    .cfi_def_cfa_offset 40
; CHECK-NEXT:    popq %r13
; CHECK-NEXT:    .cfi_def_cfa_offset 32
; CHECK-NEXT:    popq %r14
; CHECK-NEXT:    .cfi_def_cfa_offset 24
; CHECK-NEXT:    popq %r15
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    popq %rbp
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
  %1 = tail call i64 asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
  %2 = call { i8, i32 } @llvm.x86.subborrow.32(i8 %a0, i32 %a1, i32 %a2)
  %3 = extractvalue { i8, i32 } %2, 1
  store i32 %3, ptr %a3, align 1
  %4 = extractvalue { i8, i32 } %2, 0
  ret i8 %4
}

define i8 @stack_fold_subborrow_u64(i8 %a0, i64 %a1, i64 %a2, ptr %a3) {
; CHECK-LABEL: stack_fold_subborrow_u64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    pushq %rbp
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    pushq %r15
; CHECK-NEXT:    .cfi_def_cfa_offset 24
; CHECK-NEXT:    pushq %r14
; CHECK-NEXT:    .cfi_def_cfa_offset 32
; CHECK-NEXT:    pushq %r13
; CHECK-NEXT:    .cfi_def_cfa_offset 40
; CHECK-NEXT:    pushq %r12
; CHECK-NEXT:    .cfi_def_cfa_offset 48
; CHECK-NEXT:    pushq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 56
; CHECK-NEXT:    .cfi_offset %rbx, -56
; CHECK-NEXT:    .cfi_offset %r12, -48
; CHECK-NEXT:    .cfi_offset %r13, -40
; CHECK-NEXT:    .cfi_offset %r14, -32
; CHECK-NEXT:    .cfi_offset %r15, -24
; CHECK-NEXT:    .cfi_offset %rbp, -16
; CHECK-NEXT:    movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT:    movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT:    movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT:    movl %edi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload
; CHECK-NEXT:    addb $-1, %al
; CHECK-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload
; CHECK-NEXT:    sbbq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Folded Reload
; CHECK-NEXT:    setb %al
; CHECK-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
; CHECK-NEXT:    movq %rdx, (%rcx)
; CHECK-NEXT:    popq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 48
; CHECK-NEXT:    popq %r12
; CHECK-NEXT:    .cfi_def_cfa_offset 40
; CHECK-NEXT:    popq %r13
; CHECK-NEXT:    .cfi_def_cfa_offset 32
; CHECK-NEXT:    popq %r14
; CHECK-NEXT:    .cfi_def_cfa_offset 24
; CHECK-NEXT:    popq %r15
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    popq %rbp
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
  %1 = tail call i64 asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
  %2 = call { i8, i64 } @llvm.x86.subborrow.64(i8 %a0, i64 %a1, i64 %a2)
  %3 = extractvalue { i8, i64 } %2, 1
  store i64 %3, ptr %a3, align 1
  %4 = extractvalue { i8, i64 } %2, 0
  ret i8 %4
}

declare { i8, i32 } @llvm.x86.addcarry.32(i8, i32, i32)
declare { i8, i64 } @llvm.x86.addcarry.64(i8, i64, i64)
declare { i8, i32 } @llvm.x86.subborrow.32(i8, i32, i32)
declare { i8, i64 } @llvm.x86.subborrow.64(i8, i64, i64)
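
; For reference, a minimal C-level sketch of the operations these tests exercise,
; using the <immintrin.h> carry intrinsics (_addcarry_u32/_addcarry_u64 and
; _subborrow_u32/_subborrow_u64), which clang lowers to the llvm.x86.addcarry.*
; and llvm.x86.subborrow.* intrinsics declared above. The helper names are
; illustrative only and not part of the test:
;
;   #include <immintrin.h>
;
;   // a1 + a2 + carry-in; the sum is stored through a3, the carry-out returned.
;   unsigned char add_u32(unsigned char c_in, unsigned int a1, unsigned int a2,
;                         unsigned int *a3) {
;     return _addcarry_u32(c_in, a1, a2, a3);
;   }
;
;   // a1 - a2 - borrow-in; the difference is stored through a3, the borrow-out returned.
;   unsigned char sub_u64(unsigned char b_in, unsigned long long a1,
;                         unsigned long long a2, unsigned long long *a3) {
;     return _subborrow_u64(b_in, a1, a2, a3);
;   }
;
; The "addb $-1, %al" / "setb %al" pairs in the checks above are how the carry-in
; byte is materialized into EFLAGS and the carry-out is read back out.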