1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py 2; RUN: llc -O3 -disable-peephole -mtriple=x86_64-unknown-unknown < %s | FileCheck %s 3 4target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" 5target triple = "x86_64-unknown-unknown" 6 7; Stack reload folding tests. 8; 9; By including a nop call with sideeffects we can force a partial register spill of the 10; relevant registers and check that the reload is correctly folded into the instruction. 11 12;TODO stack_fold_bsf_i16 13declare i16 @llvm.cttz.i16(i16, i1) 14 15define i32 @stack_fold_bsf_i32(i32 %a0) { 16; CHECK-LABEL: stack_fold_bsf_i32: 17; CHECK: # %bb.0: 18; CHECK-NEXT: pushq %rbp 19; CHECK-NEXT: .cfi_def_cfa_offset 16 20; CHECK-NEXT: pushq %r15 21; CHECK-NEXT: .cfi_def_cfa_offset 24 22; CHECK-NEXT: pushq %r14 23; CHECK-NEXT: .cfi_def_cfa_offset 32 24; CHECK-NEXT: pushq %r13 25; CHECK-NEXT: .cfi_def_cfa_offset 40 26; CHECK-NEXT: pushq %r12 27; CHECK-NEXT: .cfi_def_cfa_offset 48 28; CHECK-NEXT: pushq %rbx 29; CHECK-NEXT: .cfi_def_cfa_offset 56 30; CHECK-NEXT: .cfi_offset %rbx, -56 31; CHECK-NEXT: .cfi_offset %r12, -48 32; CHECK-NEXT: .cfi_offset %r13, -40 33; CHECK-NEXT: .cfi_offset %r14, -32 34; CHECK-NEXT: .cfi_offset %r15, -24 35; CHECK-NEXT: .cfi_offset %rbp, -16 36; CHECK-NEXT: movl %edi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill 37; CHECK-NEXT: #APP 38; CHECK-NEXT: nop 39; CHECK-NEXT: #NO_APP 40; CHECK-NEXT: rep bsfl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Folded Reload 41; CHECK-NEXT: popq %rbx 42; CHECK-NEXT: .cfi_def_cfa_offset 48 43; CHECK-NEXT: popq %r12 44; CHECK-NEXT: .cfi_def_cfa_offset 40 45; CHECK-NEXT: popq %r13 46; CHECK-NEXT: .cfi_def_cfa_offset 32 47; CHECK-NEXT: popq %r14 48; CHECK-NEXT: .cfi_def_cfa_offset 24 49; CHECK-NEXT: popq %r15 50; CHECK-NEXT: .cfi_def_cfa_offset 16 51; CHECK-NEXT: popq %rbp 52; CHECK-NEXT: .cfi_def_cfa_offset 8 53; CHECK-NEXT: retq 54 %1 = tail call i64 asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"() 55 %2 = call i32 @llvm.cttz.i32(i32 %a0, i1 -1) 56 ret i32 %2 57} 58declare i32 @llvm.cttz.i32(i32, i1) 59 60define i64 @stack_fold_bsf_i64(i64 %a0) { 61; CHECK-LABEL: stack_fold_bsf_i64: 62; CHECK: # %bb.0: 63; CHECK-NEXT: pushq %rbp 64; CHECK-NEXT: .cfi_def_cfa_offset 16 65; CHECK-NEXT: pushq %r15 66; CHECK-NEXT: .cfi_def_cfa_offset 24 67; CHECK-NEXT: pushq %r14 68; CHECK-NEXT: .cfi_def_cfa_offset 32 69; CHECK-NEXT: pushq %r13 70; CHECK-NEXT: .cfi_def_cfa_offset 40 71; CHECK-NEXT: pushq %r12 72; CHECK-NEXT: .cfi_def_cfa_offset 48 73; CHECK-NEXT: pushq %rbx 74; CHECK-NEXT: .cfi_def_cfa_offset 56 75; CHECK-NEXT: .cfi_offset %rbx, -56 76; CHECK-NEXT: .cfi_offset %r12, -48 77; CHECK-NEXT: .cfi_offset %r13, -40 78; CHECK-NEXT: .cfi_offset %r14, -32 79; CHECK-NEXT: .cfi_offset %r15, -24 80; CHECK-NEXT: .cfi_offset %rbp, -16 81; CHECK-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill 82; CHECK-NEXT: #APP 83; CHECK-NEXT: nop 84; CHECK-NEXT: #NO_APP 85; CHECK-NEXT: rep bsfq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Folded Reload 86; CHECK-NEXT: popq %rbx 87; CHECK-NEXT: .cfi_def_cfa_offset 48 88; CHECK-NEXT: popq %r12 89; CHECK-NEXT: .cfi_def_cfa_offset 40 90; CHECK-NEXT: popq %r13 91; CHECK-NEXT: .cfi_def_cfa_offset 32 92; CHECK-NEXT: popq %r14 93; CHECK-NEXT: .cfi_def_cfa_offset 24 94; CHECK-NEXT: popq %r15 95; CHECK-NEXT: .cfi_def_cfa_offset 16 96; CHECK-NEXT: popq %rbp 97; CHECK-NEXT: .cfi_def_cfa_offset 8 98; CHECK-NEXT: retq 99 %1 = tail call i64 asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"() 100 %2 = call i64 @llvm.cttz.i64(i64 %a0, i1 -1) 101 ret i64 %2 102} 103declare i64 @llvm.cttz.i64(i64, i1) 104 105;TODO stack_fold_bsr_i16 106declare i16 @llvm.ctlz.i16(i16, i1) 107 108define i32 @stack_fold_bsr_i32(i32 %a0) { 109; CHECK-LABEL: stack_fold_bsr_i32: 110; CHECK: # %bb.0: 111; CHECK-NEXT: pushq %rbp 112; CHECK-NEXT: .cfi_def_cfa_offset 16 113; CHECK-NEXT: pushq %r15 114; CHECK-NEXT: .cfi_def_cfa_offset 24 115; CHECK-NEXT: pushq %r14 116; CHECK-NEXT: .cfi_def_cfa_offset 32 117; CHECK-NEXT: pushq %r13 118; CHECK-NEXT: .cfi_def_cfa_offset 40 119; CHECK-NEXT: pushq %r12 120; CHECK-NEXT: .cfi_def_cfa_offset 48 121; CHECK-NEXT: pushq %rbx 122; CHECK-NEXT: .cfi_def_cfa_offset 56 123; CHECK-NEXT: .cfi_offset %rbx, -56 124; CHECK-NEXT: .cfi_offset %r12, -48 125; CHECK-NEXT: .cfi_offset %r13, -40 126; CHECK-NEXT: .cfi_offset %r14, -32 127; CHECK-NEXT: .cfi_offset %r15, -24 128; CHECK-NEXT: .cfi_offset %rbp, -16 129; CHECK-NEXT: movl %edi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill 130; CHECK-NEXT: #APP 131; CHECK-NEXT: nop 132; CHECK-NEXT: #NO_APP 133; CHECK-NEXT: bsrl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Folded Reload 134; CHECK-NEXT: xorl $31, %eax 135; CHECK-NEXT: popq %rbx 136; CHECK-NEXT: .cfi_def_cfa_offset 48 137; CHECK-NEXT: popq %r12 138; CHECK-NEXT: .cfi_def_cfa_offset 40 139; CHECK-NEXT: popq %r13 140; CHECK-NEXT: .cfi_def_cfa_offset 32 141; CHECK-NEXT: popq %r14 142; CHECK-NEXT: .cfi_def_cfa_offset 24 143; CHECK-NEXT: popq %r15 144; CHECK-NEXT: .cfi_def_cfa_offset 16 145; CHECK-NEXT: popq %rbp 146; CHECK-NEXT: .cfi_def_cfa_offset 8 147; CHECK-NEXT: retq 148 %1 = tail call i64 asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"() 149 %2 = call i32 @llvm.ctlz.i32(i32 %a0, i1 -1) 150 ret i32 %2 151} 152declare i32 @llvm.ctlz.i32(i32, i1) 153 154define i64 @stack_fold_bsr_i64(i64 %a0) { 155; CHECK-LABEL: stack_fold_bsr_i64: 156; CHECK: # %bb.0: 157; CHECK-NEXT: pushq %rbp 158; CHECK-NEXT: .cfi_def_cfa_offset 16 159; CHECK-NEXT: pushq %r15 160; CHECK-NEXT: .cfi_def_cfa_offset 24 161; CHECK-NEXT: pushq %r14 162; CHECK-NEXT: .cfi_def_cfa_offset 32 163; CHECK-NEXT: pushq %r13 164; CHECK-NEXT: .cfi_def_cfa_offset 40 165; CHECK-NEXT: pushq %r12 166; CHECK-NEXT: .cfi_def_cfa_offset 48 167; CHECK-NEXT: pushq %rbx 168; CHECK-NEXT: .cfi_def_cfa_offset 56 169; CHECK-NEXT: .cfi_offset %rbx, -56 170; CHECK-NEXT: .cfi_offset %r12, -48 171; CHECK-NEXT: .cfi_offset %r13, -40 172; CHECK-NEXT: .cfi_offset %r14, -32 173; CHECK-NEXT: .cfi_offset %r15, -24 174; CHECK-NEXT: .cfi_offset %rbp, -16 175; CHECK-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill 176; CHECK-NEXT: #APP 177; CHECK-NEXT: nop 178; CHECK-NEXT: #NO_APP 179; CHECK-NEXT: bsrq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Folded Reload 180; CHECK-NEXT: xorq $63, %rax 181; CHECK-NEXT: popq %rbx 182; CHECK-NEXT: .cfi_def_cfa_offset 48 183; CHECK-NEXT: popq %r12 184; CHECK-NEXT: .cfi_def_cfa_offset 40 185; CHECK-NEXT: popq %r13 186; CHECK-NEXT: .cfi_def_cfa_offset 32 187; CHECK-NEXT: popq %r14 188; CHECK-NEXT: .cfi_def_cfa_offset 24 189; CHECK-NEXT: popq %r15 190; CHECK-NEXT: .cfi_def_cfa_offset 16 191; CHECK-NEXT: popq %rbp 192; CHECK-NEXT: .cfi_def_cfa_offset 8 193; CHECK-NEXT: retq 194 %1 = tail call i64 asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"() 195 %2 = call i64 @llvm.ctlz.i64(i64 %a0, i1 -1) 196 ret i64 %2 197} 198declare i64 @llvm.ctlz.i64(i64, i1) 199