; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -O3 -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=+lwp < %s | FileCheck %s

target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-unknown"

; Stack reload folding tests.
;
; By including a nop call with sideeffects we can force a partial register spill of the
; relevant registers and check that the reload is correctly folded into the instruction.
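;
; The "nop" inline asm clobbers every general purpose register except the stack
; pointer, so both arguments must be spilled across it. The CHECK lines then
; expect the %a1 operand of lwpins/lwpval to be consumed directly from its
; 4-byte spill slot (the "Folded Reload") rather than through a separate
; reload; for lwpins, the i8 result comes from the carry flag via setb.
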
define i8 @stack_fold_lwpins_u32(i32 %a0, i32 %a1) {
; CHECK-LABEL: stack_fold_lwpins_u32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    pushq %rbp
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    pushq %r15
; CHECK-NEXT:    .cfi_def_cfa_offset 24
; CHECK-NEXT:    pushq %r14
; CHECK-NEXT:    .cfi_def_cfa_offset 32
; CHECK-NEXT:    pushq %r13
; CHECK-NEXT:    .cfi_def_cfa_offset 40
; CHECK-NEXT:    pushq %r12
; CHECK-NEXT:    .cfi_def_cfa_offset 48
; CHECK-NEXT:    pushq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 56
; CHECK-NEXT:    .cfi_offset %rbx, -56
; CHECK-NEXT:    .cfi_offset %r12, -48
; CHECK-NEXT:    .cfi_offset %r13, -40
; CHECK-NEXT:    .cfi_offset %r14, -32
; CHECK-NEXT:    .cfi_offset %r15, -24
; CHECK-NEXT:    .cfi_offset %rbp, -16
; CHECK-NEXT:    movl %esi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; CHECK-NEXT:    movl %edi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload
; CHECK-NEXT:    lwpins $2814, {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Folded Reload
; CHECK-NEXT:    # imm = 0xAFE
; CHECK-NEXT:    setb %al
; CHECK-NEXT:    popq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 48
; CHECK-NEXT:    popq %r12
; CHECK-NEXT:    .cfi_def_cfa_offset 40
; CHECK-NEXT:    popq %r13
; CHECK-NEXT:    .cfi_def_cfa_offset 32
; CHECK-NEXT:    popq %r14
; CHECK-NEXT:    .cfi_def_cfa_offset 24
; CHECK-NEXT:    popq %r15
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    popq %rbp
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
  %1 = tail call i64 asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
  %2 = tail call i8 @llvm.x86.lwpins32(i32 %a0, i32 %a1, i32 2814)
  ret i8 %2
}
declare i8 @llvm.x86.lwpins32(i32, i32, i32)

define i8 @stack_fold_lwpins_u64(i64 %a0, i32 %a1) {
; CHECK-LABEL: stack_fold_lwpins_u64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    pushq %rbp
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    pushq %r15
; CHECK-NEXT:    .cfi_def_cfa_offset 24
; CHECK-NEXT:    pushq %r14
; CHECK-NEXT:    .cfi_def_cfa_offset 32
; CHECK-NEXT:    pushq %r13
; CHECK-NEXT:    .cfi_def_cfa_offset 40
; CHECK-NEXT:    pushq %r12
; CHECK-NEXT:    .cfi_def_cfa_offset 48
; CHECK-NEXT:    pushq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 56
; CHECK-NEXT:    .cfi_offset %rbx, -56
; CHECK-NEXT:    .cfi_offset %r12, -48
; CHECK-NEXT:    .cfi_offset %r13, -40
; CHECK-NEXT:    .cfi_offset %r14, -32
; CHECK-NEXT:    .cfi_offset %r15, -24
; CHECK-NEXT:    .cfi_offset %rbp, -16
; CHECK-NEXT:    movl %esi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; CHECK-NEXT:    movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
; CHECK-NEXT:    lwpins $2814, {{[-0-9]+}}(%r{{[sb]}}p), %rax # 4-byte Folded Reload
; CHECK-NEXT:    # imm = 0xAFE
; CHECK-NEXT:    setb %al
; CHECK-NEXT:    popq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 48
; CHECK-NEXT:    popq %r12
; CHECK-NEXT:    .cfi_def_cfa_offset 40
; CHECK-NEXT:    popq %r13
; CHECK-NEXT:    .cfi_def_cfa_offset 32
; CHECK-NEXT:    popq %r14
; CHECK-NEXT:    .cfi_def_cfa_offset 24
; CHECK-NEXT:    popq %r15
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    popq %rbp
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
  %1 = tail call i64 asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
  %2 = tail call i8 @llvm.x86.lwpins64(i64 %a0, i32 %a1, i32 2814)
  ret i8 %2
}
declare i8 @llvm.x86.lwpins64(i64, i32, i32)

define void @stack_fold_lwpval_u32(i32 %a0, i32 %a1) {
; CHECK-LABEL: stack_fold_lwpval_u32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    pushq %rbp
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    pushq %r15
; CHECK-NEXT:    .cfi_def_cfa_offset 24
; CHECK-NEXT:    pushq %r14
; CHECK-NEXT:    .cfi_def_cfa_offset 32
; CHECK-NEXT:    pushq %r13
; CHECK-NEXT:    .cfi_def_cfa_offset 40
; CHECK-NEXT:    pushq %r12
; CHECK-NEXT:    .cfi_def_cfa_offset 48
; CHECK-NEXT:    pushq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 56
; CHECK-NEXT:    .cfi_offset %rbx, -56
; CHECK-NEXT:    .cfi_offset %r12, -48
; CHECK-NEXT:    .cfi_offset %r13, -40
; CHECK-NEXT:    .cfi_offset %r14, -32
; CHECK-NEXT:    .cfi_offset %r15, -24
; CHECK-NEXT:    .cfi_offset %rbp, -16
; CHECK-NEXT:    movl %esi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; CHECK-NEXT:    movl %edi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload
; CHECK-NEXT:    lwpval $2814, {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Folded Reload
; CHECK-NEXT:    # imm = 0xAFE
; CHECK-NEXT:    popq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 48
; CHECK-NEXT:    popq %r12
; CHECK-NEXT:    .cfi_def_cfa_offset 40
; CHECK-NEXT:    popq %r13
; CHECK-NEXT:    .cfi_def_cfa_offset 32
; CHECK-NEXT:    popq %r14
; CHECK-NEXT:    .cfi_def_cfa_offset 24
; CHECK-NEXT:    popq %r15
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    popq %rbp
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
  %1 = tail call i64 asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
  tail call void @llvm.x86.lwpval32(i32 %a0, i32 %a1, i32 2814)
  ret void
}
declare void @llvm.x86.lwpval32(i32, i32, i32)
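
; As with lwpins_u64 above, the i64 %a0 is spilled and reloaded through an
; 8-byte slot, while the folded %a1 operand remains a 4-byte slot.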
define void @stack_fold_lwpval_u64(i64 %a0, i32 %a1) {
; CHECK-LABEL: stack_fold_lwpval_u64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    pushq %rbp
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    pushq %r15
; CHECK-NEXT:    .cfi_def_cfa_offset 24
; CHECK-NEXT:    pushq %r14
; CHECK-NEXT:    .cfi_def_cfa_offset 32
; CHECK-NEXT:    pushq %r13
; CHECK-NEXT:    .cfi_def_cfa_offset 40
; CHECK-NEXT:    pushq %r12
; CHECK-NEXT:    .cfi_def_cfa_offset 48
; CHECK-NEXT:    pushq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 56
; CHECK-NEXT:    .cfi_offset %rbx, -56
; CHECK-NEXT:    .cfi_offset %r12, -48
; CHECK-NEXT:    .cfi_offset %r13, -40
; CHECK-NEXT:    .cfi_offset %r14, -32
; CHECK-NEXT:    .cfi_offset %r15, -24
; CHECK-NEXT:    .cfi_offset %rbp, -16
; CHECK-NEXT:    movl %esi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; CHECK-NEXT:    movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
; CHECK-NEXT:    lwpval $2814, {{[-0-9]+}}(%r{{[sb]}}p), %rax # 4-byte Folded Reload
; CHECK-NEXT:    # imm = 0xAFE
; CHECK-NEXT:    popq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 48
; CHECK-NEXT:    popq %r12
; CHECK-NEXT:    .cfi_def_cfa_offset 40
; CHECK-NEXT:    popq %r13
; CHECK-NEXT:    .cfi_def_cfa_offset 32
; CHECK-NEXT:    popq %r14
; CHECK-NEXT:    .cfi_def_cfa_offset 24
; CHECK-NEXT:    popq %r15
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    popq %rbp
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
  %1 = tail call i64 asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
  tail call void @llvm.x86.lwpval64(i64 %a0, i32 %a1, i32 2814)
  ret void
}
declare void @llvm.x86.lwpval64(i64, i32, i32)