; RUN: llc < %s -mcpu=cortex-a8 -align-neon-spills=0 | FileCheck %s
; RUN: llc < %s -mcpu=cortex-a8 -align-neon-spills=1 | FileCheck %s --check-prefix=NEON
;
; Verifies the prologue/epilogue code generated for functions that clobber
; d-registers in the d8-d15 range. The first run (-align-neon-spills=0) is
; matched with the CHECK prefix, the second run (-align-neon-spills=1) with the
; NEON prefix.
target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:32:64-v128:32:128-a0:0:32-n32-S32"
target triple = "thumbv7-apple-ios"

; CHECK: f
; This function is forced to spill a double.
; Verify that the spill slot is properly aligned.
;
; The callee-saved r4 (pushed in the prologue below) is used as a scratch
; register for stack realignment.
; CHECK: push {r4, r7, lr}
; CHECK: bfc r4, #0, #3
; CHECK: mov sp, r4
define void @f(double* nocapture %p) nounwind ssp {
entry:
  %0 = load double* %p, align 4
  tail call void asm sideeffect "", "~{d8},~{d9},~{d10},~{d11},~{d12},~{d13},~{d14},~{d15}"() nounwind
  tail call void @g() nounwind
  store double %0, double* %p, align 4
  ret void
}

; NEON: f
; NEON: push {r4, r7, lr}
; NEON: sub.w r4, sp, #64
; NEON: bfc r4, #0, #4
; Stack pointer must be updated before the spills.
; NEON: mov sp, r4
; NEON: vst1.64 {d8, d9, d10, d11}, [r4:128]!
; NEON: vst1.64 {d12, d13, d14, d15}, [r4:128]
; Stack pointer adjustment for the stack frame contents.
; This could legally happen before the spills.
; Since the spill slot is only 8 bytes, technically it would be fine to only
; subtract #8 here. That would leave sp less aligned than some stack slots,
; and would probably blow MFI's mind.
; NEON: sub sp, #16
; The epilog is free to use another scratch register than r4.
; NEON: add r[[R4:[0-9]+]], sp, #16
; NEON: vld1.64 {d8, d9, d10, d11}, [r[[R4]]:128]!
; NEON: vld1.64 {d12, d13, d14, d15}, [r[[R4]]:128]
; The stack pointer restore must happen after the reloads.
; NEON: mov sp,
; NEON: pop

declare void @g()

; Spill 7 d-registers.
define void @f7(double* nocapture %p) nounwind ssp {
entry:
  tail call void asm sideeffect "", "~{d8},~{d9},~{d10},~{d11},~{d12},~{d13},~{d14}"() nounwind
  ret void
}

; NEON: f7
; NEON: push {r4, r7, lr}
; The aligned spill area covers 7 d-registers of 8 bytes each (56 bytes).
; NEON: sub.w r4, sp, #56
; NEON: bfc r4, #0, #4
; Stack pointer must be updated before the spills.
; NEON: mov sp, r4
; NEON: vst1.64 {d8, d9, d10, d11}, [r4:128]!
; NEON: vst1.64 {d12, d13}, [r4:128]
; The odd register left over (d14) is stored on its own with vstr.
; NEON: vstr d14, [r4, #16]
; Epilog
; NEON: vld1.64 {d8, d9, d10, d11},
; NEON: vld1.64 {d12, d13},
; NEON: vldr d14,
; The stack pointer restore must happen after the reloads.
; NEON: mov sp,
; NEON: pop

; Spill 7 d-registers, leave a hole.
; The clobber list below skips d11, splitting the registers into d8-d10 and
; d12-d15.
define void @f3plus4(double* nocapture %p) nounwind ssp {
entry:
  tail call void asm sideeffect "", "~{d8},~{d9},~{d10},~{d12},~{d13},~{d14},~{d15}"() nounwind
  ret void
}

; Aligned spilling only works for contiguous ranges starting from d8.
; The rest goes to the standard vpush instructions.
; NEON: f3plus4
; NEON: push {r4, r7, lr}
; NEON: vpush {d12, d13, d14, d15}
; The aligned spill area covers d8-d10: 3 d-registers of 8 bytes (24 bytes).
; NEON: sub.w r4, sp, #24
; NEON: bfc r4, #0, #4
; Stack pointer must be updated before the spills.
; NEON: mov sp, r4
; NEON: vst1.64 {d8, d9}, [r4:128]
; NEON: vstr d10, [r4, #16]
; Epilog
; NEON: vld1.64 {d8, d9},
; NEON: vldr d10, [{{.*}}, #16]
; The stack pointer restore must happen after the reloads.
; NEON: mov sp,
; NEON: vpop {d12, d13, d14, d15}
; NEON: pop