; RUN: llc < %s -mtriple=x86_64-pc-win32-coreclr -verify-machineinstrs | FileCheck %s -check-prefix=WIN_X64
; RUN: llc < %s -mtriple=x86_64-pc-linux | FileCheck %s -check-prefix=LINUX

; By default, Windows CoreCLR requires an inline prologue stack expansion check
; if 4096 bytes or more are allocated on the stack.
;
; Every function below anchors its checks with a -LABEL directive for each
; prefix. Without the anchor, a "-NOT ... retq" pair is evaluated against
; whatever output follows the previous match, which can be the tail of the
; preceding function rather than the function the check is written next to.

; Prolog stack allocation >= 4096 bytes will require the probe sequence
define i32 @main4k() nounwind {
entry:
; WIN_X64-LABEL: main4k:
; WIN_X64: # %bb.0:
; WIN_X64:      movl    $4096, %eax
; WIN_X64:      xorq    %rcx, %rcx
; WIN_X64:      movq    %rsp, %rdx
; WIN_X64:      subq    %rax, %rdx
; WIN_X64:      cmovbq  %rcx, %rdx
; WIN_X64:      movq    %gs:16, %rcx
; WIN_X64:      cmpq    %rcx, %rdx
; WIN_X64:      jae     .LBB0_3
; WIN_X64:# %bb.1:
; WIN_X64:      andq    $-4096, %rdx
; WIN_X64:.LBB0_2:
; WIN_X64:      addq    $-4096, %rcx
; WIN_X64:      movb    $0, (%rcx)
; WIN_X64:      cmpq    %rcx, %rdx
; WIN_X64:      jne     .LBB0_2
; WIN_X64:.LBB0_3:
; WIN_X64:      subq    %rax, %rsp
; WIN_X64:      xorl    %eax, %eax
; WIN_X64:      addq    $4096, %rsp
; WIN_X64:      retq
; LINUX-LABEL: main4k:
; LINUX-NOT:    movq    %gs:16, %rcx
; LINUX:        retq
  %a = alloca [4096 x i8]
  ret i32 0
}

; Prolog stack allocation >= 4096 bytes will require the probe sequence
; Case with frame pointer
define i32 @main4k_frame() nounwind "frame-pointer"="all" {
entry:
; WIN_X64-LABEL: main4k_frame:
; WIN_X64:      movq    %gs:16, %rcx
; LINUX-LABEL: main4k_frame:
; LINUX-NOT:    movq    %gs:16, %rcx
; LINUX:        retq
  %a = alloca [4096 x i8]
  ret i32 0
}

; Prolog stack allocation >= 4096 bytes will require the probe sequence
; Case with INT args
define i32 @main4k_intargs(i32 %x, i32 %y) nounwind {
entry:
; WIN_X64-LABEL: main4k_intargs:
; WIN_X64:      movq    %gs:16, %rcx
; LINUX-LABEL: main4k_intargs:
; LINUX-NOT:    movq    %gs:16, %rcx
; LINUX:        retq
  %a = alloca [4096 x i8]
  %t = add i32 %x, %y
  ret i32 %t
}

; Prolog stack allocation >= 4096 bytes will require the probe sequence
; Case with FP regs
define i32 @main4k_fpargs(double %x, double %y) nounwind {
entry:
; WIN_X64-LABEL: main4k_fpargs:
; WIN_X64:      movq    %gs:16, %rcx
; LINUX-LABEL: main4k_fpargs:
; LINUX-NOT:    movq    %gs:16, %rcx
; LINUX:        retq
  %a = alloca [4096 x i8]
  ret i32 0
}

; Prolog stack allocation >= 4096 bytes will require the probe sequence
; Case with mixed regs
define i32 @main4k_mixargs(double %x, i32 %y) nounwind {
entry:
; WIN_X64-LABEL: main4k_mixargs:
; WIN_X64:      movq    %gs:16, %rcx
; LINUX-LABEL: main4k_mixargs:
; LINUX-NOT:    movq    %gs:16, %rcx
; LINUX:        retq
  %a = alloca [4096 x i8]
  ret i32 %y
}

; Make sure we don't emit the probe for a smaller prolog stack allocation.
; The -LABEL anchors confine the -NOT checks to this function's own body.
define i32 @main128() nounwind {
entry:
; WIN_X64-LABEL: main128:
; WIN_X64-NOT:  movq    %gs:16, %rcx
; WIN_X64:      retq
; LINUX-LABEL: main128:
; LINUX-NOT:    movq    %gs:16, %rcx
; LINUX:        retq
  %a = alloca [128 x i8]
  ret i32 0
}

; Make sure we don't emit the probe sequence if not on Windows even if the
; function has the Win64 calling convention. The CoreCLR run still expects
; the probe here.
define win64cc i32 @main4k_win64() nounwind {
entry:
; WIN_X64-LABEL: main4k_win64:
; WIN_X64:      movq    %gs:16, %rcx
; LINUX-LABEL: main4k_win64:
; LINUX-NOT:    movq    %gs:16, %rcx
; LINUX:        retq
  %a = alloca [4096 x i8]
  ret i32 0
}

declare i32 @bar(ptr) nounwind

; Within-body inline probe expansion: the probe is expected between the two
; calls, i.e. for the variable-sized alloca that follows the first call.
define win64cc i32 @main4k_alloca(i64 %n) nounwind {
entry:
; WIN_X64-LABEL: main4k_alloca:
; WIN_X64:      callq   bar
; WIN_X64:      movq    %gs:16, [[R:%r.*]]
; WIN_X64:      callq   bar
; LINUX-LABEL: main4k_alloca:
; LINUX:        callq   bar
; LINUX-NOT:    movq    %gs:16, [[R:%r.*]]
; LINUX:        callq   bar
  %a = alloca i8, i64 1024
  %ra = call i32 @bar(ptr %a) nounwind
  %b = alloca i8, i64 %n
  %rb = call i32 @bar(ptr %b) nounwind
  %r = add i32 %ra, %rb
  ret i32 %r
}

; Influence of stack-probe-size attribute: with the probe size set to 8192,
; a 4096-byte allocation must not be probed.
; Note this is not exposed in coreclr
define i32 @test_probe_size() "stack-probe-size"="8192" nounwind {
; WIN_X64-LABEL: test_probe_size:
; WIN_X64-NOT:  movq    %gs:16, %rcx
; WIN_X64:      retq
; LINUX-LABEL: test_probe_size:
; LINUX-NOT:    movq    %gs:16, %rcx
; LINUX:        retq
  %a = alloca [4096 x i8]
  ret i32 0
}