; RUN: llc < %s -O0 -march=nvptx -mattr=+ptx60 -mcpu=sm_30 | FileCheck %s --check-prefixes=CHECK,CHECK32
; RUN: llc < %s -O0 -march=nvptx64 -mattr=+ptx60 -mcpu=sm_30 | FileCheck %s --check-prefixes=CHECK,CHECK64
; RUN: %if ptxas && !ptxas-12.0 %{ llc < %s -O0 -march=nvptx -mattr=+ptx60 -mcpu=sm_30 | %ptxas-verify %}
; RUN: %if ptxas %{ llc < %s -O0 -march=nvptx64 -mattr=+ptx60 -mcpu=sm_30 | %ptxas-verify %}

; Checks the PTX emitted at -O0 for variadic functions on the 32-bit and
; 64-bit NVPTX targets:
;   * @foo      - callee side: va_start, va_copy and va_arg for i32, i64,
;                 double and ptr.
;   * @test_foo - caller side: an indirect call through a variadic function
;                 pointer.
; Patterns common to both targets use the shared prefix; patterns whose
; register class or constants depend on the pointer width use the 32-bit-only
; or 64-bit-only prefix selected by the first two invocations above.
; NOTE(review): the 32-bit ptxas verification is skipped for ptxas 12.0,
; presumably because that release no longer accepts 32-bit targets - confirm.

; Capture the address size once; later patterns reuse it as the bit width of
; pointer-sized moves, loads, stores and arithmetic.
; CHECK: .address_size [[BITS:32|64]]

%struct.__va_list_tag = type { ptr, ptr, i32, i32 }

; Global holding the address of @foo so that @test_foo can call it indirectly.
@foo_ptr = internal addrspace(1) global ptr @foo, align 8

define i32 @foo(i32 %a, ...) {
entry:
  %al = alloca [1 x %struct.__va_list_tag], align 8
  %al2 = alloca [1 x %struct.__va_list_tag], align 8

; Test va_start
; The variadic arguments are expected to arrive as an unsized, 8-byte aligned
; byte-array parameter named foo_vararg; va_start stores its address at [%SP].
; CHECK: .param .align 8 .b8 foo_vararg[]
; CHECK: mov.b[[BITS]] [[VA_PTR:%(r|rd)[0-9]+]], foo_vararg;
; CHECK-NEXT: st.u[[BITS]] [%SP], [[VA_PTR]];

  call void @llvm.va_start(ptr %al)

; Test va_copy()
; Expected as a plain pointer-sized load from the first list followed by a
; store to the second list at some offset from %SP.
; CHECK-NEXT: ld.u[[BITS]] [[VA_PTR:%(r|rd)[0-9]+]], [%SP];
; CHECK-NEXT: st.u[[BITS]] [%SP+{{[0-9]+}}], [[VA_PTR]];

  call void @llvm.va_copy(ptr %al2, ptr %al)

; Test va_arg(ap, int32_t)
; Each va_arg below is expected to expand to the same sequence: load the
; current pointer, round it up to the argument alignment (add align-1, then
; mask), store the pointer advanced past the argument, and finally load the
; value from the aligned address.
; CHECK-NEXT: ld.u[[BITS]] [[VA_PTR:%(r|rd)[0-9]+]], [%SP];
; CHECK-NEXT: add.s[[BITS]] [[VA_PTR_TMP:%(r|rd)[0-9]+]], [[VA_PTR]], 3;
; CHECK-NEXT: and.b[[BITS]] [[VA_PTR_ALIGN:%(r|rd)[0-9]+]], [[VA_PTR_TMP]], -4;
; CHECK-NEXT: add.s[[BITS]] [[VA_PTR_NEXT:%(r|rd)[0-9]+]], [[VA_PTR_ALIGN]], 4;
; CHECK-NEXT: st.u[[BITS]] [%SP], [[VA_PTR_NEXT]];
; CHECK-NEXT: ld.local.u32 %r{{[0-9]+}}, [[[VA_PTR_ALIGN]]];

  %0 = va_arg ptr %al, i32

; Test va_arg(ap, int64_t)
; CHECK-NEXT: ld.u[[BITS]] [[VA_PTR:%(r|rd)[0-9]+]], [%SP];
; CHECK-NEXT: add.s[[BITS]] [[VA_PTR_TMP:%(r|rd)[0-9]+]], [[VA_PTR]], 7;
; CHECK-NEXT: and.b[[BITS]] [[VA_PTR_ALIGN:%(r|rd)[0-9]+]], [[VA_PTR_TMP]], -8;
; CHECK-NEXT: add.s[[BITS]] [[VA_PTR_NEXT:%(r|rd)[0-9]+]], [[VA_PTR_ALIGN]], 8;
; CHECK-NEXT: st.u[[BITS]] [%SP], [[VA_PTR_NEXT]];
; CHECK-NEXT: ld.local.u64 %rd{{[0-9]+}}, [[[VA_PTR_ALIGN]]];

  %1 = va_arg ptr %al, i64

; Test va_arg(ap, double)
; CHECK-NEXT: ld.u[[BITS]] [[VA_PTR:%(r|rd)[0-9]+]], [%SP];
; CHECK-NEXT: add.s[[BITS]] [[VA_PTR_TMP:%(r|rd)[0-9]+]], [[VA_PTR]], 7;
; CHECK-NEXT: and.b[[BITS]] [[VA_PTR_ALIGN:%(r|rd)[0-9]+]], [[VA_PTR_TMP]], -8;
; CHECK-NEXT: add.s[[BITS]] [[VA_PTR_NEXT:%(r|rd)[0-9]+]], [[VA_PTR_ALIGN]], 8;
; CHECK-NEXT: st.u[[BITS]] [%SP], [[VA_PTR_NEXT]];
; CHECK-NEXT: ld.local.f64 %fd{{[0-9]+}}, [[[VA_PTR_ALIGN]]];

  %2 = va_arg ptr %al, double

; Test va_arg(ap, ptr)
; The pointer argument is the only one whose size and alignment follow the
; target: 4 bytes on the 32-bit target and 8 bytes on the 64-bit target, so
; the rounding and increment constants are checked per target.
; CHECK-NEXT: ld.u[[BITS]] [[VA_PTR:%(r|rd)[0-9]+]], [%SP];
; CHECK32-NEXT: add.s32 [[VA_PTR_TMP:%r[0-9]+]], [[VA_PTR]], 3;
; CHECK64-NEXT: add.s64 [[VA_PTR_TMP:%rd[0-9]+]], [[VA_PTR]], 7;
; CHECK32-NEXT: and.b32 [[VA_PTR_ALIGN:%r[0-9]+]], [[VA_PTR_TMP]], -4;
; CHECK64-NEXT: and.b64 [[VA_PTR_ALIGN:%rd[0-9]+]], [[VA_PTR_TMP]], -8;
; CHECK32-NEXT: add.s32 [[VA_PTR_NEXT:%r[0-9]+]], [[VA_PTR_ALIGN]], 4;
; CHECK64-NEXT: add.s64 [[VA_PTR_NEXT:%rd[0-9]+]], [[VA_PTR_ALIGN]], 8;
; CHECK-NEXT: st.u[[BITS]] [%SP], [[VA_PTR_NEXT]];
; CHECK-NEXT: ld.local.u[[BITS]] %{{(r|rd)[0-9]+}}, [[[VA_PTR_ALIGN]]];

  %3 = va_arg ptr %al, ptr
  %call = call i32 @bar(i32 %a, i32 %0, i64 %1, double %2, ptr %3)

  call void @llvm.va_end(ptr %al)
  ; Read one i32 through the copied list as well; no output is checked for
  ; this part of the function.
  %4 = va_arg ptr %al2, i32
  call void @llvm.va_end(ptr %al2)
  %5 = add i32 %call, %4
  ret i32 %5
}

define i32 @test_foo(i32 %i, i64 %l, double %d, ptr %p) {
; Test indirect variadic function call.

; Load arguments to temporary variables
; CHECK32: ld.param.u32 [[ARG_VOID_PTR:%r[0-9]+]], [test_foo_param_3];
; CHECK64: ld.param.u64 [[ARG_VOID_PTR:%rd[0-9]+]], [test_foo_param_3];
; CHECK-NEXT: ld.param.f64 [[ARG_DOUBLE:%fd[0-9]+]], [test_foo_param_2];
; CHECK-NEXT: ld.param.u64 [[ARG_I64:%rd[0-9]+]], [test_foo_param_1];
; CHECK-NEXT: ld.param.u32 [[ARG_I32:%r[0-9]+]], [test_foo_param_0];

; Store arguments to an array
; The four variadic arguments are expected in a single byte-array parameter at
; offsets 0, 4, 12 and 20; only the trailing pointer differs in size, giving a
; total of 24 bytes on the 32-bit target and 28 bytes on the 64-bit target.
; The call prototype is expected to describe the fixed i32 as a .b32 parameter
; followed by an unsized, 8-byte aligned byte array for the variadic part.
; CHECK32: .param .align 8 .b8 param1[24];
; CHECK64: .param .align 8 .b8 param1[28];
; CHECK-NEXT: st.param.b32 [param1], [[ARG_I32]];
; CHECK-NEXT: st.param.b64 [param1+4], [[ARG_I64]];
; CHECK-NEXT: st.param.f64 [param1+12], [[ARG_DOUBLE]];
; CHECK-NEXT: st.param.b[[BITS]] [param1+20], [[ARG_VOID_PTR]];
; CHECK-NEXT: .param .b32 retval0;
; CHECK-NEXT: prototype_1 : .callprototype (.param .b32 _) _ (.param .b32 _, .param .align 8 .b8 _[]

entry:
  ; Fetch the callee address from @foo_ptr so the call is indirect.
  %ptr = load ptr, ptr addrspacecast (ptr addrspace(1) @foo_ptr to ptr), align 8
  %call = call i32 (i32, ...) %ptr(i32 4, i32 %i, i64 %l, double %d, ptr %p)
  ret i32 %call
}

declare void @llvm.va_start(ptr)
declare void @llvm.va_end(ptr)
declare void @llvm.va_copy(ptr, ptr)
declare i32 @bar(i32, i32, i64, double, ptr)