; Exercises NVPTX lowering of a by-value %struct.float2 argument when the
; callee asks for 8-byte parameter alignment in one of two ways:
;   * @callee_md - through the "align" entry in !nvvm.annotations;
;   * @callee    - through an alignstack(8) parameter attribute.
; In both cases the expected PTX declares the parameter as an 8-byte aligned
; .b8 array of size 8 and moves both floats with a single v2.f32 access.
;
; Note: both llc invocations pass -mtriple=nvptx on the command line, while the
; module below declares an nvptx64 triple.

; RUN: llc < %s -mtriple=nvptx | FileCheck %s
; RUN: %if ptxas && !ptxas-12.0 %{ llc < %s -mtriple=nvptx -verify-machineinstrs | %ptxas-verify %}

target triple = "nvptx64-nvidia-cuda"

%struct.float2 = type { float, float }

; Expected prototype declaration of the metadata-aligned callee.
; CHECK-LABEL: .visible .func (.param .b32 func_retval0) callee_md
; CHECK-NEXT: (
; CHECK-NEXT: .param .align 8 .b8 callee_md_param_0[8]
; CHECK-NEXT: )
; CHECK-NEXT: ;

; Expected prototype declaration of the alignstack-attributed callee.
; CHECK-LABEL: .visible .func (.param .b32 func_retval0) callee
; CHECK-NEXT: (
; CHECK-NEXT: .param .align 8 .b8 callee_param_0[8]
; CHECK-NEXT: )
; CHECK-NEXT: ;

; Packs %a and %b into a %struct.float2 and forwards it to @callee_md.
; The call site is expected to declare param0 with 8-byte alignment and to
; fill it with one vectorized store.
define float @caller_md(float %a, float %b) {
; CHECK-LABEL: .visible .func (.param .b32 func_retval0) caller_md(
; CHECK-NEXT: .param .b32 caller_md_param_0,
; CHECK-NEXT: .param .b32 caller_md_param_1
; CHECK-NEXT: )
; CHECK-NEXT: {

; CHECK: ld.param.f32 %f1, [caller_md_param_0];
; CHECK-NEXT: ld.param.f32 %f2, [caller_md_param_1];
; CHECK-NEXT: {
; CHECK-NEXT: .param .align 8 .b8 param0[8];
; CHECK-NEXT: st.param.v2.f32 [param0], {%f1, %f2};
; CHECK-NEXT: .param .b32 retval0;
; CHECK-NEXT: call.uni (retval0),
; CHECK-NEXT: callee_md,
; CHECK-NEXT: (
; CHECK-NEXT: param0
; CHECK-NEXT: );
; CHECK-NEXT: ld.param.f32 %f3, [retval0];
; CHECK-NEXT: }
; CHECK-NEXT: st.param.f32 [func_retval0], %f3;
; CHECK-NEXT: ret;
  %first_set = insertvalue %struct.float2 poison, float %a, 0
  %pair = insertvalue %struct.float2 %first_set, float %b, 1
  %result = call float @callee_md(%struct.float2 %pair)
  ret float %result
}

; Returns the sum of the two struct fields. The parameter carries no alignment
; attribute here; the alignment comes from the !nvvm.annotations entry at the
; bottom of the file.
define float @callee_md(%struct.float2 %a) {
; CHECK-LABEL: .visible .func (.param .b32 func_retval0) callee_md(
; CHECK-NEXT: .param .align 8 .b8 callee_md_param_0[8]
; CHECK-NEXT: )
; CHECK-NEXT: {

; CHECK: ld.param.v2.f32 {%f1, %f2}, [callee_md_param_0];
; CHECK-NEXT: add.rn.f32 %f3, %f1, %f2;
; CHECK-NEXT: st.param.f32 [func_retval0], %f3;
; CHECK-NEXT: ret;
  %x = extractvalue %struct.float2 %a, 0
  %y = extractvalue %struct.float2 %a, 1
  %sum = fadd float %x, %y
  ret float %sum
}

; Same as @caller_md, but the call targets @callee, whose parameter alignment
; is requested with alignstack(8) instead of metadata.
define float @caller(float %a, float %b) {
; CHECK-LABEL: .visible .func (.param .b32 func_retval0) caller(
; CHECK-NEXT: .param .b32 caller_param_0,
; CHECK-NEXT: .param .b32 caller_param_1
; CHECK-NEXT: )
; CHECK-NEXT: {

; CHECK: ld.param.f32 %f1, [caller_param_0];
; CHECK-NEXT: ld.param.f32 %f2, [caller_param_1];
; CHECK-NEXT: {
; CHECK-NEXT: .param .align 8 .b8 param0[8];
; CHECK-NEXT: st.param.v2.f32 [param0], {%f1, %f2};
; CHECK-NEXT: .param .b32 retval0;
; CHECK-NEXT: call.uni (retval0),
; CHECK-NEXT: callee,
; CHECK-NEXT: (
; CHECK-NEXT: param0
; CHECK-NEXT: );
; CHECK-NEXT: ld.param.f32 %f3, [retval0];
; CHECK-NEXT: }
; CHECK-NEXT: st.param.f32 [func_retval0], %f3;
; CHECK-NEXT: ret;
  %first_set = insertvalue %struct.float2 poison, float %a, 0
  %pair = insertvalue %struct.float2 %first_set, float %b, 1
  %result = call float @callee(%struct.float2 %pair)
  ret float %result
}

; Returns the sum of the two struct fields; the 8-byte parameter alignment is
; requested directly on the argument via alignstack(8).
define float @callee(%struct.float2 alignstack(8) %a) {
; CHECK-LABEL: .visible .func (.param .b32 func_retval0) callee(
; CHECK-NEXT: .param .align 8 .b8 callee_param_0[8]
; CHECK-NEXT: )
; CHECK-NEXT: {

; CHECK: ld.param.v2.f32 {%f1, %f2}, [callee_param_0];
; CHECK-NEXT: add.rn.f32 %f3, %f1, %f2;
; CHECK-NEXT: st.param.f32 [func_retval0], %f3;
; CHECK-NEXT: ret;
  %x = extractvalue %struct.float2 %a, 0
  %y = extractvalue %struct.float2 %a, 1
  %sum = fadd float %x, %y
  ret float %sum
}

; Attaches an "align" annotation to @callee_md.
; NOTE(review): the value 0x00010008 presumably packs the parameter position in
; the upper 16 bits and the alignment (8) in the lower 16 bits - confirm
; against the NVPTX backend documentation.
!nvvm.annotations = !{!0}
!0 = !{ptr @callee_md, !"align", i32 u0x00010008}