; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; Tests NVPTX lowering of variadic (va_arg) functions: scalar, struct-by-value,
; vector, and byval-aggregate fixed arguments, each paired with a caller.
; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_52 -mattr=+ptx64 | FileCheck %s --check-prefix=CHECK-PTX
; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64 -mcpu=sm_52 -mattr=+ptx64 | %ptxas-verify %}

%struct.S1 = type { i32, i8, i64 }
%struct.S2 = type { i64, i64 }

@__const.bar.s1 = private unnamed_addr constant %struct.S1 { i32 1, i8 1, i64 1 }, align 8
@__const.qux.s = private unnamed_addr constant %struct.S2 { i64 1, i64 1 }, align 8

; Sums one fixed i32 plus varargs: three i32 (4-byte slots), one i64 and two
; doubles (8-byte-aligned slots reached via the ptrmask alignment idiom).
define dso_local i32 @variadics1(i32 noundef %first, ...) {
; CHECK-PTX-LABEL: variadics1(
; CHECK-PTX:       {
; CHECK-PTX-NEXT:    .reg .b32 %r<11>;
; CHECK-PTX-NEXT:    .reg .b64 %rd<11>;
; CHECK-PTX-NEXT:    .reg .f64 %fd<7>;
; CHECK-PTX-EMPTY:
; CHECK-PTX-NEXT:  // %bb.0: // %entry
; CHECK-PTX-NEXT:    ld.param.u32 %r1, [variadics1_param_0];
; CHECK-PTX-NEXT:    ld.param.u64 %rd1, [variadics1_param_1];
; CHECK-PTX-NEXT:    ld.u32 %r2, [%rd1];
; CHECK-PTX-NEXT:    add.s32 %r3, %r1, %r2;
; CHECK-PTX-NEXT:    ld.u32 %r4, [%rd1+4];
; CHECK-PTX-NEXT:    add.s32 %r5, %r3, %r4;
; CHECK-PTX-NEXT:    ld.u32 %r6, [%rd1+8];
; CHECK-PTX-NEXT:    add.s32 %r7, %r5, %r6;
; CHECK-PTX-NEXT:    add.s64 %rd2, %rd1, 19;
; CHECK-PTX-NEXT:    and.b64 %rd3, %rd2, -8;
; CHECK-PTX-NEXT:    ld.u64 %rd4, [%rd3];
; CHECK-PTX-NEXT:    cvt.u64.u32 %rd5, %r7;
; CHECK-PTX-NEXT:    add.s64 %rd6, %rd5, %rd4;
; CHECK-PTX-NEXT:    cvt.u32.u64 %r8, %rd6;
; CHECK-PTX-NEXT:    add.s64 %rd7, %rd3, 15;
; CHECK-PTX-NEXT:    and.b64 %rd8, %rd7, -8;
; CHECK-PTX-NEXT:    ld.f64 %fd1, [%rd8];
; CHECK-PTX-NEXT:    cvt.rn.f64.s32 %fd2, %r8;
; CHECK-PTX-NEXT:    add.rn.f64 %fd3, %fd2, %fd1;
; CHECK-PTX-NEXT:    cvt.rzi.s32.f64 %r9, %fd3;
; CHECK-PTX-NEXT:    add.s64 %rd9, %rd8, 15;
; CHECK-PTX-NEXT:    and.b64 %rd10, %rd9, -8;
; CHECK-PTX-NEXT:    ld.f64 %fd4, [%rd10];
; CHECK-PTX-NEXT:    cvt.rn.f64.s32 %fd5, %r9;
; CHECK-PTX-NEXT:    add.rn.f64 %fd6, %fd5, %fd4;
; CHECK-PTX-NEXT:    cvt.rzi.s32.f64 %r10, %fd6;
; CHECK-PTX-NEXT:    st.param.b32 [func_retval0], %r10;
; CHECK-PTX-NEXT:    ret;
entry:
  %vlist = alloca ptr, align 8
  call void @llvm.va_start.p0(ptr %vlist)
  %argp.cur = load ptr, ptr %vlist, align 8
  %argp.next = getelementptr inbounds i8, ptr %argp.cur, i64 4
  store ptr %argp.next, ptr %vlist, align 8
  %0 = load i32, ptr %argp.cur, align 4
  %add = add nsw i32 %first, %0
  %argp.cur1 = load ptr, ptr %vlist, align 8
  %argp.next2 = getelementptr inbounds i8, ptr %argp.cur1, i64 4
  store ptr %argp.next2, ptr %vlist, align 8
  %1 = load i32, ptr %argp.cur1, align 4
  %add3 = add nsw i32 %add, %1
  %argp.cur4 = load ptr, ptr %vlist, align 8
  %argp.next5 = getelementptr inbounds i8, ptr %argp.cur4, i64 4
  store ptr %argp.next5, ptr %vlist, align 8
  %2 = load i32, ptr %argp.cur4, align 4
  %add6 = add nsw i32 %add3, %2
  %argp.cur7 = load ptr, ptr %vlist, align 8
  %3 = getelementptr inbounds i8, ptr %argp.cur7, i32 7
  %argp.cur7.aligned = call ptr @llvm.ptrmask.p0.i64(ptr %3, i64 -8)
  %argp.next8 = getelementptr inbounds i8, ptr %argp.cur7.aligned, i64 8
  store ptr %argp.next8, ptr %vlist, align 8
  %4 = load i64, ptr %argp.cur7.aligned, align 8
  %conv = sext i32 %add6 to i64
  %add9 = add nsw i64 %conv, %4
  %conv10 = trunc i64 %add9 to i32
  %argp.cur11 = load ptr, ptr %vlist, align 8
  %5 = getelementptr inbounds i8, ptr %argp.cur11, i32 7
  %argp.cur11.aligned = call ptr @llvm.ptrmask.p0.i64(ptr %5, i64 -8)
  %argp.next12 = getelementptr inbounds i8, ptr %argp.cur11.aligned, i64 8
  store ptr %argp.next12, ptr %vlist, align 8
  %6 = load double, ptr %argp.cur11.aligned, align 8
  %conv13 = sitofp i32 %conv10 to double
  %add14 = fadd double %conv13, %6
  %conv15 = fptosi double %add14 to i32
  %argp.cur16 = load ptr, ptr %vlist, align 8
  %7 = getelementptr inbounds i8, ptr %argp.cur16, i32 7
  %argp.cur16.aligned = call ptr @llvm.ptrmask.p0.i64(ptr %7, i64 -8)
  %argp.next17 = getelementptr inbounds i8, ptr %argp.cur16.aligned, i64 8
  store ptr %argp.next17, ptr %vlist, align 8
  %8 = load double, ptr %argp.cur16.aligned, align 8
  %conv18 = sitofp i32 %conv15 to double
  %add19 = fadd double %conv18, %8
  %conv20 = fptosi double %add19 to i32
  call void @llvm.va_end.p0(ptr %vlist)
  ret i32 %conv20
}

declare void @llvm.va_start.p0(ptr)

declare ptr @llvm.ptrmask.p0.i64(ptr, i64)

declare void @llvm.va_end.p0(ptr)

; Caller of @variadics1 with all-constant promoted arguments; checks the
; variadic buffer is materialized in the local depot and passed by pointer.
define dso_local i32 @foo() {
; CHECK-PTX-LABEL: foo(
; CHECK-PTX:       {
; CHECK-PTX-NEXT:    .local .align 8 .b8 __local_depot1[40];
; CHECK-PTX-NEXT:    .reg .b64 %SP;
; CHECK-PTX-NEXT:    .reg .b64 %SPL;
; CHECK-PTX-NEXT:    .reg .b32 %r<4>;
; CHECK-PTX-NEXT:    .reg .b64 %rd<5>;
; CHECK-PTX-EMPTY:
; CHECK-PTX-NEXT:  // %bb.0: // %entry
; CHECK-PTX-NEXT:    mov.u64 %SPL, __local_depot1;
; CHECK-PTX-NEXT:    cvta.local.u64 %SP, %SPL;
; CHECK-PTX-NEXT:    mov.b64 %rd1, 4294967297;
; CHECK-PTX-NEXT:    st.u64 [%SP], %rd1;
; CHECK-PTX-NEXT:    mov.b32 %r1, 1;
; CHECK-PTX-NEXT:    st.u32 [%SP+8], %r1;
; CHECK-PTX-NEXT:    mov.b64 %rd2, 1;
; CHECK-PTX-NEXT:    st.u64 [%SP+16], %rd2;
; CHECK-PTX-NEXT:    mov.b64 %rd3, 4607182418800017408;
; CHECK-PTX-NEXT:    st.u64 [%SP+24], %rd3;
; CHECK-PTX-NEXT:    st.u64 [%SP+32], %rd3;
; CHECK-PTX-NEXT:    add.u64 %rd4, %SP, 0;
; CHECK-PTX-NEXT:    { // callseq 0, 0
; CHECK-PTX-NEXT:    .param .b32 param0;
; CHECK-PTX-NEXT:    st.param.b32 [param0], 1;
; CHECK-PTX-NEXT:    .param .b64 param1;
; CHECK-PTX-NEXT:    st.param.b64 [param1], %rd4;
; CHECK-PTX-NEXT:    .param .b32 retval0;
; CHECK-PTX-NEXT:    call.uni (retval0),
; CHECK-PTX-NEXT:    variadics1,
; CHECK-PTX-NEXT:    (
; CHECK-PTX-NEXT:    param0,
; CHECK-PTX-NEXT:    param1
; CHECK-PTX-NEXT:    );
; CHECK-PTX-NEXT:    ld.param.b32 %r2, [retval0];
; CHECK-PTX-NEXT:    } // callseq 0
; CHECK-PTX-NEXT:    st.param.b32 [func_retval0], %r2;
; CHECK-PTX-NEXT:    ret;
entry:
  %conv = sext i8 1 to i32
  %conv1 = sext i16 1 to i32
  %conv2 = fpext float 1.000000e+00 to double
  %call = call i32 (i32, ...) @variadics1(i32 noundef 1, i32 noundef %conv, i32 noundef %conv1, i32 noundef 1, i64 noundef 1, double noundef %conv2, double noundef 1.000000e+00)
  ret i32 %call
}

; Sums one fixed i32 plus a single %struct.S1 vararg, read piecewise (SROA'd
; loads plus a 3-byte memcpy for the padding tail) from an 8-byte-aligned slot.
define dso_local i32 @variadics2(i32 noundef %first, ...) {
; CHECK-PTX-LABEL: variadics2(
; CHECK-PTX:       {
; CHECK-PTX-NEXT:    .local .align 2 .b8 __local_depot2[4];
; CHECK-PTX-NEXT:    .reg .b64 %SP;
; CHECK-PTX-NEXT:    .reg .b64 %SPL;
; CHECK-PTX-NEXT:    .reg .b16 %rs<6>;
; CHECK-PTX-NEXT:    .reg .b32 %r<7>;
; CHECK-PTX-NEXT:    .reg .b64 %rd<7>;
; CHECK-PTX-EMPTY:
; CHECK-PTX-NEXT:  // %bb.0: // %entry
; CHECK-PTX-NEXT:    mov.u64 %SPL, __local_depot2;
; CHECK-PTX-NEXT:    cvta.local.u64 %SP, %SPL;
; CHECK-PTX-NEXT:    ld.param.u32 %r1, [variadics2_param_0];
; CHECK-PTX-NEXT:    ld.param.u64 %rd1, [variadics2_param_1];
; CHECK-PTX-NEXT:    add.s64 %rd2, %rd1, 7;
; CHECK-PTX-NEXT:    and.b64 %rd3, %rd2, -8;
; CHECK-PTX-NEXT:    ld.u32 %r2, [%rd3];
; CHECK-PTX-NEXT:    ld.s8 %r3, [%rd3+4];
; CHECK-PTX-NEXT:    ld.u8 %rs1, [%rd3+7];
; CHECK-PTX-NEXT:    st.u8 [%SP+2], %rs1;
; CHECK-PTX-NEXT:    ld.u8 %rs2, [%rd3+5];
; CHECK-PTX-NEXT:    ld.u8 %rs3, [%rd3+6];
; CHECK-PTX-NEXT:    shl.b16 %rs4, %rs3, 8;
; CHECK-PTX-NEXT:    or.b16 %rs5, %rs4, %rs2;
; CHECK-PTX-NEXT:    st.u16 [%SP], %rs5;
; CHECK-PTX-NEXT:    ld.u64 %rd4, [%rd3+8];
; CHECK-PTX-NEXT:    add.s32 %r4, %r1, %r2;
; CHECK-PTX-NEXT:    add.s32 %r5, %r4, %r3;
; CHECK-PTX-NEXT:    cvt.u64.u32 %rd5, %r5;
; CHECK-PTX-NEXT:    add.s64 %rd6, %rd5, %rd4;
; CHECK-PTX-NEXT:    cvt.u32.u64 %r6, %rd6;
; CHECK-PTX-NEXT:    st.param.b32 [func_retval0], %r6;
; CHECK-PTX-NEXT:    ret;
entry:
  %vlist = alloca ptr, align 8
  %s1.sroa.3 = alloca [3 x i8], align 1
  call void @llvm.va_start.p0(ptr %vlist)
  %argp.cur = load ptr, ptr %vlist, align 8
  %0 = getelementptr inbounds i8, ptr %argp.cur, i32 7
  %argp.cur.aligned = call ptr @llvm.ptrmask.p0.i64(ptr %0, i64 -8)
  %argp.next = getelementptr inbounds i8, ptr %argp.cur.aligned, i64 16
  store ptr %argp.next, ptr %vlist, align 8
  %s1.sroa.0.0.copyload = load i32, ptr %argp.cur.aligned, align 8
  %s1.sroa.2.0.argp.cur.aligned.sroa_idx = getelementptr inbounds i8, ptr %argp.cur.aligned, i64 4
  %s1.sroa.2.0.copyload = load i8, ptr %s1.sroa.2.0.argp.cur.aligned.sroa_idx, align 4
  %s1.sroa.3.0.argp.cur.aligned.sroa_idx = getelementptr inbounds i8, ptr %argp.cur.aligned, i64 5
  call void @llvm.memcpy.p0.p0.i64(ptr align 1 %s1.sroa.3, ptr align 1 %s1.sroa.3.0.argp.cur.aligned.sroa_idx, i64 3, i1 false)
  %s1.sroa.31.0.argp.cur.aligned.sroa_idx = getelementptr inbounds i8, ptr %argp.cur.aligned, i64 8
  %s1.sroa.31.0.copyload = load i64, ptr %s1.sroa.31.0.argp.cur.aligned.sroa_idx, align 8
  %add = add nsw i32 %first, %s1.sroa.0.0.copyload
  %conv = sext i8 %s1.sroa.2.0.copyload to i32
  %add1 = add nsw i32 %add, %conv
  %conv2 = sext i32 %add1 to i64
  %add3 = add nsw i64 %conv2, %s1.sroa.31.0.copyload
  %conv4 = trunc i64 %add3 to i32
  call void @llvm.va_end.p0(ptr %vlist)
  ret i32 %conv4
}

declare void @llvm.memcpy.p0.p0.i64(ptr noalias nocapture writeonly, ptr noalias nocapture readonly, i64, i1 immarg)

; Caller of @variadics2 passing the fields of the constant global
; @__const.bar.s1 as scattered scalar varargs.
define dso_local i32 @bar() {
; CHECK-PTX-LABEL: bar(
; CHECK-PTX:       {
; CHECK-PTX-NEXT:    .local .align 8 .b8 __local_depot3[24];
; CHECK-PTX-NEXT:    .reg .b64 %SP;
; CHECK-PTX-NEXT:    .reg .b64 %SPL;
; CHECK-PTX-NEXT:    .reg .b16 %rs<10>;
; CHECK-PTX-NEXT:    .reg .b32 %r<4>;
; CHECK-PTX-NEXT:    .reg .b64 %rd<7>;
; CHECK-PTX-EMPTY:
; CHECK-PTX-NEXT:  // %bb.0: // %entry
; CHECK-PTX-NEXT:    mov.u64 %SPL, __local_depot3;
; CHECK-PTX-NEXT:    cvta.local.u64 %SP, %SPL;
; CHECK-PTX-NEXT:    mov.u64 %rd1, __const_$_bar_$_s1;
; CHECK-PTX-NEXT:    add.s64 %rd2, %rd1, 7;
; CHECK-PTX-NEXT:    ld.global.nc.u8 %rs1, [%rd2];
; CHECK-PTX-NEXT:    cvt.u16.u8 %rs2, %rs1;
; CHECK-PTX-NEXT:    st.u8 [%SP+2], %rs2;
; CHECK-PTX-NEXT:    add.s64 %rd3, %rd1, 5;
; CHECK-PTX-NEXT:    ld.global.nc.u8 %rs3, [%rd3];
; CHECK-PTX-NEXT:    cvt.u16.u8 %rs4, %rs3;
; CHECK-PTX-NEXT:    add.s64 %rd4, %rd1, 6;
; CHECK-PTX-NEXT:    ld.global.nc.u8 %rs5, [%rd4];
; CHECK-PTX-NEXT:    cvt.u16.u8 %rs6, %rs5;
; CHECK-PTX-NEXT:    shl.b16 %rs7, %rs6, 8;
; CHECK-PTX-NEXT:    or.b16 %rs8, %rs7, %rs4;
; CHECK-PTX-NEXT:    st.u16 [%SP], %rs8;
; CHECK-PTX-NEXT:    mov.b32 %r1, 1;
; CHECK-PTX-NEXT:    st.u32 [%SP+8], %r1;
; CHECK-PTX-NEXT:    mov.b16 %rs9, 1;
; CHECK-PTX-NEXT:    st.u8 [%SP+12], %rs9;
; CHECK-PTX-NEXT:    mov.b64 %rd5, 1;
; CHECK-PTX-NEXT:    st.u64 [%SP+16], %rd5;
; CHECK-PTX-NEXT:    add.u64 %rd6, %SP, 8;
; CHECK-PTX-NEXT:    { // callseq 1, 0
; CHECK-PTX-NEXT:    .param .b32 param0;
; CHECK-PTX-NEXT:    st.param.b32 [param0], 1;
; CHECK-PTX-NEXT:    .param .b64 param1;
; CHECK-PTX-NEXT:    st.param.b64 [param1], %rd6;
; CHECK-PTX-NEXT:    .param .b32 retval0;
; CHECK-PTX-NEXT:    call.uni (retval0),
; CHECK-PTX-NEXT:    variadics2,
; CHECK-PTX-NEXT:    (
; CHECK-PTX-NEXT:    param0,
; CHECK-PTX-NEXT:    param1
; CHECK-PTX-NEXT:    );
; CHECK-PTX-NEXT:    ld.param.b32 %r2, [retval0];
; CHECK-PTX-NEXT:    } // callseq 1
; CHECK-PTX-NEXT:    st.param.b32 [func_retval0], %r2;
; CHECK-PTX-NEXT:    ret;
entry:
  %s1.sroa.3 = alloca [3 x i8], align 1
  %s1.sroa.0.0.copyload = load i32, ptr @__const.bar.s1, align 8
  %s1.sroa.2.0.copyload = load i8, ptr getelementptr inbounds (i8, ptr @__const.bar.s1, i64 4), align 4
  call void @llvm.memcpy.p0.p0.i64(ptr align 1 %s1.sroa.3, ptr align 1 getelementptr inbounds (i8, ptr @__const.bar.s1, i64 5), i64 3, i1 false)
  %s1.sroa.31.0.copyload = load i64, ptr getelementptr inbounds (i8, ptr @__const.bar.s1, i64 8), align 8
  %call = call i32 (i32, ...) @variadics2(i32 noundef 1, i32 %s1.sroa.0.0.copyload, i8 %s1.sroa.2.0.copyload, i64 %s1.sroa.31.0.copyload)
  ret i32 %call
}

; Sums one fixed i32 plus the lanes of a <4 x i32> vararg taken from a
; 16-byte-aligned slot (vectorized ld.v4 expected).
define dso_local i32 @variadics3(i32 noundef %first, ...) {
; CHECK-PTX-LABEL: variadics3(
; CHECK-PTX:       {
; CHECK-PTX-NEXT:    .reg .b32 %r<8>;
; CHECK-PTX-NEXT:    .reg .b64 %rd<4>;
; CHECK-PTX-EMPTY:
; CHECK-PTX-NEXT:  // %bb.0: // %entry
; CHECK-PTX-NEXT:    ld.param.u64 %rd1, [variadics3_param_1];
; CHECK-PTX-NEXT:    add.s64 %rd2, %rd1, 15;
; CHECK-PTX-NEXT:    and.b64 %rd3, %rd2, -16;
; CHECK-PTX-NEXT:    ld.v4.u32 {%r1, %r2, %r3, %r4}, [%rd3];
; CHECK-PTX-NEXT:    add.s32 %r5, %r1, %r2;
; CHECK-PTX-NEXT:    add.s32 %r6, %r5, %r3;
; CHECK-PTX-NEXT:    add.s32 %r7, %r6, %r4;
; CHECK-PTX-NEXT:    st.param.b32 [func_retval0], %r7;
; CHECK-PTX-NEXT:    ret;
entry:
  %vlist = alloca ptr, align 8
  call void @llvm.va_start.p0(ptr %vlist)
  %argp.cur = load ptr, ptr %vlist, align 8
  %0 = getelementptr inbounds i8, ptr %argp.cur, i32 15
  %argp.cur.aligned = call ptr @llvm.ptrmask.p0.i64(ptr %0, i64 -16)
  %argp.next = getelementptr inbounds i8, ptr %argp.cur.aligned, i64 16
  store ptr %argp.next, ptr %vlist, align 8
  %1 = load <4 x i32>, ptr %argp.cur.aligned, align 16
  call void @llvm.va_end.p0(ptr %vlist)
  %2 = extractelement <4 x i32> %1, i64 0
  %3 = extractelement <4 x i32> %1, i64 1
  %add = add nsw i32 %2, %3
  %4 = extractelement <4 x i32> %1, i64 2
  %add1 = add nsw i32 %add, %4
  %5 = extractelement <4 x i32> %1, i64 3
  %add2 = add nsw i32 %add1, %5
  ret i32 %add2
}

; Caller of @variadics3 passing a constant <4 x i32> vararg; checks the
; 16-byte-aligned depot slot and vectorized st.v4 of the splat.
define dso_local i32 @baz() {
; CHECK-PTX-LABEL: baz(
; CHECK-PTX:       {
; CHECK-PTX-NEXT:    .local .align 16 .b8 __local_depot5[16];
; CHECK-PTX-NEXT:    .reg .b64 %SP;
; CHECK-PTX-NEXT:    .reg .b64 %SPL;
; CHECK-PTX-NEXT:    .reg .b32 %r<4>;
; CHECK-PTX-NEXT:    .reg .b64 %rd<2>;
; CHECK-PTX-EMPTY:
; CHECK-PTX-NEXT:  // %bb.0: // %entry
; CHECK-PTX-NEXT:    mov.u64 %SPL, __local_depot5;
; CHECK-PTX-NEXT:    cvta.local.u64 %SP, %SPL;
; CHECK-PTX-NEXT:    mov.b32 %r1, 1;
; CHECK-PTX-NEXT:    st.v4.u32 [%SP], {%r1, %r1, %r1, %r1};
; CHECK-PTX-NEXT:    add.u64 %rd1, %SP, 0;
; CHECK-PTX-NEXT:    { // callseq 2, 0
; CHECK-PTX-NEXT:    .param .b32 param0;
; CHECK-PTX-NEXT:    st.param.b32 [param0], 1;
; CHECK-PTX-NEXT:    .param .b64 param1;
; CHECK-PTX-NEXT:    st.param.b64 [param1], %rd1;
; CHECK-PTX-NEXT:    .param .b32 retval0;
; CHECK-PTX-NEXT:    call.uni (retval0),
; CHECK-PTX-NEXT:    variadics3,
; CHECK-PTX-NEXT:    (
; CHECK-PTX-NEXT:    param0,
; CHECK-PTX-NEXT:    param1
; CHECK-PTX-NEXT:    );
; CHECK-PTX-NEXT:    ld.param.b32 %r2, [retval0];
; CHECK-PTX-NEXT:    } // callseq 2
; CHECK-PTX-NEXT:    st.param.b32 [func_retval0], %r2;
; CHECK-PTX-NEXT:    ret;
entry:
  %call = call i32 (i32, ...) @variadics3(i32 noundef 1, <4 x i32> noundef <i32 1, i32 1, i32 1, i32 1>)
  ret i32 %call
}

; Sums the two i64 fields of a byval %struct.S2 fixed argument plus one i64
; vararg; the byval aggregate is read directly from the param space.
define dso_local i32 @variadics4(ptr noundef byval(%struct.S2) align 8 %first, ...) {
; CHECK-PTX-LABEL: variadics4(
; CHECK-PTX:       {
; CHECK-PTX-NEXT:    .reg .b32 %r<2>;
; CHECK-PTX-NEXT:    .reg .b64 %rd<9>;
; CHECK-PTX-EMPTY:
; CHECK-PTX-NEXT:  // %bb.0: // %entry
; CHECK-PTX-NEXT:    ld.param.u64 %rd1, [variadics4_param_1];
; CHECK-PTX-NEXT:    add.s64 %rd2, %rd1, 7;
; CHECK-PTX-NEXT:    and.b64 %rd3, %rd2, -8;
; CHECK-PTX-NEXT:    ld.u64 %rd4, [%rd3];
; CHECK-PTX-NEXT:    ld.param.u64 %rd5, [variadics4_param_0];
; CHECK-PTX-NEXT:    ld.param.u64 %rd6, [variadics4_param_0+8];
; CHECK-PTX-NEXT:    add.s64 %rd7, %rd5, %rd6;
; CHECK-PTX-NEXT:    add.s64 %rd8, %rd7, %rd4;
; CHECK-PTX-NEXT:    cvt.u32.u64 %r1, %rd8;
; CHECK-PTX-NEXT:    st.param.b32 [func_retval0], %r1;
; CHECK-PTX-NEXT:    ret;
entry:
  %vlist = alloca ptr, align 8
  call void @llvm.va_start.p0(ptr %vlist)
  %argp.cur = load ptr, ptr %vlist, align 8
  %0 = getelementptr inbounds i8, ptr %argp.cur, i32 7
  %argp.cur.aligned = call ptr @llvm.ptrmask.p0.i64(ptr %0, i64 -8)
  %argp.next = getelementptr inbounds i8, ptr %argp.cur.aligned, i64 8
  store ptr %argp.next, ptr %vlist, align 8
  %1 = load i64, ptr %argp.cur.aligned, align 8
  %x1 = getelementptr inbounds %struct.S2, ptr %first, i32 0, i32 0
  %2 = load i64, ptr %x1, align 8
  %y = getelementptr inbounds %struct.S2, ptr %first, i32 0, i32 1
  %3 = load i64, ptr %y, align 8
  %add = add nsw i64 %2, %3
  %add2 = add nsw i64 %add, %1
  %conv = trunc i64 %add2 to i32
  call void @llvm.va_end.p0(ptr %vlist)
  ret i32 %conv
}

; Caller of @variadics4 passing a local copy of @__const.qux.s byval plus one
; i64 vararg; checks the byval struct goes into an aggregate .param.
define dso_local void @qux() {
; CHECK-PTX-LABEL: qux(
; CHECK-PTX:       {
; CHECK-PTX-NEXT:    .local .align 8 .b8 __local_depot7[24];
; CHECK-PTX-NEXT:    .reg .b64 %SP;
; CHECK-PTX-NEXT:    .reg .b64 %SPL;
; CHECK-PTX-NEXT:    .reg .b32 %r<3>;
; CHECK-PTX-NEXT:    .reg .b64 %rd<7>;
; CHECK-PTX-EMPTY:
; CHECK-PTX-NEXT:  // %bb.0: // %entry
; CHECK-PTX-NEXT:    mov.u64 %SPL, __local_depot7;
; CHECK-PTX-NEXT:    cvta.local.u64 %SP, %SPL;
; CHECK-PTX-NEXT:    ld.global.nc.u64 %rd1, [__const_$_qux_$_s];
; CHECK-PTX-NEXT:    st.u64 [%SP], %rd1;
; CHECK-PTX-NEXT:    mov.u64 %rd2, __const_$_qux_$_s;
; CHECK-PTX-NEXT:    add.s64 %rd3, %rd2, 8;
; CHECK-PTX-NEXT:    ld.global.nc.u64 %rd4, [%rd3];
; CHECK-PTX-NEXT:    st.u64 [%SP+8], %rd4;
; CHECK-PTX-NEXT:    mov.b64 %rd5, 1;
; CHECK-PTX-NEXT:    st.u64 [%SP+16], %rd5;
; CHECK-PTX-NEXT:    add.u64 %rd6, %SP, 16;
; CHECK-PTX-NEXT:    { // callseq 3, 0
; CHECK-PTX-NEXT:    .param .align 8 .b8 param0[16];
; CHECK-PTX-NEXT:    st.param.b64 [param0], %rd1;
; CHECK-PTX-NEXT:    st.param.b64 [param0+8], %rd4;
; CHECK-PTX-NEXT:    .param .b64 param1;
; CHECK-PTX-NEXT:    st.param.b64 [param1], %rd6;
; CHECK-PTX-NEXT:    .param .b32 retval0;
; CHECK-PTX-NEXT:    call.uni (retval0),
; CHECK-PTX-NEXT:    variadics4,
; CHECK-PTX-NEXT:    (
; CHECK-PTX-NEXT:    param0,
; CHECK-PTX-NEXT:    param1
; CHECK-PTX-NEXT:    );
; CHECK-PTX-NEXT:    ld.param.b32 %r1, [retval0];
; CHECK-PTX-NEXT:    } // callseq 3
; CHECK-PTX-NEXT:    ret;
entry:
  %s = alloca %struct.S2, align 8
  call void @llvm.memcpy.p0.p0.i64(ptr align 8 %s, ptr align 8 @__const.qux.s, i64 16, i1 false)
  %call = call i32 (ptr, ...) @variadics4(ptr noundef byval(%struct.S2) align 8 %s, i64 noundef 1)
  ret void
}