; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --extra_scrub --version 5
; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_70 -mattr=+ptx83 | FileCheck %s
; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64 -mcpu=sm_70 -mattr=+ptx83 | %ptxas-verify -arch=sm_70 %}

target triple = "nvptx64-nvidia-cuda"

; Loop trip count (i32) and the 128-bit value updated by the loop, both in
; address space 1.
@size = internal addrspace(1) global i32 0, align 4
@x = internal addrspace(1) global i128 0, align 16

; Exercises an i128 inline-asm operand (constraint "q") whose output is tied to
; its own input (constraint "0") and is carried around a loop through a phi.
; The expected PTX below packs two 64-bit registers into a .b128 register
; before the asm block and unpacks it again afterwards on every iteration.
define void @test_b128_in_loop() {
; CHECK-LABEL: test_b128_in_loop(
; CHECK:       {
; CHECK-NEXT:    .reg .pred %p<3>;
; CHECK-NEXT:    .reg .b64 %rd<15>;
; CHECK-NEXT:    .reg .b128 %rq<3>;
; CHECK-EMPTY:
; CHECK-NEXT:  // %bb.0:
; CHECK-NEXT:    ld.global.s32 %rd1, [size];
; CHECK-NEXT:    setp.eq.s64 %p1, %rd1, 0;
; CHECK-NEXT:    @%p1 bra $L__BB0_3;
; CHECK-NEXT:  // %bb.1: // %BB1
; CHECK-NEXT:    ld.global.u64 %rd13, [x+8];
; CHECK-NEXT:    ld.global.u64 %rd12, [x];
; CHECK-NEXT:    mov.b64 %rd14, 0;
; CHECK-NEXT:  $L__BB0_2: // %BB2
; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    mov.b128 %rq1, {%rd12, %rd13};
; CHECK-NEXT:    // begin inline asm
; CHECK-NEXT:    {
; CHECK-NEXT:    .reg .b64 lo;
; CHECK-NEXT:    .reg .b64 hi;
; CHECK-NEXT:    mov.b128 {lo, hi}, %rq1;
; CHECK-NEXT:    add.cc.u64 lo, lo, %rd14;
; CHECK-NEXT:    mov.b128 %rq1, {lo, hi};
; CHECK-NEXT:    }
; CHECK-NEXT:    // end inline asm
; CHECK-NEXT:    mov.b128 {%rd12, %rd13}, %rq1;
; CHECK-NEXT:    st.global.u64 [x+8], %rd13;
; CHECK-NEXT:    st.global.u64 [x], %rd12;
; CHECK-NEXT:    add.s64 %rd14, %rd14, 1;
; CHECK-NEXT:    setp.ne.s64 %p2, %rd1, %rd14;
; CHECK-NEXT:    @%p2 bra $L__BB0_2;
; CHECK-NEXT:  $L__BB0_3: // %BB3
; CHECK-NEXT:    ret;

  ; Entry: skip the loop entirely when @size is zero.
  %1 = load i32, ptr addrspace(1) @size, align 4
  %2 = icmp eq i32 %1, 0
  br i1 %2, label %BB3, label %BB1

BB1:                                              ; preds = %0
  ; Preheader: load the initial 128-bit value and sign-extend the trip count.
  %3 = load i128, ptr addrspace(1) @x, align 16
  %4 = sext i32 %1 to i64
  br label %BB2

BB2:                                              ; preds = %BB2, %BB1
  ; %5 is the loop-carried i128 value; %6 is the i64 counter starting at 0.
  %5 = phi i128 [ %7, %BB2 ], [ %3, %BB1 ]
  %6 = phi i64 [ %9, %BB2 ], [ 0, %BB1 ]
  ; $0 is the i128 output tied to the i128 input %5; $1 is the i64 input %6.
  %7 = tail call i128 asm "{\0A\09.reg .b64 lo;\0A\09.reg .b64 hi;\0A\09mov.b128 {lo, hi}, $0;\0A\09add.cc.u64 lo, lo, $1;\0A\09mov.b128 $0, {lo, hi};\0A\09}", "=q,l,0"(i64 %6, i128 %5)
  ; The result is written back to @x as two 64-bit lanes on every iteration.
  %8 = bitcast i128 %7 to <2 x i64>
  store <2 x i64> %8, ptr addrspace(1) @x, align 16
  %9 = add nuw i64 %6, 1
  %10 = icmp eq i64 %9, %4
  br i1 %10, label %BB3, label %BB2

BB3:                                              ; preds = %BB2, %0
  ret void
}