; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_70 -mattr=+ptx83 | FileCheck %s
; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64 -mcpu=sm_70 -mattr=+ptx83 | %ptxas-verify -arch=sm_70 %}

target triple = "nvptx64-nvidia-cuda"

; i128 slots written through the inline asm below via st.b128; @v64 holds the
; base pointer used for the 64-bit half stores (st.b64 of lo/hi).
; NOTE(review): the @u128_* / @i128_* globals are not referenced anywhere in
; this file as shown — possibly used by a companion test; confirm before removal.
@u128_max = internal addrspace(1) global i128 0, align 16
@u128_zero = internal addrspace(1) global i128 0, align 16
@i128_max = internal addrspace(1) global i128 0, align 16
@i128_min = internal addrspace(1) global i128 0, align 16
@v_u128_max = internal addrspace(1) global i128 0, align 16
@v_u128_zero = internal addrspace(1) global i128 0, align 16
@v_i128_max = internal addrspace(1) global i128 0, align 16
@v_i128_min = internal addrspace(1) global i128 0, align 16
@v64 = internal addrspace(1) global ptr null, align 8

; Checks that i128 corner values (-1 == u128 max, i128 max, i128 min, and 0)
; can be passed to inline asm through the 128-bit "q" register constraint:
; each value is materialized into a .b128 register pair (mov.b128 {lo, hi}),
; stored as two 64-bit halves, and also stored whole with st.b128.
define void @test_corner_values() {
; CHECK-LABEL: test_corner_values(
; CHECK: {
; CHECK-NEXT: .reg .b64 %rd<24>;
; CHECK-NEXT: .reg .b128 %rq<5>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.global.u64 %rd1, [v64];
; CHECK-NEXT: add.s64 %rd2, %rd1, 8;
; CHECK-NEXT: mov.b64 %rd13, -1;
; CHECK-NEXT: mov.b128 %rq1, {%rd13, %rd13};
; CHECK-NEXT: mov.u64 %rd14, v_u128_max;
; CHECK-NEXT: cvta.global.u64 %rd3, %rd14;
; CHECK-NEXT: // begin inline asm
; CHECK-NEXT: {
; CHECK-NEXT: .reg .b64 hi;
; CHECK-NEXT: .reg .b64 lo;
; CHECK-NEXT: mov.b128 {lo, hi}, %rq1;
; CHECK-NEXT: st.b64 [%rd1], lo;
; CHECK-NEXT: st.b64 [%rd2], hi;
; CHECK-NEXT: st.b128 [%rd3], %rq1;
; CHECK-NEXT: }
; CHECK-NEXT: // end inline asm
; CHECK-NEXT: ld.global.u64 %rd15, [v64];
; CHECK-NEXT: add.s64 %rd4, %rd15, 16;
; CHECK-NEXT: add.s64 %rd5, %rd15, 24;
; CHECK-NEXT: mov.b64 %rd16, 9223372036854775807;
; CHECK-NEXT: mov.b128 %rq2, {%rd13, %rd16};
; CHECK-NEXT: mov.u64 %rd17, v_i128_max;
; CHECK-NEXT: cvta.global.u64 %rd6, %rd17;
; CHECK-NEXT: // begin inline asm
; CHECK-NEXT: {
; CHECK-NEXT: .reg .b64 hi;
; CHECK-NEXT: .reg .b64 lo;
; CHECK-NEXT: mov.b128 {lo, hi}, %rq2;
; CHECK-NEXT: st.b64 [%rd4], lo;
; CHECK-NEXT: st.b64 [%rd5], hi;
; CHECK-NEXT: st.b128 [%rd6], %rq2;
; CHECK-NEXT: }
; CHECK-NEXT: // end inline asm
; CHECK-NEXT: ld.global.u64 %rd18, [v64];
; CHECK-NEXT: add.s64 %rd7, %rd18, 32;
; CHECK-NEXT: add.s64 %rd8, %rd18, 40;
; CHECK-NEXT: mov.b64 %rd19, -9223372036854775808;
; CHECK-NEXT: mov.b64 %rd20, 0;
; CHECK-NEXT: mov.b128 %rq3, {%rd20, %rd19};
; CHECK-NEXT: mov.u64 %rd21, v_i128_min;
; CHECK-NEXT: cvta.global.u64 %rd9, %rd21;
; CHECK-NEXT: // begin inline asm
; CHECK-NEXT: {
; CHECK-NEXT: .reg .b64 hi;
; CHECK-NEXT: .reg .b64 lo;
; CHECK-NEXT: mov.b128 {lo, hi}, %rq3;
; CHECK-NEXT: st.b64 [%rd7], lo;
; CHECK-NEXT: st.b64 [%rd8], hi;
; CHECK-NEXT: st.b128 [%rd9], %rq3;
; CHECK-NEXT: }
; CHECK-NEXT: // end inline asm
; CHECK-NEXT: ld.global.u64 %rd22, [v64];
; CHECK-NEXT: add.s64 %rd10, %rd22, 48;
; CHECK-NEXT: add.s64 %rd11, %rd22, 56;
; CHECK-NEXT: mov.b128 %rq4, {%rd20, %rd20};
; CHECK-NEXT: mov.u64 %rd23, v_u128_zero;
; CHECK-NEXT: cvta.global.u64 %rd12, %rd23;
; CHECK-NEXT: // begin inline asm
; CHECK-NEXT: {
; CHECK-NEXT: .reg .b64 hi;
; CHECK-NEXT: .reg .b64 lo;
; CHECK-NEXT: mov.b128 {lo, hi}, %rq4;
; CHECK-NEXT: st.b64 [%rd10], lo;
; CHECK-NEXT: st.b64 [%rd11], hi;
; CHECK-NEXT: st.b128 [%rd12], %rq4;
; CHECK-NEXT: }
; CHECK-NEXT: // end inline asm
; CHECK-NEXT: ret;

  %1 = load ptr, ptr addrspace(1) @v64, align 8
  %2 = getelementptr inbounds i64, ptr %1, i64 1
  tail call void asm sideeffect "{\0A\09.reg .b64 hi;\0A\09.reg .b64 lo;\0A\09mov.b128 {lo, hi}, $0;\0A\09st.b64 [$1], lo;\0A\09st.b64 [$2], hi;\0A\09st.b128 [$3], $0;\0A\09}", "q,l,l,l"(i128 -1, ptr %1, ptr nonnull %2, ptr nonnull addrspacecast (ptr addrspace(1) @v_u128_max to ptr))
  %3 = load ptr, ptr addrspace(1) @v64, align 8
  %4 = getelementptr inbounds i64, ptr %3, i64 2
  %5 = getelementptr inbounds i64, ptr %3, i64 3
  tail call void asm sideeffect "{\0A\09.reg .b64 hi;\0A\09.reg .b64 lo;\0A\09mov.b128 {lo, hi}, $0;\0A\09st.b64 [$1], lo;\0A\09st.b64 [$2], hi;\0A\09st.b128 [$3], $0;\0A\09}", "q,l,l,l"(i128 170141183460469231731687303715884105727, ptr nonnull %4, ptr nonnull %5, ptr nonnull addrspacecast (ptr addrspace(1) @v_i128_max to ptr))
  %6 = load ptr, ptr addrspace(1) @v64, align 8
  %7 = getelementptr inbounds i64, ptr %6, i64 4
  %8 = getelementptr inbounds i64, ptr %6, i64 5
  tail call void asm sideeffect "{\0A\09.reg .b64 hi;\0A\09.reg .b64 lo;\0A\09mov.b128 {lo, hi}, $0;\0A\09st.b64 [$1], lo;\0A\09st.b64 [$2], hi;\0A\09st.b128 [$3], $0;\0A\09}", "q,l,l,l"(i128 -170141183460469231731687303715884105728, ptr nonnull %7, ptr nonnull %8, ptr nonnull addrspacecast (ptr addrspace(1) @v_i128_min to ptr))
  %9 = load ptr, ptr addrspace(1) @v64, align 8
  %10 = getelementptr inbounds i64, ptr %9, i64 6
  %11 = getelementptr inbounds i64, ptr %9, i64 7
  tail call void asm sideeffect "{\0A\09.reg .b64 hi;\0A\09.reg .b64 lo;\0A\09mov.b128 {lo, hi}, $0;\0A\09st.b64 [$1], lo;\0A\09st.b64 [$2], hi;\0A\09st.b128 [$3], $0;\0A\09}", "q,l,l,l"(i128 0, ptr nonnull %10, ptr nonnull %11, ptr nonnull addrspacecast (ptr addrspace(1) @v_u128_zero to ptr))
  ret void
}