; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_20 | FileCheck %s
; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64 -mcpu=sm_20 | %ptxas-verify %}

; Exercises vector loads and stores whose stated alignment is smaller than the
; vector itself.  Each function pins the widest memory instruction that the
; alignment on the access allows and rejects every wider form.

target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64"
target triple = "nvptx64-nvidia-cuda"

; <4 x float> load, align 1: no vector or 32-bit loads, only byte loads.
; CHECK-LABEL: t1
define <4 x float> @t1(ptr %p1) {
; CHECK-NOT: ld.v4
; CHECK-NOT: ld.v2
; CHECK-NOT: ld.f32
; CHECK: ld.u8
  %r = load <4 x float>, ptr %p1, align 1
  ret <4 x float> %r
}

; <4 x float> load, align 4: no vector loads, scalar 32-bit loads expected.
; CHECK-LABEL: t2
define <4 x float> @t2(ptr %p1) {
; CHECK-NOT: ld.v4
; CHECK-NOT: ld.v2
; CHECK: ld.f32
  %r = load <4 x float>, ptr %p1, align 4
  ret <4 x float> %r
}

; <4 x float> load, align 8: no 4-wide load, a 2-wide vector load expected.
; CHECK-LABEL: t3
define <4 x float> @t3(ptr %p1) {
; CHECK-NOT: ld.v4
; CHECK: ld.v2
  %r = load <4 x float>, ptr %p1, align 8
  ret <4 x float> %r
}

; <4 x float> load, align 16: the full 4-wide vector load is expected.
; CHECK-LABEL: t4
define <4 x float> @t4(ptr %p1) {
; CHECK: ld.v4
  %r = load <4 x float>, ptr %p1, align 16
  ret <4 x float> %r
}

; <1 x half> copy, align 1: two byte loads, each stored back at the same
; offset from the same register.
; CHECK-LABEL: .visible .func test_v1halfp0a1(
; CHECK-DAG: ld.param.u64 %[[FROM:rd?[0-9]+]], [test_v1halfp0a1_param_0];
; CHECK-DAG: ld.param.u64 %[[TO:rd?[0-9]+]], [test_v1halfp0a1_param_1];
; CHECK-DAG: ld.u8 [[B0:%r[sd]?[0-9]+]], [%[[FROM]]]
; CHECK-DAG: st.u8 [%[[TO]]], [[B0]]
; CHECK-DAG: ld.u8 [[B1:%r[sd]?[0-9]+]], [%[[FROM]]+1]
; CHECK-DAG: st.u8 [%[[TO]]+1], [[B1]]
; CHECK: ret
define void @test_v1halfp0a1(ptr noalias readonly %from, ptr %to) {
  %1 = load <1 x half>, ptr %from , align 1
  store <1 x half> %1, ptr %to , align 1
  ret void
}

; <2 x half> copy, align 1: four byte loads and four byte stores.  The store
; patterns intentionally leave the stored register unconstrained.
; CHECK-LABEL: .visible .func test_v2halfp0a1(
; CHECK-DAG: ld.param.u64 %[[FROM:rd?[0-9]+]], [test_v2halfp0a1_param_0];
; CHECK-DAG: ld.param.u64 %[[TO:rd?[0-9]+]], [test_v2halfp0a1_param_1];
; CHECK-DAG: ld.u8 [[B0:%r[sd]?[0-9]+]], [%[[FROM]]]
; CHECK-DAG: st.u8 [%[[TO]]],
; CHECK-DAG: ld.u8 [[B1:%r[sd]?[0-9]+]], [%[[FROM]]+1]
; CHECK-DAG: st.u8 [%[[TO]]+1],
; CHECK-DAG: ld.u8 [[B2:%r[sd]?[0-9]+]], [%[[FROM]]+2]
; CHECK-DAG: st.u8 [%[[TO]]+2],
; CHECK-DAG: ld.u8 [[B3:%r[sd]?[0-9]+]], [%[[FROM]]+3]
; CHECK-DAG: st.u8 [%[[TO]]+3],
; CHECK: ret
define void @test_v2halfp0a1(ptr noalias readonly %from, ptr %to) {
  %1 = load <2 x half>, ptr %from , align 1
  store <2 x half> %1, ptr %to , align 1
  ret void
}

; <4 x half> copy, align 1: eight byte loads, each stored back at the same
; offset from the same register.
; CHECK-LABEL: .visible .func test_v4halfp0a1(
; CHECK-DAG: ld.param.u64 %[[FROM:rd?[0-9]+]], [test_v4halfp0a1_param_0];
; CHECK-DAG: ld.param.u64 %[[TO:rd?[0-9]+]], [test_v4halfp0a1_param_1];
; CHECK-DAG: ld.u8 [[B0:%r[sd]?[0-9]+]], [%[[FROM]]]
; CHECK-DAG: st.u8 [%[[TO]]], [[B0]]
; CHECK-DAG: ld.u8 [[B1:%r[sd]?[0-9]+]], [%[[FROM]]+1]
; CHECK-DAG: st.u8 [%[[TO]]+1], [[B1]]
; CHECK-DAG: ld.u8 [[B2:%r[sd]?[0-9]+]], [%[[FROM]]+2]
; CHECK-DAG: st.u8 [%[[TO]]+2], [[B2]]
; CHECK-DAG: ld.u8 [[B3:%r[sd]?[0-9]+]], [%[[FROM]]+3]
; CHECK-DAG: st.u8 [%[[TO]]+3], [[B3]]
; CHECK-DAG: ld.u8 [[B4:%r[sd]?[0-9]+]], [%[[FROM]]+4]
; CHECK-DAG: st.u8 [%[[TO]]+4], [[B4]]
; CHECK-DAG: ld.u8 [[B5:%r[sd]?[0-9]+]], [%[[FROM]]+5]
; CHECK-DAG: st.u8 [%[[TO]]+5], [[B5]]
; CHECK-DAG: ld.u8 [[B6:%r[sd]?[0-9]+]], [%[[FROM]]+6]
; CHECK-DAG: st.u8 [%[[TO]]+6], [[B6]]
; CHECK-DAG: ld.u8 [[B7:%r[sd]?[0-9]+]], [%[[FROM]]+7]
; CHECK-DAG: st.u8 [%[[TO]]+7], [[B7]]
; CHECK: ret
define void @test_v4halfp0a1(ptr noalias readonly %from, ptr %to) {
  %1 = load <4 x half>, ptr %from , align 1
  store <4 x half> %1, ptr %to , align 1
  ret void
}


; <4 x float> store, align 1: no vector or 32-bit stores, only byte stores.
; CHECK-LABEL: s1
define void @s1(ptr %p1, <4 x float> %v) {
; CHECK-NOT: st.v4
; CHECK-NOT: st.v2
; CHECK-NOT: st.f32
; CHECK: st.u8
  store <4 x float> %v, ptr %p1, align 1
  ret void
}

; <4 x float> store, align 4: no vector stores, scalar 32-bit stores expected.
; CHECK-LABEL: s2
define void @s2(ptr %p1, <4 x float> %v) {
; CHECK-NOT: st.v4
; CHECK-NOT: st.v2
; CHECK: st.f32
  store <4 x float> %v, ptr %p1, align 4
  ret void
}

; <4 x float> store, align 8: no 4-wide store, a 2-wide vector store expected.
; The positive match mirrors @t3, so a regression that scalarizes this store
; is caught instead of passing silently.
; CHECK-LABEL: s3
define void @s3(ptr %p1, <4 x float> %v) {
; CHECK-NOT: st.v4
; CHECK: st.v2
  store <4 x float> %v, ptr %p1, align 8
  ret void
}

; <4 x float> store, align 16: the full 4-wide vector store is expected.
; CHECK-LABEL: s4
define void @s4(ptr %p1, <4 x float> %v) {
; CHECK: st.v4
  store <4 x float> %v, ptr %p1, align 16
  ret void
}
