; RUN: llc -O0 < %s -mtriple=nvptx -mcpu=sm_20 | FileCheck %s -check-prefixes=ALL,CLS32,G32
; RUN: llc -O0 < %s -mtriple=nvptx64 -mcpu=sm_20 | FileCheck %s -check-prefixes=ALL,NOPTRCONV,CLS64,G64
; RUN: llc -O0 < %s -mtriple=nvptx64 -mcpu=sm_20 --nvptx-short-ptr| FileCheck %s -check-prefixes=ALL,PTRCONV,CLS64,G64
; RUN: %if ptxas && !ptxas-12.0 %{ llc -O0 < %s -mtriple=nvptx -mcpu=sm_20 | %ptxas-verify %}
; RUN: %if ptxas %{ llc -O0 < %s -mtriple=nvptx64 -mcpu=sm_20 | %ptxas-verify %}
; RUN: %if ptxas %{ llc -O0 < %s -mtriple=nvptx64 -mcpu=sm_20 --nvptx-short-ptr | %ptxas-verify %}

; Check-prefix legend (see the FileCheck invocations above):
;   ALL         - enabled in every FileCheck run
;   G32, CLS32  - enabled in the 32-bit nvptx run only
;   G64, CLS64  - enabled in both nvptx64 runs
;   NOPTRCONV   - nvptx64 without --nvptx-short-ptr
;   PTRCONV     - nvptx64 with --nvptx-short-ptr
; The G32/G64 prefixes are used for casts involving the global address
; space (1); the CLS32/CLS64 prefixes are used for casts involving the
; shared (3), const (4) and local (5) address spaces.

; global (addrspace 1) -> generic, followed by a generic load.
; No 32-to-64-bit conversion is expected in any run.
; ALL-LABEL: conv1
define i32 @conv1(ptr addrspace(1) %ptr) {
; G32: cvta.global.u32
; ALL-NOT: cvt.u64.u32
; G64: cvta.global.u64
; ALL: ld.u32
  %genptr = addrspacecast ptr addrspace(1) %ptr to ptr
  %val = load i32, ptr %genptr
  ret i32 %val
}

; shared (addrspace 3) -> generic, followed by a generic load.
; Only the --nvptx-short-ptr run expects a 32-to-64-bit conversion first.
; ALL-LABEL: conv2
define i32 @conv2(ptr addrspace(3) %ptr) {
; CLS32: cvta.shared.u32
; PTRCONV: cvt.u64.u32
; NOPTRCONV-NOT: cvt.u64.u32
; CLS64: cvta.shared.u64
; ALL: ld.u32
  %genptr = addrspacecast ptr addrspace(3) %ptr to ptr
  %val = load i32, ptr %genptr
  ret i32 %val
}

; const (addrspace 4) -> generic, followed by a generic load.
; Only the --nvptx-short-ptr run expects a 32-to-64-bit conversion first.
; ALL-LABEL: conv3
define i32 @conv3(ptr addrspace(4) %ptr) {
; CLS32: cvta.const.u32
; PTRCONV: cvt.u64.u32
; NOPTRCONV-NOT: cvt.u64.u32
; CLS64: cvta.const.u64
; ALL: ld.u32
  %genptr = addrspacecast ptr addrspace(4) %ptr to ptr
  %val = load i32, ptr %genptr
  ret i32 %val
}

; local (addrspace 5) -> generic, followed by a generic load.
; Only the --nvptx-short-ptr run expects a 32-to-64-bit conversion first.
; ALL-LABEL: conv4
define i32 @conv4(ptr addrspace(5) %ptr) {
; CLS32: cvta.local.u32
; PTRCONV: cvt.u64.u32
; NOPTRCONV-NOT: cvt.u64.u32
; CLS64: cvta.local.u64
; ALL: ld.u32
  %genptr = addrspacecast ptr addrspace(5) %ptr to ptr
  %val = load i32, ptr %genptr
  ret i32 %val
}

; generic -> global (addrspace 1), followed by a global load.
; No 32-to-64-bit conversion is expected in any run.
; ALL-LABEL: conv5
define i32 @conv5(ptr %ptr) {
; G32: cvta.to.global.u32
; ALL-NOT: cvt.u64.u32
; G64: cvta.to.global.u64
; ALL: ld.global.u32
  %specptr = addrspacecast ptr %ptr to ptr addrspace(1)
  %val = load i32, ptr addrspace(1) %specptr
  ret i32 %val
}

; generic -> shared (addrspace 3), followed by a shared load.
; Only the --nvptx-short-ptr run expects a 64-to-32-bit conversion afterwards.
; ALL-LABEL: conv6
define i32 @conv6(ptr %ptr) {
; CLS32: cvta.to.shared.u32
; CLS64: cvta.to.shared.u64
; PTRCONV: cvt.u32.u64
; NOPTRCONV-NOT: cvt.u32.u64
; ALL: ld.shared.u32
  %specptr = addrspacecast ptr %ptr to ptr addrspace(3)
  %val = load i32, ptr addrspace(3) %specptr
  ret i32 %val
}

; generic -> const (addrspace 4), followed by a const load.
; Only the --nvptx-short-ptr run expects a 64-to-32-bit conversion afterwards.
; ALL-LABEL: conv7
define i32 @conv7(ptr %ptr) {
; CLS32: cvta.to.const.u32
; CLS64: cvta.to.const.u64
; PTRCONV: cvt.u32.u64
; NOPTRCONV-NOT: cvt.u32.u64
; ALL: ld.const.u32
  %specptr = addrspacecast ptr %ptr to ptr addrspace(4)
  %val = load i32, ptr addrspace(4) %specptr
  ret i32 %val
}

; generic -> local (addrspace 5), followed by a local load.
; Only the --nvptx-short-ptr run expects a 64-to-32-bit conversion afterwards.
; ALL-LABEL: conv8
define i32 @conv8(ptr %ptr) {
; CLS32: cvta.to.local.u32
; CLS64: cvta.to.local.u64
; PTRCONV: cvt.u32.u64
; NOPTRCONV-NOT: cvt.u32.u64
; ALL: ld.local.u32
  %specptr = addrspacecast ptr %ptr to ptr addrspace(5)
  %val = load i32, ptr addrspace(5) %specptr
  ret i32 %val
}

; Check that we support addrspacecast when splitting the vector
; result (<2 x ptr> => 2 x <1 x ptr>).
; This also checks that scalarization works for addrspacecast
; (when going from <1 x ptr> to ptr.)
; ALL-LABEL: split1To0
; Casts a <2 x ptr addrspace(1)> vector to <2 x ptr> and stores through each
; extracted element; one cvta.global and one generic store per element.
define void @split1To0(ptr nocapture noundef readonly %xs) {
; G32: cvta.global.u32
; G32: cvta.global.u32
; G64: cvta.global.u64
; G64: cvta.global.u64
; ALL: st.u32
; ALL: st.u32
  %vec_addr = load <2 x ptr addrspace(1)>, ptr %xs, align 16
  %addrspacecast = addrspacecast <2 x ptr addrspace(1)> %vec_addr to <2 x ptr>
  %extractelement0 = extractelement <2 x ptr> %addrspacecast, i64 0
  store float 0.5, ptr %extractelement0, align 4
  %extractelement1 = extractelement <2 x ptr> %addrspacecast, i64 1
  store float 1.0, ptr %extractelement1, align 4
  ret void
}

; Same as split1To0 but casting from addrspace 0 (generic) to addrspace 1
; (global), to make sure the addrspacecast preserves the source and
; destination address spaces properly.
; ALL-LABEL: split0To1
define void @split0To1(ptr nocapture noundef readonly %xs) {
; G32: cvta.to.global.u32
; G32: cvta.to.global.u32
; G64: cvta.to.global.u64
; G64: cvta.to.global.u64
; ALL: st.global.u32
; ALL: st.global.u32
  %vec_addr = load <2 x ptr>, ptr %xs, align 16
  %addrspacecast = addrspacecast <2 x ptr> %vec_addr to <2 x ptr addrspace(1)>
  %extractelement0 = extractelement <2 x ptr addrspace(1)> %addrspacecast, i64 0
  store float 0.5, ptr addrspace(1) %extractelement0, align 4
  %extractelement1 = extractelement <2 x ptr addrspace(1)> %addrspacecast, i64 1
  store float 1.0, ptr addrspace(1) %extractelement1, align 4
  ret void
}

; Check that we support addrspacecast when a widening is required
; (3 x ptr => 4 x ptr).
; ALL-LABEL: widen1To0
; Casts a <3 x ptr addrspace(1)> vector to <3 x ptr> and stores through each
; of the three extracted elements; one cvta.global and one generic store per
; element.
define void @widen1To0(ptr nocapture noundef readonly %xs) {
; G32: cvta.global.u32
; G32: cvta.global.u32
; G32: cvta.global.u32

; G64: cvta.global.u64
; G64: cvta.global.u64
; G64: cvta.global.u64

; ALL: st.u32
; ALL: st.u32
; ALL: st.u32
  %vec_addr = load <3 x ptr addrspace(1)>, ptr %xs, align 16
  %addrspacecast = addrspacecast <3 x ptr addrspace(1)> %vec_addr to <3 x ptr>
  %extractelement0 = extractelement <3 x ptr> %addrspacecast, i64 0
  store float 0.5, ptr %extractelement0, align 4
  %extractelement1 = extractelement <3 x ptr> %addrspacecast, i64 1
  store float 1.0, ptr %extractelement1, align 4
  %extractelement2 = extractelement <3 x ptr> %addrspacecast, i64 2
  store float 1.5, ptr %extractelement2, align 4
  ret void
}

; Same as widen1To0 but casting from addrspace 0 (generic) to addrspace 1
; (global), to make sure the addrspacecast preserves the source and
; destination address spaces properly.
; ALL-LABEL: widen0To1
define void @widen0To1(ptr nocapture noundef readonly %xs) {
; G32: cvta.to.global.u32
; G32: cvta.to.global.u32
; G32: cvta.to.global.u32

; G64: cvta.to.global.u64
; G64: cvta.to.global.u64
; G64: cvta.to.global.u64

; ALL: st.global.u32
; ALL: st.global.u32
; ALL: st.global.u32
  %vec_addr = load <3 x ptr>, ptr %xs, align 16
  %addrspacecast = addrspacecast <3 x ptr> %vec_addr to <3 x ptr addrspace(1)>
  %extractelement0 = extractelement <3 x ptr addrspace(1)> %addrspacecast, i64 0
  store float 0.5, ptr addrspace(1) %extractelement0, align 4
  %extractelement1 = extractelement <3 x ptr addrspace(1)> %addrspacecast, i64 1
  store float 1.0, ptr addrspace(1) %extractelement1, align 4
  %extractelement2 = extractelement <3 x ptr addrspace(1)> %addrspacecast, i64 2
  store float 1.5, ptr addrspace(1) %extractelement2, align 4
  ret void
}