1; RUN: llc < %s -mtriple=nvptx64-unknown-unknown | FileCheck %s 2; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64-unknown-unknown | %ptxas-verify %} 3; 4; Check that parameters of a __global__ (kernel) function do not get increased 5; alignment, and no additional vectorization is performed on loads/stores with 6; that parameters. 7; 8; Test IR is a minimized version of IR generated with the following command 9; from the source code below: 10; $ clang++ -O3 --cuda-gpu-arch=sm_35 -S -emit-llvm src.cu 11; 12; ---------------------------------------------------------------------------- 13; #include <stdint.h> 14; 15; struct St4x1 { uint32_t field[1]; }; 16; struct St4x2 { uint32_t field[2]; }; 17; struct St4x3 { uint32_t field[3]; }; 18; struct St4x4 { uint32_t field[4]; }; 19; struct St4x5 { uint32_t field[5]; }; 20; struct St4x6 { uint32_t field[6]; }; 21; struct St4x7 { uint32_t field[7]; }; 22; struct St4x8 { uint32_t field[8]; }; 23; struct St8x1 { uint64_t field[1]; }; 24; struct St8x2 { uint64_t field[2]; }; 25; struct St8x3 { uint64_t field[3]; }; 26; struct St8x4 { uint64_t field[4]; }; 27; 28; #define DECLARE_FUNCTION(StName) \ 29; static __global__ __attribute__((noinline)) \ 30; void foo_##StName(struct StName in, struct StName* ret) { \ 31; const unsigned size = sizeof(ret->field) / sizeof(*ret->field); \ 32; for (unsigned i = 0; i != size; ++i) \ 33; ret->field[i] = in.field[i]; \ 34; } \ 35; 36; DECLARE_FUNCTION(St4x1) 37; DECLARE_FUNCTION(St4x2) 38; DECLARE_FUNCTION(St4x3) 39; DECLARE_FUNCTION(St4x4) 40; DECLARE_FUNCTION(St4x5) 41; DECLARE_FUNCTION(St4x6) 42; DECLARE_FUNCTION(St4x7) 43; DECLARE_FUNCTION(St4x8) 44; DECLARE_FUNCTION(St8x1) 45; DECLARE_FUNCTION(St8x2) 46; DECLARE_FUNCTION(St8x3) 47; DECLARE_FUNCTION(St8x4) 48; ---------------------------------------------------------------------------- 49 50%struct.St4x1 = type { [1 x i32] } 51%struct.St4x2 = type { [2 x i32] } 52%struct.St4x3 = type { [3 x i32] } 53%struct.St4x4 = type { [4 x i32] } 54%struct.St4x5 = type { [5 x i32] } 55%struct.St4x6 = type { [6 x i32] } 56%struct.St4x7 = type { [7 x i32] } 57%struct.St4x8 = type { [8 x i32] } 58%struct.St8x1 = type { [1 x i64] } 59%struct.St8x2 = type { [2 x i64] } 60%struct.St8x3 = type { [3 x i64] } 61%struct.St8x4 = type { [4 x i64] } 62 63define dso_local void @foo_St4x1(ptr nocapture noundef readonly byval(%struct.St4x1) align 4 %in, ptr nocapture noundef writeonly %ret) { 64 ; CHECK-LABEL: .visible .func foo_St4x1( 65 ; CHECK: .param .align 4 .b8 foo_St4x1_param_0[4], 66 ; CHECK: .param .b64 foo_St4x1_param_1 67 ; CHECK: ) 68 ; CHECK: ld.param.u64 [[R1:%rd[0-9]+]], [foo_St4x1_param_1]; 69 ; CHECK: ld.param.u32 [[R2:%r[0-9]+]], [foo_St4x1_param_0]; 70 ; CHECK: st.u32 [[[R1]]], [[R2]]; 71 ; CHECK: ret; 72 %1 = load i32, ptr %in, align 4 73 store i32 %1, ptr %ret, align 4 74 ret void 75} 76 77define dso_local void @foo_St4x2(ptr nocapture noundef readonly byval(%struct.St4x2) align 4 %in, ptr nocapture noundef writeonly %ret) { 78 ; CHECK-LABEL: .visible .func foo_St4x2( 79 ; CHECK: .param .align 4 .b8 foo_St4x2_param_0[8], 80 ; CHECK: .param .b64 foo_St4x2_param_1 81 ; CHECK: ) 82 ; CHECK: ld.param.u64 [[R1:%rd[0-9]+]], [foo_St4x2_param_1]; 83 ; CHECK: ld.param.u32 [[R2:%r[0-9]+]], [foo_St4x2_param_0]; 84 ; CHECK: st.u32 [[[R1]]], [[R2]]; 85 ; CHECK: ld.param.u32 [[R3:%r[0-9]+]], [foo_St4x2_param_0+4]; 86 ; CHECK: st.u32 [[[R1]]+4], [[R3]]; 87 ; CHECK: ret; 88 %1 = load i32, ptr %in, align 4 89 store i32 %1, ptr %ret, align 4 90 %arrayidx.1 = getelementptr inbounds [2 x i32], ptr %in, i64 0, i64 1 91 %2 = load i32, ptr %arrayidx.1, align 4 92 %arrayidx3.1 = getelementptr inbounds [2 x i32], ptr %ret, i64 0, i64 1 93 store i32 %2, ptr %arrayidx3.1, align 4 94 ret void 95} 96 97define dso_local void @foo_St4x3(ptr nocapture noundef readonly byval(%struct.St4x3) align 4 %in, ptr nocapture noundef writeonly %ret) { 98 ; CHECK-LABEL: .visible .func foo_St4x3( 99 ; CHECK: .param .align 4 .b8 foo_St4x3_param_0[12], 100 ; CHECK: .param .b64 foo_St4x3_param_1 101 ; CHECK: ) 102 ; CHECK: ld.param.u64 [[R1:%rd[0-9]+]], [foo_St4x3_param_1]; 103 ; CHECK: ld.param.u32 [[R2:%r[0-9]+]], [foo_St4x3_param_0]; 104 ; CHECK: st.u32 [[[R1]]], [[R2]]; 105 ; CHECK: ld.param.u32 [[R3:%r[0-9]+]], [foo_St4x3_param_0+4]; 106 ; CHECK: st.u32 [[[R1]]+4], [[R3]]; 107 ; CHECK: ld.param.u32 [[R4:%r[0-9]+]], [foo_St4x3_param_0+8]; 108 ; CHECK: st.u32 [[[R1]]+8], [[R4]]; 109 ; CHECK: ret; 110 %1 = load i32, ptr %in, align 4 111 store i32 %1, ptr %ret, align 4 112 %arrayidx.1 = getelementptr inbounds [3 x i32], ptr %in, i64 0, i64 1 113 %2 = load i32, ptr %arrayidx.1, align 4 114 %arrayidx3.1 = getelementptr inbounds [3 x i32], ptr %ret, i64 0, i64 1 115 store i32 %2, ptr %arrayidx3.1, align 4 116 %arrayidx.2 = getelementptr inbounds [3 x i32], ptr %in, i64 0, i64 2 117 %3 = load i32, ptr %arrayidx.2, align 4 118 %arrayidx3.2 = getelementptr inbounds [3 x i32], ptr %ret, i64 0, i64 2 119 store i32 %3, ptr %arrayidx3.2, align 4 120 ret void 121} 122 123define dso_local void @foo_St4x4(ptr nocapture noundef readonly byval(%struct.St4x4) align 4 %in, ptr nocapture noundef writeonly %ret) { 124 ; CHECK-LABEL: .visible .func foo_St4x4( 125 ; CHECK: .param .align 4 .b8 foo_St4x4_param_0[16], 126 ; CHECK: .param .b64 foo_St4x4_param_1 127 ; CHECK: ) 128 ; CHECK: ld.param.u64 [[R1:%rd[0-9]+]], [foo_St4x4_param_1]; 129 ; CHECK: ld.param.u32 [[R2:%r[0-9]+]], [foo_St4x4_param_0]; 130 ; CHECK: st.u32 [[[R1]]], [[R2]]; 131 ; CHECK: ld.param.u32 [[R3:%r[0-9]+]], [foo_St4x4_param_0+4]; 132 ; CHECK: st.u32 [[[R1]]+4], [[R3]]; 133 ; CHECK: ld.param.u32 [[R4:%r[0-9]+]], [foo_St4x4_param_0+8]; 134 ; CHECK: st.u32 [[[R1]]+8], [[R4]]; 135 ; CHECK: ld.param.u32 [[R5:%r[0-9]+]], [foo_St4x4_param_0+12]; 136 ; CHECK: st.u32 [[[R1]]+12], [[R5]]; 137 ; CHECK: ret; 138 %1 = load i32, ptr %in, align 4 139 store i32 %1, ptr %ret, align 4 140 %arrayidx.1 = getelementptr inbounds [4 x i32], ptr %in, i64 0, i64 1 141 %2 = load i32, ptr %arrayidx.1, align 4 142 %arrayidx3.1 = getelementptr inbounds [4 x i32], ptr %ret, i64 0, i64 1 143 store i32 %2, ptr %arrayidx3.1, align 4 144 %arrayidx.2 = getelementptr inbounds [4 x i32], ptr %in, i64 0, i64 2 145 %3 = load i32, ptr %arrayidx.2, align 4 146 %arrayidx3.2 = getelementptr inbounds [4 x i32], ptr %ret, i64 0, i64 2 147 store i32 %3, ptr %arrayidx3.2, align 4 148 %arrayidx.3 = getelementptr inbounds [4 x i32], ptr %in, i64 0, i64 3 149 %4 = load i32, ptr %arrayidx.3, align 4 150 %arrayidx3.3 = getelementptr inbounds [4 x i32], ptr %ret, i64 0, i64 3 151 store i32 %4, ptr %arrayidx3.3, align 4 152 ret void 153} 154 155define dso_local void @foo_St4x5(ptr nocapture noundef readonly byval(%struct.St4x5) align 4 %in, ptr nocapture noundef writeonly %ret) { 156 ; CHECK-LABEL: .visible .func foo_St4x5( 157 ; CHECK: .param .align 4 .b8 foo_St4x5_param_0[20], 158 ; CHECK: .param .b64 foo_St4x5_param_1 159 ; CHECK: ) 160 ; CHECK: ld.param.u64 [[R1:%rd[0-9]+]], [foo_St4x5_param_1]; 161 ; CHECK: ld.param.u32 [[R2:%r[0-9]+]], [foo_St4x5_param_0]; 162 ; CHECK: st.u32 [[[R1]]], [[R2]]; 163 ; CHECK: ld.param.u32 [[R3:%r[0-9]+]], [foo_St4x5_param_0+4]; 164 ; CHECK: st.u32 [[[R1]]+4], [[R3]]; 165 ; CHECK: ld.param.u32 [[R4:%r[0-9]+]], [foo_St4x5_param_0+8]; 166 ; CHECK: st.u32 [[[R1]]+8], [[R4]]; 167 ; CHECK: ld.param.u32 [[R5:%r[0-9]+]], [foo_St4x5_param_0+12]; 168 ; CHECK: st.u32 [[[R1]]+12], [[R5]]; 169 ; CHECK: ld.param.u32 [[R6:%r[0-9]+]], [foo_St4x5_param_0+16]; 170 ; CHECK: st.u32 [[[R1]]+16], [[R6]]; 171 ; CHECK: ret; 172 %1 = load i32, ptr %in, align 4 173 store i32 %1, ptr %ret, align 4 174 %arrayidx.1 = getelementptr inbounds [5 x i32], ptr %in, i64 0, i64 1 175 %2 = load i32, ptr %arrayidx.1, align 4 176 %arrayidx3.1 = getelementptr inbounds [5 x i32], ptr %ret, i64 0, i64 1 177 store i32 %2, ptr %arrayidx3.1, align 4 178 %arrayidx.2 = getelementptr inbounds [5 x i32], ptr %in, i64 0, i64 2 179 %3 = load i32, ptr %arrayidx.2, align 4 180 %arrayidx3.2 = getelementptr inbounds [5 x i32], ptr %ret, i64 0, i64 2 181 store i32 %3, ptr %arrayidx3.2, align 4 182 %arrayidx.3 = getelementptr inbounds [5 x i32], ptr %in, i64 0, i64 3 183 %4 = load i32, ptr %arrayidx.3, align 4 184 %arrayidx3.3 = getelementptr inbounds [5 x i32], ptr %ret, i64 0, i64 3 185 store i32 %4, ptr %arrayidx3.3, align 4 186 %arrayidx.4 = getelementptr inbounds [5 x i32], ptr %in, i64 0, i64 4 187 %5 = load i32, ptr %arrayidx.4, align 4 188 %arrayidx3.4 = getelementptr inbounds [5 x i32], ptr %ret, i64 0, i64 4 189 store i32 %5, ptr %arrayidx3.4, align 4 190 ret void 191} 192 193define dso_local void @foo_St4x6(ptr nocapture noundef readonly byval(%struct.St4x6) align 4 %in, ptr nocapture noundef writeonly %ret) { 194 ; CHECK-LABEL: .visible .func foo_St4x6( 195 ; CHECK: .param .align 4 .b8 foo_St4x6_param_0[24], 196 ; CHECK: .param .b64 foo_St4x6_param_1 197 ; CHECK: ) 198 ; CHECK: ld.param.u64 [[R1:%rd[0-9]+]], [foo_St4x6_param_1]; 199 ; CHECK: ld.param.u32 [[R2:%r[0-9]+]], [foo_St4x6_param_0]; 200 ; CHECK: st.u32 [[[R1]]], [[R2]]; 201 ; CHECK: ld.param.u32 [[R3:%r[0-9]+]], [foo_St4x6_param_0+4]; 202 ; CHECK: st.u32 [[[R1]]+4], [[R3]]; 203 ; CHECK: ld.param.u32 [[R4:%r[0-9]+]], [foo_St4x6_param_0+8]; 204 ; CHECK: st.u32 [[[R1]]+8], [[R4]]; 205 ; CHECK: ld.param.u32 [[R5:%r[0-9]+]], [foo_St4x6_param_0+12]; 206 ; CHECK: st.u32 [[[R1]]+12], [[R5]]; 207 ; CHECK: ld.param.u32 [[R6:%r[0-9]+]], [foo_St4x6_param_0+16]; 208 ; CHECK: st.u32 [[[R1]]+16], [[R6]]; 209 ; CHECK: ld.param.u32 [[R7:%r[0-9]+]], [foo_St4x6_param_0+20]; 210 ; CHECK: st.u32 [[[R1]]+20], [[R7]]; 211 ; CHECK: ret; 212 %1 = load i32, ptr %in, align 4 213 store i32 %1, ptr %ret, align 4 214 %arrayidx.1 = getelementptr inbounds [6 x i32], ptr %in, i64 0, i64 1 215 %2 = load i32, ptr %arrayidx.1, align 4 216 %arrayidx3.1 = getelementptr inbounds [6 x i32], ptr %ret, i64 0, i64 1 217 store i32 %2, ptr %arrayidx3.1, align 4 218 %arrayidx.2 = getelementptr inbounds [6 x i32], ptr %in, i64 0, i64 2 219 %3 = load i32, ptr %arrayidx.2, align 4 220 %arrayidx3.2 = getelementptr inbounds [6 x i32], ptr %ret, i64 0, i64 2 221 store i32 %3, ptr %arrayidx3.2, align 4 222 %arrayidx.3 = getelementptr inbounds [6 x i32], ptr %in, i64 0, i64 3 223 %4 = load i32, ptr %arrayidx.3, align 4 224 %arrayidx3.3 = getelementptr inbounds [6 x i32], ptr %ret, i64 0, i64 3 225 store i32 %4, ptr %arrayidx3.3, align 4 226 %arrayidx.4 = getelementptr inbounds [6 x i32], ptr %in, i64 0, i64 4 227 %5 = load i32, ptr %arrayidx.4, align 4 228 %arrayidx3.4 = getelementptr inbounds [6 x i32], ptr %ret, i64 0, i64 4 229 store i32 %5, ptr %arrayidx3.4, align 4 230 %arrayidx.5 = getelementptr inbounds [6 x i32], ptr %in, i64 0, i64 5 231 %6 = load i32, ptr %arrayidx.5, align 4 232 %arrayidx3.5 = getelementptr inbounds [6 x i32], ptr %ret, i64 0, i64 5 233 store i32 %6, ptr %arrayidx3.5, align 4 234 ret void 235} 236 237define dso_local void @foo_St4x7(ptr nocapture noundef readonly byval(%struct.St4x7) align 4 %in, ptr nocapture noundef writeonly %ret) { 238 ; CHECK-LABEL: .visible .func foo_St4x7( 239 ; CHECK: .param .align 4 .b8 foo_St4x7_param_0[28], 240 ; CHECK: .param .b64 foo_St4x7_param_1 241 ; CHECK: ) 242 ; CHECK: ld.param.u64 [[R1:%rd[0-9]+]], [foo_St4x7_param_1]; 243 ; CHECK: ld.param.u32 [[R2:%r[0-9]+]], [foo_St4x7_param_0]; 244 ; CHECK: st.u32 [[[R1]]], [[R2]]; 245 ; CHECK: ld.param.u32 [[R3:%r[0-9]+]], [foo_St4x7_param_0+4]; 246 ; CHECK: st.u32 [[[R1]]+4], [[R3]]; 247 ; CHECK: ld.param.u32 [[R4:%r[0-9]+]], [foo_St4x7_param_0+8]; 248 ; CHECK: st.u32 [[[R1]]+8], [[R4]]; 249 ; CHECK: ld.param.u32 [[R5:%r[0-9]+]], [foo_St4x7_param_0+12]; 250 ; CHECK: st.u32 [[[R1]]+12], [[R5]]; 251 ; CHECK: ld.param.u32 [[R6:%r[0-9]+]], [foo_St4x7_param_0+16]; 252 ; CHECK: st.u32 [[[R1]]+16], [[R6]]; 253 ; CHECK: ld.param.u32 [[R7:%r[0-9]+]], [foo_St4x7_param_0+20]; 254 ; CHECK: st.u32 [[[R1]]+20], [[R7]]; 255 ; CHECK: ld.param.u32 [[R8:%r[0-9]+]], [foo_St4x7_param_0+24]; 256 ; CHECK: st.u32 [[[R1]]+24], [[R8]]; 257 ; CHECK: ret; 258 %1 = load i32, ptr %in, align 4 259 store i32 %1, ptr %ret, align 4 260 %arrayidx.1 = getelementptr inbounds [7 x i32], ptr %in, i64 0, i64 1 261 %2 = load i32, ptr %arrayidx.1, align 4 262 %arrayidx3.1 = getelementptr inbounds [7 x i32], ptr %ret, i64 0, i64 1 263 store i32 %2, ptr %arrayidx3.1, align 4 264 %arrayidx.2 = getelementptr inbounds [7 x i32], ptr %in, i64 0, i64 2 265 %3 = load i32, ptr %arrayidx.2, align 4 266 %arrayidx3.2 = getelementptr inbounds [7 x i32], ptr %ret, i64 0, i64 2 267 store i32 %3, ptr %arrayidx3.2, align 4 268 %arrayidx.3 = getelementptr inbounds [7 x i32], ptr %in, i64 0, i64 3 269 %4 = load i32, ptr %arrayidx.3, align 4 270 %arrayidx3.3 = getelementptr inbounds [7 x i32], ptr %ret, i64 0, i64 3 271 store i32 %4, ptr %arrayidx3.3, align 4 272 %arrayidx.4 = getelementptr inbounds [7 x i32], ptr %in, i64 0, i64 4 273 %5 = load i32, ptr %arrayidx.4, align 4 274 %arrayidx3.4 = getelementptr inbounds [7 x i32], ptr %ret, i64 0, i64 4 275 store i32 %5, ptr %arrayidx3.4, align 4 276 %arrayidx.5 = getelementptr inbounds [7 x i32], ptr %in, i64 0, i64 5 277 %6 = load i32, ptr %arrayidx.5, align 4 278 %arrayidx3.5 = getelementptr inbounds [7 x i32], ptr %ret, i64 0, i64 5 279 store i32 %6, ptr %arrayidx3.5, align 4 280 %arrayidx.6 = getelementptr inbounds [7 x i32], ptr %in, i64 0, i64 6 281 %7 = load i32, ptr %arrayidx.6, align 4 282 %arrayidx3.6 = getelementptr inbounds [7 x i32], ptr %ret, i64 0, i64 6 283 store i32 %7, ptr %arrayidx3.6, align 4 284 ret void 285} 286 287define dso_local void @foo_St4x8(ptr nocapture noundef readonly byval(%struct.St4x8) align 4 %in, ptr nocapture noundef writeonly %ret) { 288 ; CHECK-LABEL: .visible .func foo_St4x8( 289 ; CHECK: .param .align 4 .b8 foo_St4x8_param_0[32], 290 ; CHECK: .param .b64 foo_St4x8_param_1 291 ; CHECK: ) 292 ; CHECK: ld.param.u64 [[R1:%rd[0-9]+]], [foo_St4x8_param_1]; 293 ; CHECK: ld.param.u32 [[R2:%r[0-9]+]], [foo_St4x8_param_0]; 294 ; CHECK: st.u32 [[[R1]]], [[R2]]; 295 ; CHECK: ld.param.u32 [[R3:%r[0-9]+]], [foo_St4x8_param_0+4]; 296 ; CHECK: st.u32 [[[R1]]+4], [[R3]]; 297 ; CHECK: ld.param.u32 [[R4:%r[0-9]+]], [foo_St4x8_param_0+8]; 298 ; CHECK: st.u32 [[[R1]]+8], [[R4]]; 299 ; CHECK: ld.param.u32 [[R5:%r[0-9]+]], [foo_St4x8_param_0+12]; 300 ; CHECK: st.u32 [[[R1]]+12], [[R5]]; 301 ; CHECK: ld.param.u32 [[R6:%r[0-9]+]], [foo_St4x8_param_0+16]; 302 ; CHECK: st.u32 [[[R1]]+16], [[R6]]; 303 ; CHECK: ld.param.u32 [[R7:%r[0-9]+]], [foo_St4x8_param_0+20]; 304 ; CHECK: st.u32 [[[R1]]+20], [[R7]]; 305 ; CHECK: ld.param.u32 [[R8:%r[0-9]+]], [foo_St4x8_param_0+24]; 306 ; CHECK: st.u32 [[[R1]]+24], [[R8]]; 307 ; CHECK: ld.param.u32 [[R9:%r[0-9]+]], [foo_St4x8_param_0+28]; 308 ; CHECK: st.u32 [[[R1]]+28], [[R9]]; 309 ; CHECK: ret; 310 %1 = load i32, ptr %in, align 4 311 store i32 %1, ptr %ret, align 4 312 %arrayidx.1 = getelementptr inbounds [8 x i32], ptr %in, i64 0, i64 1 313 %2 = load i32, ptr %arrayidx.1, align 4 314 %arrayidx3.1 = getelementptr inbounds [8 x i32], ptr %ret, i64 0, i64 1 315 store i32 %2, ptr %arrayidx3.1, align 4 316 %arrayidx.2 = getelementptr inbounds [8 x i32], ptr %in, i64 0, i64 2 317 %3 = load i32, ptr %arrayidx.2, align 4 318 %arrayidx3.2 = getelementptr inbounds [8 x i32], ptr %ret, i64 0, i64 2 319 store i32 %3, ptr %arrayidx3.2, align 4 320 %arrayidx.3 = getelementptr inbounds [8 x i32], ptr %in, i64 0, i64 3 321 %4 = load i32, ptr %arrayidx.3, align 4 322 %arrayidx3.3 = getelementptr inbounds [8 x i32], ptr %ret, i64 0, i64 3 323 store i32 %4, ptr %arrayidx3.3, align 4 324 %arrayidx.4 = getelementptr inbounds [8 x i32], ptr %in, i64 0, i64 4 325 %5 = load i32, ptr %arrayidx.4, align 4 326 %arrayidx3.4 = getelementptr inbounds [8 x i32], ptr %ret, i64 0, i64 4 327 store i32 %5, ptr %arrayidx3.4, align 4 328 %arrayidx.5 = getelementptr inbounds [8 x i32], ptr %in, i64 0, i64 5 329 %6 = load i32, ptr %arrayidx.5, align 4 330 %arrayidx3.5 = getelementptr inbounds [8 x i32], ptr %ret, i64 0, i64 5 331 store i32 %6, ptr %arrayidx3.5, align 4 332 %arrayidx.6 = getelementptr inbounds [8 x i32], ptr %in, i64 0, i64 6 333 %7 = load i32, ptr %arrayidx.6, align 4 334 %arrayidx3.6 = getelementptr inbounds [8 x i32], ptr %ret, i64 0, i64 6 335 store i32 %7, ptr %arrayidx3.6, align 4 336 %arrayidx.7 = getelementptr inbounds [8 x i32], ptr %in, i64 0, i64 7 337 %8 = load i32, ptr %arrayidx.7, align 4 338 %arrayidx3.7 = getelementptr inbounds [8 x i32], ptr %ret, i64 0, i64 7 339 store i32 %8, ptr %arrayidx3.7, align 4 340 ret void 341} 342 343define dso_local void @foo_St8x1(ptr nocapture noundef readonly byval(%struct.St8x1) align 8 %in, ptr nocapture noundef writeonly %ret) { 344 ; CHECK-LABEL: .visible .func foo_St8x1( 345 ; CHECK: .param .align 8 .b8 foo_St8x1_param_0[8], 346 ; CHECK: .param .b64 foo_St8x1_param_1 347 ; CHECK: ) 348 ; CHECK: ld.param.u64 [[R1:%rd[0-9]+]], [foo_St8x1_param_1]; 349 ; CHECK: ld.param.u64 [[RD1:%rd[0-9]+]], [foo_St8x1_param_0]; 350 ; CHECK: st.u64 [[[R1]]], [[RD1]]; 351 ; CHECK: ret; 352 %1 = load i64, ptr %in, align 8 353 store i64 %1, ptr %ret, align 8 354 ret void 355} 356 357define dso_local void @foo_St8x2(ptr nocapture noundef readonly byval(%struct.St8x2) align 8 %in, ptr nocapture noundef writeonly %ret) { 358 ; CHECK-LABEL: .visible .func foo_St8x2( 359 ; CHECK: .param .align 8 .b8 foo_St8x2_param_0[16], 360 ; CHECK: .param .b64 foo_St8x2_param_1 361 ; CHECK: ) 362 ; CHECK: ld.param.u64 [[R1:%rd[0-9]+]], [foo_St8x2_param_1]; 363 ; CHECK: ld.param.u64 [[RD1:%rd[0-9]+]], [foo_St8x2_param_0]; 364 ; CHECK: st.u64 [[[R1]]], [[RD1]]; 365 ; CHECK: ld.param.u64 [[RD2:%rd[0-9]+]], [foo_St8x2_param_0+8]; 366 ; CHECK: st.u64 [[[R1]]+8], [[RD2]]; 367 ; CHECK: ret; 368 %1 = load i64, ptr %in, align 8 369 store i64 %1, ptr %ret, align 8 370 %arrayidx.1 = getelementptr inbounds [2 x i64], ptr %in, i64 0, i64 1 371 %2 = load i64, ptr %arrayidx.1, align 8 372 %arrayidx3.1 = getelementptr inbounds [2 x i64], ptr %ret, i64 0, i64 1 373 store i64 %2, ptr %arrayidx3.1, align 8 374 ret void 375} 376 377define dso_local void @foo_St8x3(ptr nocapture noundef readonly byval(%struct.St8x3) align 8 %in, ptr nocapture noundef writeonly %ret) { 378 ; CHECK-LABEL: .visible .func foo_St8x3( 379 ; CHECK: .param .align 8 .b8 foo_St8x3_param_0[24], 380 ; CHECK: .param .b64 foo_St8x3_param_1 381 ; CHECK: ) 382 ; CHECK: ld.param.u64 [[R1:%rd[0-9]+]], [foo_St8x3_param_1]; 383 ; CHECK: ld.param.u64 [[RD1:%rd[0-9]+]], [foo_St8x3_param_0]; 384 ; CHECK: st.u64 [[[R1]]], [[RD1]]; 385 ; CHECK: ld.param.u64 [[RD2:%rd[0-9]+]], [foo_St8x3_param_0+8]; 386 ; CHECK: st.u64 [[[R1]]+8], [[RD2]]; 387 ; CHECK: ld.param.u64 [[RD3:%rd[0-9]+]], [foo_St8x3_param_0+16]; 388 ; CHECK: st.u64 [[[R1]]+16], [[RD3]]; 389 ; CHECK: ret; 390 %1 = load i64, ptr %in, align 8 391 store i64 %1, ptr %ret, align 8 392 %arrayidx.1 = getelementptr inbounds [3 x i64], ptr %in, i64 0, i64 1 393 %2 = load i64, ptr %arrayidx.1, align 8 394 %arrayidx3.1 = getelementptr inbounds [3 x i64], ptr %ret, i64 0, i64 1 395 store i64 %2, ptr %arrayidx3.1, align 8 396 %arrayidx.2 = getelementptr inbounds [3 x i64], ptr %in, i64 0, i64 2 397 %3 = load i64, ptr %arrayidx.2, align 8 398 %arrayidx3.2 = getelementptr inbounds [3 x i64], ptr %ret, i64 0, i64 2 399 store i64 %3, ptr %arrayidx3.2, align 8 400 ret void 401} 402 403define dso_local void @foo_St8x4(ptr nocapture noundef readonly byval(%struct.St8x4) align 8 %in, ptr nocapture noundef writeonly %ret) { 404 ; CHECK-LABEL: .visible .func foo_St8x4( 405 ; CHECK: .param .align 8 .b8 foo_St8x4_param_0[32], 406 ; CHECK: .param .b64 foo_St8x4_param_1 407 ; CHECK: ) 408 ; CHECK: ld.param.u64 [[R1:%rd[0-9]+]], [foo_St8x4_param_1]; 409 ; CHECK: ld.param.u64 [[RD1:%rd[0-9]+]], [foo_St8x4_param_0]; 410 ; CHECK: st.u64 [[[R1]]], [[RD1]]; 411 ; CHECK: ld.param.u64 [[RD2:%rd[0-9]+]], [foo_St8x4_param_0+8]; 412 ; CHECK: st.u64 [[[R1]]+8], [[RD2]]; 413 ; CHECK: ld.param.u64 [[RD3:%rd[0-9]+]], [foo_St8x4_param_0+16]; 414 ; CHECK: st.u64 [[[R1]]+16], [[RD3]]; 415 ; CHECK: ld.param.u64 [[RD4:%rd[0-9]+]], [foo_St8x4_param_0+24]; 416 ; CHECK: st.u64 [[[R1]]+24], [[RD4]]; 417 ; CHECK: ret; 418 %1 = load i64, ptr %in, align 8 419 store i64 %1, ptr %ret, align 8 420 %arrayidx.1 = getelementptr inbounds [4 x i64], ptr %in, i64 0, i64 1 421 %2 = load i64, ptr %arrayidx.1, align 8 422 %arrayidx3.1 = getelementptr inbounds [4 x i64], ptr %ret, i64 0, i64 1 423 store i64 %2, ptr %arrayidx3.1, align 8 424 %arrayidx.2 = getelementptr inbounds [4 x i64], ptr %in, i64 0, i64 2 425 %3 = load i64, ptr %arrayidx.2, align 8 426 %arrayidx3.2 = getelementptr inbounds [4 x i64], ptr %ret, i64 0, i64 2 427 store i64 %3, ptr %arrayidx3.2, align 8 428 %arrayidx.3 = getelementptr inbounds [4 x i64], ptr %in, i64 0, i64 3 429 %4 = load i64, ptr %arrayidx.3, align 8 430 %arrayidx3.3 = getelementptr inbounds [4 x i64], ptr %ret, i64 0, i64 3 431 store i64 %4, ptr %arrayidx3.3, align 8 432 ret void 433} 434