1; RUN: llc < %s -mtriple=nvptx64-unknown-unknown | FileCheck %s 2; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64-unknown-unknown | %ptxas-verify %} 3; 4; Check that parameters of a __device__ function with private or internal 5; linkage called from a __global__ (kernel) function get increased alignment, 6; and additional vectorization is performed on loads/stores with that 7; parameters. 8; 9; Test IR is a minimized version of IR generated with the following command 10; from the source code below: 11; $ clang++ -O3 --cuda-gpu-arch=sm_35 -S -emit-llvm src.cu 12; 13; ---------------------------------------------------------------------------- 14; #include <stdint.h> 15; 16; struct St4x1 { uint32_t field[1]; }; 17; struct St4x2 { uint32_t field[2]; }; 18; struct St4x3 { uint32_t field[3]; }; 19; struct St4x4 { uint32_t field[4]; }; 20; struct St4x5 { uint32_t field[5]; }; 21; struct St4x6 { uint32_t field[6]; }; 22; struct St4x7 { uint32_t field[7]; }; 23; struct St4x8 { uint32_t field[8]; }; 24; struct St8x1 { uint64_t field[1]; }; 25; struct St8x2 { uint64_t field[2]; }; 26; struct St8x3 { uint64_t field[3]; }; 27; struct St8x4 { uint64_t field[4]; }; 28; 29; #define DECLARE_CALLEE(StName) \ 30; static __device__ __attribute__((noinline)) \ 31; struct StName callee_##StName(struct StName in) { \ 32; struct StName ret; \ 33; const unsigned size = sizeof(ret.field) / sizeof(*ret.field); \ 34; for (unsigned i = 0; i != size; ++i) \ 35; ret.field[i] = in.field[i]; \ 36; return ret; \ 37; } \ 38 39; #define DECLARE_CALLER(StName) \ 40; __global__ \ 41; void caller_##StName(struct StName in, struct StName* ret) \ 42; { \ 43; *ret = callee_##StName(in); \ 44; } \ 45; 46; #define DECLARE_CALL(StName) \ 47; DECLARE_CALLEE(StName) \ 48; DECLARE_CALLER(StName) \ 49; 50; DECLARE_CALL(St4x1) 51; DECLARE_CALL(St4x2) 52; DECLARE_CALL(St4x3) 53; DECLARE_CALL(St4x4) 54; DECLARE_CALL(St4x5) 55; DECLARE_CALL(St4x6) 56; DECLARE_CALL(St4x7) 57; DECLARE_CALL(St4x8) 58; DECLARE_CALL(St8x1) 59; DECLARE_CALL(St8x2) 60; DECLARE_CALL(St8x3) 61; DECLARE_CALL(St8x4) 62; ---------------------------------------------------------------------------- 63 64%struct.St4x1 = type { [1 x i32] } 65%struct.St4x2 = type { [2 x i32] } 66%struct.St4x3 = type { [3 x i32] } 67%struct.St4x4 = type { [4 x i32] } 68%struct.St4x5 = type { [5 x i32] } 69%struct.St4x6 = type { [6 x i32] } 70%struct.St4x7 = type { [7 x i32] } 71%struct.St4x8 = type { [8 x i32] } 72%struct.St8x1 = type { [1 x i64] } 73%struct.St8x2 = type { [2 x i64] } 74%struct.St8x3 = type { [3 x i64] } 75%struct.St8x4 = type { [4 x i64] } 76 77; Section 1 - checking that: 78; - function argument (including retval) vectorization is done with internal linkage; 79; - caller and callee specify correct alignment for callee's params. 80 81define dso_local void @caller_St4x1(ptr nocapture noundef readonly byval(%struct.St4x1) align 4 %in, ptr nocapture noundef writeonly %ret) { 82 ; CHECK-LABEL: .visible .func caller_St4x1( 83 ; CHECK: .param .align 4 .b8 caller_St4x1_param_0[4], 84 ; CHECK: .param .b64 caller_St4x1_param_1 85 ; CHECK: ) 86 ; CHECK: .param .b32 param0; 87 ; CHECK: st.param.b32 [param0], {{%r[0-9]+}}; 88 ; CHECK: .param .align 16 .b8 retval0[4]; 89 ; CHECK: call.uni (retval0), 90 ; CHECK-NEXT: callee_St4x1, 91 ; CHECK-NEXT: ( 92 ; CHECK-NEXT: param0 93 ; CHECK-NEXT: ); 94 ; CHECK: ld.param.b32 {{%r[0-9]+}}, [retval0]; 95 %1 = load i32, ptr %in, align 4 96 %call = tail call fastcc [1 x i32] @callee_St4x1(i32 %1) #2 97 %.fca.0.extract = extractvalue [1 x i32] %call, 0 98 store i32 %.fca.0.extract, ptr %ret, align 4 99 ret void 100} 101 102define internal fastcc [1 x i32] @callee_St4x1(i32 %in.0.val) { 103 ; CHECK: .func (.param .align 16 .b8 func_retval0[4]) 104 ; CHECK-LABEL: callee_St4x1( 105 ; CHECK-NEXT: .param .b32 callee_St4x1_param_0 106 ; CHECK: ld.param.u32 [[R1:%r[0-9]+]], [callee_St4x1_param_0]; 107 ; CHECK: st.param.b32 [func_retval0], [[R1]]; 108 ; CHECK-NEXT: ret; 109 %oldret = insertvalue [1 x i32] poison, i32 %in.0.val, 0 110 ret [1 x i32] %oldret 111} 112 113define dso_local void @caller_St4x2(ptr nocapture noundef readonly byval(%struct.St4x2) align 4 %in, ptr nocapture noundef writeonly %ret) { 114 ; CHECK-LABEL: .visible .func caller_St4x2( 115 ; CHECK: .param .align 4 .b8 caller_St4x2_param_0[8], 116 ; CHECK: .param .b64 caller_St4x2_param_1 117 ; CHECK: ) 118 ; CHECK: .param .align 16 .b8 param0[8]; 119 ; CHECK: st.param.v2.b32 [param0], {{{%r[0-9]+}}, {{%r[0-9]+}}}; 120 ; CHECK: .param .align 16 .b8 retval0[8]; 121 ; CHECK: call.uni (retval0), 122 ; CHECK-NEXT: callee_St4x2, 123 ; CHECK-NEXT: ( 124 ; CHECK-NEXT: param0 125 ; CHECK-NEXT: ); 126 ; CHECK: ld.param.v2.b32 {{{%r[0-9]+}}, {{%r[0-9]+}}}, [retval0]; 127 %agg.tmp = alloca %struct.St4x2, align 8 128 %1 = load i64, ptr %in, align 4 129 store i64 %1, ptr %agg.tmp, align 8 130 %call = tail call fastcc [2 x i32] @callee_St4x2(ptr noundef nonnull byval(%struct.St4x2) align 4 %agg.tmp) #2 131 %.fca.0.extract = extractvalue [2 x i32] %call, 0 132 %.fca.1.extract = extractvalue [2 x i32] %call, 1 133 store i32 %.fca.0.extract, ptr %ret, align 4 134 %ref.tmp.sroa.4.0..sroa_idx = getelementptr inbounds i8, ptr %ret, i64 4 135 store i32 %.fca.1.extract, ptr %ref.tmp.sroa.4.0..sroa_idx, align 4 136 ret void 137} 138 139define internal fastcc [2 x i32] @callee_St4x2(ptr nocapture noundef readonly byval(%struct.St4x2) align 4 %in) { 140 ; CHECK: .func (.param .align 16 .b8 func_retval0[8]) 141 ; CHECK-LABEL: callee_St4x2( 142 ; CHECK-NEXT: .param .align 16 .b8 callee_St4x2_param_0[8] 143 ; CHECK: ld.param.v2.u32 {[[R1:%r[0-9]+]], [[R2:%r[0-9]+]]}, [callee_St4x2_param_0]; 144 ; CHECK: st.param.v2.b32 [func_retval0], {[[R1]], [[R2]]}; 145 ; CHECK-NEXT: ret; 146 %1 = load i32, ptr %in, align 4 147 %arrayidx.1 = getelementptr inbounds [2 x i32], ptr %in, i64 0, i64 1 148 %2 = load i32, ptr %arrayidx.1, align 4 149 %3 = insertvalue [2 x i32] poison, i32 %1, 0 150 %oldret = insertvalue [2 x i32] %3, i32 %2, 1 151 ret [2 x i32] %oldret 152} 153 154define dso_local void @caller_St4x3(ptr nocapture noundef readonly byval(%struct.St4x3) align 4 %in, ptr nocapture noundef writeonly %ret) { 155 ; CHECK-LABEL: .visible .func caller_St4x3( 156 ; CHECK: .param .align 4 .b8 caller_St4x3_param_0[12], 157 ; CHECK: .param .b64 caller_St4x3_param_1 158 ; CHECK: ) 159 ; CHECK: .param .align 16 .b8 param0[12]; 160 ; CHECK: st.param.v2.b32 [param0], {{{%r[0-9]+}}, {{%r[0-9]+}}}; 161 ; CHECK: st.param.b32 [param0+8], {{%r[0-9]+}}; 162 ; CHECK: .param .align 16 .b8 retval0[12]; 163 ; CHECK: call.uni (retval0), 164 ; CHECK-NEXT: callee_St4x3, 165 ; CHECK-NEXT: ( 166 ; CHECK-NEXT: param0 167 ; CHECK-NEXT: ); 168 ; CHECK: ld.param.v2.b32 {{{%r[0-9]+}}, {{%r[0-9]+}}}, [retval0]; 169 ; CHECK: ld.param.b32 {{%r[0-9]+}}, [retval0+8]; 170 %call = tail call fastcc [3 x i32] @callee_St4x3(ptr noundef nonnull byval(%struct.St4x3) align 4 %in) #2 171 %.fca.0.extract = extractvalue [3 x i32] %call, 0 172 %.fca.1.extract = extractvalue [3 x i32] %call, 1 173 %.fca.2.extract = extractvalue [3 x i32] %call, 2 174 store i32 %.fca.0.extract, ptr %ret, align 4 175 %ref.tmp.sroa.4.0..sroa_idx = getelementptr inbounds i8, ptr %ret, i64 4 176 store i32 %.fca.1.extract, ptr %ref.tmp.sroa.4.0..sroa_idx, align 4 177 %ref.tmp.sroa.5.0..sroa_idx = getelementptr inbounds i8, ptr %ret, i64 8 178 store i32 %.fca.2.extract, ptr %ref.tmp.sroa.5.0..sroa_idx, align 4 179 ret void 180} 181 182define internal fastcc [3 x i32] @callee_St4x3(ptr nocapture noundef readonly byval(%struct.St4x3) align 4 %in) { 183 ; CHECK: .func (.param .align 16 .b8 func_retval0[12]) 184 ; CHECK-LABEL: callee_St4x3( 185 ; CHECK-NEXT: .param .align 16 .b8 callee_St4x3_param_0[12] 186 ; CHECK: ld.param.v2.u32 {[[R1:%r[0-9]+]], [[R2:%r[0-9]+]]}, [callee_St4x3_param_0]; 187 ; CHECK: ld.param.u32 [[R3:%r[0-9]+]], [callee_St4x3_param_0+8]; 188 ; CHECK: st.param.v2.b32 [func_retval0], {[[R1]], [[R2]]}; 189 ; CHECK: st.param.b32 [func_retval0+8], [[R3]]; 190 ; CHECK-NEXT: ret; 191 %1 = load i32, ptr %in, align 4 192 %arrayidx.1 = getelementptr inbounds [3 x i32], ptr %in, i64 0, i64 1 193 %2 = load i32, ptr %arrayidx.1, align 4 194 %arrayidx.2 = getelementptr inbounds [3 x i32], ptr %in, i64 0, i64 2 195 %3 = load i32, ptr %arrayidx.2, align 4 196 %4 = insertvalue [3 x i32] poison, i32 %1, 0 197 %5 = insertvalue [3 x i32] %4, i32 %2, 1 198 %oldret = insertvalue [3 x i32] %5, i32 %3, 2 199 ret [3 x i32] %oldret 200} 201 202define dso_local void @caller_St4x4(ptr nocapture noundef readonly byval(%struct.St4x4) align 4 %in, ptr nocapture noundef writeonly %ret) { 203 ; CHECK-LABEL: .visible .func caller_St4x4( 204 ; CHECK: .param .align 4 .b8 caller_St4x4_param_0[16], 205 ; CHECK: .param .b64 caller_St4x4_param_1 206 ; CHECK: ) 207 ; CHECK: .param .align 16 .b8 param0[16]; 208 ; CHECK: st.param.v4.b32 [param0], {{{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}}; 209 ; CHECK: .param .align 16 .b8 retval0[16]; 210 ; CHECK: call.uni (retval0), 211 ; CHECK-NEXT: callee_St4x4, 212 ; CHECK-NEXT: ( 213 ; CHECK-NEXT: param0 214 ; CHECK-NEXT: ); 215 ; CHECK: ld.param.v4.b32 {{{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}}, [retval0]; 216 %call = tail call fastcc [4 x i32] @callee_St4x4(ptr noundef nonnull byval(%struct.St4x4) align 4 %in) #2 217 %.fca.0.extract = extractvalue [4 x i32] %call, 0 218 %.fca.1.extract = extractvalue [4 x i32] %call, 1 219 %.fca.2.extract = extractvalue [4 x i32] %call, 2 220 %.fca.3.extract = extractvalue [4 x i32] %call, 3 221 store i32 %.fca.0.extract, ptr %ret, align 4 222 %ref.tmp.sroa.4.0..sroa_idx = getelementptr inbounds i8, ptr %ret, i64 4 223 store i32 %.fca.1.extract, ptr %ref.tmp.sroa.4.0..sroa_idx, align 4 224 %ref.tmp.sroa.5.0..sroa_idx = getelementptr inbounds i8, ptr %ret, i64 8 225 store i32 %.fca.2.extract, ptr %ref.tmp.sroa.5.0..sroa_idx, align 4 226 %ref.tmp.sroa.6.0..sroa_idx = getelementptr inbounds i8, ptr %ret, i64 12 227 store i32 %.fca.3.extract, ptr %ref.tmp.sroa.6.0..sroa_idx, align 4 228 ret void 229} 230 231define internal fastcc [4 x i32] @callee_St4x4(ptr nocapture noundef readonly byval(%struct.St4x4) align 4 %in) { 232 ; CHECK: .func (.param .align 16 .b8 func_retval0[16]) 233 ; CHECK-LABEL: callee_St4x4( 234 ; CHECK-NEXT: .param .align 16 .b8 callee_St4x4_param_0[16] 235 ; CHECK: ld.param.v4.u32 {[[R1:%r[0-9]+]], [[R2:%r[0-9]+]], [[R3:%r[0-9]+]], [[R4:%r[0-9]+]]}, [callee_St4x4_param_0]; 236 ; CHECK: st.param.v4.b32 [func_retval0], {[[R1]], [[R2]], [[R3]], [[R4]]}; 237 ; CHECK-NEXT: ret; 238 %1 = load i32, ptr %in, align 4 239 %arrayidx.1 = getelementptr inbounds [4 x i32], ptr %in, i64 0, i64 1 240 %2 = load i32, ptr %arrayidx.1, align 4 241 %arrayidx.2 = getelementptr inbounds [4 x i32], ptr %in, i64 0, i64 2 242 %3 = load i32, ptr %arrayidx.2, align 4 243 %arrayidx.3 = getelementptr inbounds [4 x i32], ptr %in, i64 0, i64 3 244 %4 = load i32, ptr %arrayidx.3, align 4 245 %5 = insertvalue [4 x i32] poison, i32 %1, 0 246 %6 = insertvalue [4 x i32] %5, i32 %2, 1 247 %7 = insertvalue [4 x i32] %6, i32 %3, 2 248 %oldret = insertvalue [4 x i32] %7, i32 %4, 3 249 ret [4 x i32] %oldret 250} 251 252define dso_local void @caller_St4x5(ptr nocapture noundef readonly byval(%struct.St4x5) align 4 %in, ptr nocapture noundef writeonly %ret) { 253 ; CHECK-LABEL: .visible .func caller_St4x5( 254 ; CHECK: .param .align 4 .b8 caller_St4x5_param_0[20], 255 ; CHECK: .param .b64 caller_St4x5_param_1 256 ; CHECK: ) 257 ; CHECK: .param .align 16 .b8 param0[20]; 258 ; CHECK: st.param.v4.b32 [param0], {{{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}}; 259 ; CHECK: st.param.b32 [param0+16], {{%r[0-9]+}}; 260 ; CHECK: .param .align 16 .b8 retval0[20]; 261 ; CHECK: call.uni (retval0), 262 ; CHECK-NEXT: callee_St4x5, 263 ; CHECK-NEXT: ( 264 ; CHECK-NEXT: param0 265 ; CHECK-NEXT: ); 266 ; CHECK: ld.param.v4.b32 {{{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}}, [retval0]; 267 ; CHECK: ld.param.b32 {{%r[0-9]+}}, [retval0+16]; 268 %call = tail call fastcc [5 x i32] @callee_St4x5(ptr noundef nonnull byval(%struct.St4x5) align 4 %in) #2 269 %.fca.0.extract = extractvalue [5 x i32] %call, 0 270 %.fca.1.extract = extractvalue [5 x i32] %call, 1 271 %.fca.2.extract = extractvalue [5 x i32] %call, 2 272 %.fca.3.extract = extractvalue [5 x i32] %call, 3 273 %.fca.4.extract = extractvalue [5 x i32] %call, 4 274 store i32 %.fca.0.extract, ptr %ret, align 4 275 %ref.tmp.sroa.4.0..sroa_idx = getelementptr inbounds i8, ptr %ret, i64 4 276 store i32 %.fca.1.extract, ptr %ref.tmp.sroa.4.0..sroa_idx, align 4 277 %ref.tmp.sroa.5.0..sroa_idx = getelementptr inbounds i8, ptr %ret, i64 8 278 store i32 %.fca.2.extract, ptr %ref.tmp.sroa.5.0..sroa_idx, align 4 279 %ref.tmp.sroa.6.0..sroa_idx = getelementptr inbounds i8, ptr %ret, i64 12 280 store i32 %.fca.3.extract, ptr %ref.tmp.sroa.6.0..sroa_idx, align 4 281 %ref.tmp.sroa.7.0..sroa_idx = getelementptr inbounds i8, ptr %ret, i64 16 282 store i32 %.fca.4.extract, ptr %ref.tmp.sroa.7.0..sroa_idx, align 4 283 ret void 284} 285 286define internal fastcc [5 x i32] @callee_St4x5(ptr nocapture noundef readonly byval(%struct.St4x5) align 4 %in) { 287 ; CHECK: .func (.param .align 16 .b8 func_retval0[20]) 288 ; CHECK-LABEL: callee_St4x5( 289 ; CHECK-NEXT: .param .align 16 .b8 callee_St4x5_param_0[20] 290 ; CHECK: ld.param.v4.u32 {[[R1:%r[0-9]+]], [[R2:%r[0-9]+]], [[R3:%r[0-9]+]], [[R4:%r[0-9]+]]}, [callee_St4x5_param_0]; 291 ; CHECK: ld.param.u32 [[R5:%r[0-9]+]], [callee_St4x5_param_0+16]; 292 ; CHECK: st.param.v4.b32 [func_retval0], {[[R1]], [[R2]], [[R3]], [[R4]]}; 293 ; CHECK: st.param.b32 [func_retval0+16], [[R5]]; 294 ; CHECK-NEXT: ret; 295 %1 = load i32, ptr %in, align 4 296 %arrayidx.1 = getelementptr inbounds [5 x i32], ptr %in, i64 0, i64 1 297 %2 = load i32, ptr %arrayidx.1, align 4 298 %arrayidx.2 = getelementptr inbounds [5 x i32], ptr %in, i64 0, i64 2 299 %3 = load i32, ptr %arrayidx.2, align 4 300 %arrayidx.3 = getelementptr inbounds [5 x i32], ptr %in, i64 0, i64 3 301 %4 = load i32, ptr %arrayidx.3, align 4 302 %arrayidx.4 = getelementptr inbounds [5 x i32], ptr %in, i64 0, i64 4 303 %5 = load i32, ptr %arrayidx.4, align 4 304 %6 = insertvalue [5 x i32] poison, i32 %1, 0 305 %7 = insertvalue [5 x i32] %6, i32 %2, 1 306 %8 = insertvalue [5 x i32] %7, i32 %3, 2 307 %9 = insertvalue [5 x i32] %8, i32 %4, 3 308 %oldret = insertvalue [5 x i32] %9, i32 %5, 4 309 ret [5 x i32] %oldret 310} 311 312define dso_local void @caller_St4x6(ptr nocapture noundef readonly byval(%struct.St4x6) align 4 %in, ptr nocapture noundef writeonly %ret) { 313 ; CHECK-LABEL: .visible .func caller_St4x6( 314 ; CHECK: .param .align 4 .b8 caller_St4x6_param_0[24], 315 ; CHECK: .param .b64 caller_St4x6_param_1 316 ; CHECK: ) 317 ; CHECK: .param .align 16 .b8 param0[24]; 318 ; CHECK: st.param.v4.b32 [param0], {{{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}}; 319 ; CHECK: st.param.v2.b32 [param0+16], {{{%r[0-9]+}}, {{%r[0-9]+}}}; 320 ; CHECK: .param .align 16 .b8 retval0[24]; 321 ; CHECK: call.uni (retval0), 322 ; CHECK-NEXT: callee_St4x6, 323 ; CHECK-NEXT: ( 324 ; CHECK-NEXT: param0 325 ; CHECK-NEXT: ); 326 ; CHECK: ld.param.v4.b32 {{{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}}, [retval0]; 327 ; CHECK: ld.param.v2.b32 {{{%r[0-9]+}}, {{%r[0-9]+}}}, [retval0+16]; 328 %call = tail call fastcc [6 x i32] @callee_St4x6(ptr noundef nonnull byval(%struct.St4x6) align 4 %in) #2 329 %.fca.0.extract = extractvalue [6 x i32] %call, 0 330 %.fca.1.extract = extractvalue [6 x i32] %call, 1 331 %.fca.2.extract = extractvalue [6 x i32] %call, 2 332 %.fca.3.extract = extractvalue [6 x i32] %call, 3 333 %.fca.4.extract = extractvalue [6 x i32] %call, 4 334 %.fca.5.extract = extractvalue [6 x i32] %call, 5 335 store i32 %.fca.0.extract, ptr %ret, align 4 336 %ref.tmp.sroa.4.0..sroa_idx = getelementptr inbounds i8, ptr %ret, i64 4 337 store i32 %.fca.1.extract, ptr %ref.tmp.sroa.4.0..sroa_idx, align 4 338 %ref.tmp.sroa.5.0..sroa_idx = getelementptr inbounds i8, ptr %ret, i64 8 339 store i32 %.fca.2.extract, ptr %ref.tmp.sroa.5.0..sroa_idx, align 4 340 %ref.tmp.sroa.6.0..sroa_idx = getelementptr inbounds i8, ptr %ret, i64 12 341 store i32 %.fca.3.extract, ptr %ref.tmp.sroa.6.0..sroa_idx, align 4 342 %ref.tmp.sroa.7.0..sroa_idx = getelementptr inbounds i8, ptr %ret, i64 16 343 store i32 %.fca.4.extract, ptr %ref.tmp.sroa.7.0..sroa_idx, align 4 344 %ref.tmp.sroa.8.0..sroa_idx = getelementptr inbounds i8, ptr %ret, i64 20 345 store i32 %.fca.5.extract, ptr %ref.tmp.sroa.8.0..sroa_idx, align 4 346 ret void 347} 348 349define internal fastcc [6 x i32] @callee_St4x6(ptr nocapture noundef readonly byval(%struct.St4x6) align 4 %in) { 350 ; CHECK: .func (.param .align 16 .b8 func_retval0[24]) 351 ; CHECK-LABEL: callee_St4x6( 352 ; CHECK-NEXT: .param .align 16 .b8 callee_St4x6_param_0[24] 353 ; CHECK: ld.param.v4.u32 {[[R1:%r[0-9]+]], [[R2:%r[0-9]+]], [[R3:%r[0-9]+]], [[R4:%r[0-9]+]]}, [callee_St4x6_param_0]; 354 ; CHECK: ld.param.v2.u32 {[[R5:%r[0-9]+]], [[R6:%r[0-9]+]]}, [callee_St4x6_param_0+16]; 355 ; CHECK: st.param.v4.b32 [func_retval0], {[[R1]], [[R2]], [[R3]], [[R4]]}; 356 ; CHECK: st.param.v2.b32 [func_retval0+16], {[[R5]], [[R6]]}; 357 ; CHECK-NEXT: ret; 358 %1 = load i32, ptr %in, align 4 359 %arrayidx.1 = getelementptr inbounds [6 x i32], ptr %in, i64 0, i64 1 360 %2 = load i32, ptr %arrayidx.1, align 4 361 %arrayidx.2 = getelementptr inbounds [6 x i32], ptr %in, i64 0, i64 2 362 %3 = load i32, ptr %arrayidx.2, align 4 363 %arrayidx.3 = getelementptr inbounds [6 x i32], ptr %in, i64 0, i64 3 364 %4 = load i32, ptr %arrayidx.3, align 4 365 %arrayidx.4 = getelementptr inbounds [6 x i32], ptr %in, i64 0, i64 4 366 %5 = load i32, ptr %arrayidx.4, align 4 367 %arrayidx.5 = getelementptr inbounds [6 x i32], ptr %in, i64 0, i64 5 368 %6 = load i32, ptr %arrayidx.5, align 4 369 %7 = insertvalue [6 x i32] poison, i32 %1, 0 370 %8 = insertvalue [6 x i32] %7, i32 %2, 1 371 %9 = insertvalue [6 x i32] %8, i32 %3, 2 372 %10 = insertvalue [6 x i32] %9, i32 %4, 3 373 %11 = insertvalue [6 x i32] %10, i32 %5, 4 374 %oldret = insertvalue [6 x i32] %11, i32 %6, 5 375 ret [6 x i32] %oldret 376} 377 378define dso_local void @caller_St4x7(ptr nocapture noundef readonly byval(%struct.St4x7) align 4 %in, ptr nocapture noundef writeonly %ret) { 379 ; CHECK-LABEL: .visible .func caller_St4x7( 380 ; CHECK: .param .align 4 .b8 caller_St4x7_param_0[28], 381 ; CHECK: .param .b64 caller_St4x7_param_1 382 ; CHECK: ) 383 ; CHECK: .param .align 16 .b8 param0[28]; 384 ; CHECK: st.param.v4.b32 [param0], {{{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}}; 385 ; CHECK: st.param.v2.b32 [param0+16], {{{%r[0-9]+}}, {{%r[0-9]+}}}; 386 ; CHECK: st.param.b32 [param0+24], {{%r[0-9]+}}; 387 ; CHECK: .param .align 16 .b8 retval0[28]; 388 ; CHECK: call.uni (retval0), 389 ; CHECK-NEXT: callee_St4x7, 390 ; CHECK-NEXT: ( 391 ; CHECK-NEXT: param0 392 ; CHECK-NEXT: ); 393 ; CHECK: ld.param.v4.b32 {{{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}}, [retval0]; 394 ; CHECK: ld.param.v2.b32 {{{%r[0-9]+}}, {{%r[0-9]+}}}, [retval0+16]; 395 ; CHECK: ld.param.b32 {{%r[0-9]+}}, [retval0+24]; 396 %call = tail call fastcc [7 x i32] @callee_St4x7(ptr noundef nonnull byval(%struct.St4x7) align 4 %in) #2 397 %.fca.0.extract = extractvalue [7 x i32] %call, 0 398 %.fca.1.extract = extractvalue [7 x i32] %call, 1 399 %.fca.2.extract = extractvalue [7 x i32] %call, 2 400 %.fca.3.extract = extractvalue [7 x i32] %call, 3 401 %.fca.4.extract = extractvalue [7 x i32] %call, 4 402 %.fca.5.extract = extractvalue [7 x i32] %call, 5 403 %.fca.6.extract = extractvalue [7 x i32] %call, 6 404 store i32 %.fca.0.extract, ptr %ret, align 4 405 %ref.tmp.sroa.4.0..sroa_idx = getelementptr inbounds i8, ptr %ret, i64 4 406 store i32 %.fca.1.extract, ptr %ref.tmp.sroa.4.0..sroa_idx, align 4 407 %ref.tmp.sroa.5.0..sroa_idx = getelementptr inbounds i8, ptr %ret, i64 8 408 store i32 %.fca.2.extract, ptr %ref.tmp.sroa.5.0..sroa_idx, align 4 409 %ref.tmp.sroa.6.0..sroa_idx = getelementptr inbounds i8, ptr %ret, i64 12 410 store i32 %.fca.3.extract, ptr %ref.tmp.sroa.6.0..sroa_idx, align 4 411 %ref.tmp.sroa.7.0..sroa_idx = getelementptr inbounds i8, ptr %ret, i64 16 412 store i32 %.fca.4.extract, ptr %ref.tmp.sroa.7.0..sroa_idx, align 4 413 %ref.tmp.sroa.8.0..sroa_idx = getelementptr inbounds i8, ptr %ret, i64 20 414 store i32 %.fca.5.extract, ptr %ref.tmp.sroa.8.0..sroa_idx, align 4 415 %ref.tmp.sroa.9.0..sroa_idx = getelementptr inbounds i8, ptr %ret, i64 24 416 store i32 %.fca.6.extract, ptr %ref.tmp.sroa.9.0..sroa_idx, align 4 417 ret void 418} 419 420define internal fastcc [7 x i32] @callee_St4x7(ptr nocapture noundef readonly byval(%struct.St4x7) align 4 %in) { 421 ; CHECK: .func (.param .align 16 .b8 func_retval0[28]) 422 ; CHECK-LABEL: callee_St4x7( 423 ; CHECK-NEXT: .param .align 16 .b8 callee_St4x7_param_0[28] 424 ; CHECK: ld.param.v4.u32 {[[R1:%r[0-9]+]], [[R2:%r[0-9]+]], [[R3:%r[0-9]+]], [[R4:%r[0-9]+]]}, [callee_St4x7_param_0]; 425 ; CHECK: ld.param.v2.u32 {[[R5:%r[0-9]+]], [[R6:%r[0-9]+]]}, [callee_St4x7_param_0+16]; 426 ; CHECK: ld.param.u32 [[R7:%r[0-9]+]], [callee_St4x7_param_0+24]; 427 ; CHECK: st.param.v4.b32 [func_retval0], {[[R1]], [[R2]], [[R3]], [[R4]]}; 428 ; CHECK: st.param.v2.b32 [func_retval0+16], {[[R5]], [[R6]]}; 429 ; CHECK: st.param.b32 [func_retval0+24], [[R7]]; 430 ; CHECK-NEXT: ret; 431 %1 = load i32, ptr %in, align 4 432 %arrayidx.1 = getelementptr inbounds [7 x i32], ptr %in, i64 0, i64 1 433 %2 = load i32, ptr %arrayidx.1, align 4 434 %arrayidx.2 = getelementptr inbounds [7 x i32], ptr %in, i64 0, i64 2 435 %3 = load i32, ptr %arrayidx.2, align 4 436 %arrayidx.3 = getelementptr inbounds [7 x i32], ptr %in, i64 0, i64 3 437 %4 = load i32, ptr %arrayidx.3, align 4 438 %arrayidx.4 = getelementptr inbounds [7 x i32], ptr %in, i64 0, i64 4 439 %5 = load i32, ptr %arrayidx.4, align 4 440 %arrayidx.5 = getelementptr inbounds [7 x i32], ptr %in, i64 0, i64 5 441 %6 = load i32, ptr %arrayidx.5, align 4 442 %arrayidx.6 = getelementptr inbounds [7 x i32], ptr %in, i64 0, i64 6 443 %7 = load i32, ptr %arrayidx.6, align 4 444 %8 = insertvalue [7 x i32] poison, i32 %1, 0 445 %9 = insertvalue [7 x i32] %8, i32 %2, 1 446 %10 = insertvalue [7 x i32] %9, i32 %3, 2 447 %11 = insertvalue [7 x i32] %10, i32 %4, 3 448 %12 = insertvalue [7 x i32] %11, i32 %5, 4 449 %13 = insertvalue [7 x i32] %12, i32 %6, 5 450 %oldret = insertvalue [7 x i32] %13, i32 %7, 6 451 ret [7 x i32] %oldret 452} 453 454define dso_local void @caller_St4x8(ptr nocapture noundef readonly byval(%struct.St4x8) align 4 %in, ptr nocapture noundef writeonly %ret) { 455 ; CHECK-LABEL: .visible .func caller_St4x8( 456 ; CHECK: .param .align 4 .b8 caller_St4x8_param_0[32], 457 ; CHECK: .param .b64 caller_St4x8_param_1 458 ; CHECK: ) 459 ; CHECK: .param .align 16 .b8 param0[32]; 460 ; CHECK: st.param.v4.b32 [param0], {{{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}}; 461 ; CHECK: st.param.v4.b32 [param0+16], {{{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}}; 462 ; CHECK: .param .align 16 .b8 retval0[32]; 463 ; CHECK: call.uni (retval0), 464 ; CHECK-NEXT: callee_St4x8, 465 ; CHECK-NEXT: ( 466 ; CHECK-NEXT: param0 467 ; CHECK-NEXT: ); 468 ; CHECK: ld.param.v4.b32 {{{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}}, [retval0]; 469 ; CHECK: ld.param.v4.b32 {{{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}}, [retval0+16]; 470 %call = tail call fastcc [8 x i32] @callee_St4x8(ptr noundef nonnull byval(%struct.St4x8) align 4 %in) #2 471 %.fca.0.extract = extractvalue [8 x i32] %call, 0 472 %.fca.1.extract = extractvalue [8 x i32] %call, 1 473 %.fca.2.extract = extractvalue [8 x i32] %call, 2 474 %.fca.3.extract = extractvalue [8 x i32] %call, 3 475 %.fca.4.extract = extractvalue [8 x i32] %call, 4 476 %.fca.5.extract = extractvalue [8 x i32] %call, 5 477 %.fca.6.extract = extractvalue [8 x i32] %call, 6 478 %.fca.7.extract = extractvalue [8 x i32] %call, 7 479 store i32 %.fca.0.extract, ptr %ret, align 4 480 %ref.tmp.sroa.4.0..sroa_idx = getelementptr inbounds i8, ptr %ret, i64 4 481 store i32 %.fca.1.extract, ptr %ref.tmp.sroa.4.0..sroa_idx, align 4 482 %ref.tmp.sroa.5.0..sroa_idx = getelementptr inbounds i8, ptr %ret, i64 8 483 store i32 %.fca.2.extract, ptr %ref.tmp.sroa.5.0..sroa_idx, align 4 484 %ref.tmp.sroa.6.0..sroa_idx = getelementptr inbounds i8, ptr %ret, i64 12 485 store i32 %.fca.3.extract, ptr %ref.tmp.sroa.6.0..sroa_idx, align 4 486 %ref.tmp.sroa.7.0..sroa_idx = getelementptr inbounds i8, ptr %ret, i64 16 487 store i32 %.fca.4.extract, ptr %ref.tmp.sroa.7.0..sroa_idx, align 4 488 %ref.tmp.sroa.8.0..sroa_idx = getelementptr inbounds i8, ptr %ret, i64 20 489 store i32 %.fca.5.extract, ptr %ref.tmp.sroa.8.0..sroa_idx, align 4 490 %ref.tmp.sroa.9.0..sroa_idx = getelementptr inbounds i8, ptr %ret, i64 24 491 store i32 %.fca.6.extract, ptr %ref.tmp.sroa.9.0..sroa_idx, align 4 492 %ref.tmp.sroa.10.0..sroa_idx = getelementptr inbounds i8, ptr %ret, i64 28 493 store i32 %.fca.7.extract, ptr %ref.tmp.sroa.10.0..sroa_idx, align 4 494 ret void 495} 496 497define internal fastcc [8 x i32] @callee_St4x8(ptr nocapture noundef readonly byval(%struct.St4x8) align 4 %in) { 498 ; CHECK: .func (.param .align 16 .b8 func_retval0[32]) 499 ; CHECK-LABEL: callee_St4x8( 500 ; CHECK-NEXT: .param .align 16 .b8 callee_St4x8_param_0[32] 501 ; CHECK: ld.param.v4.u32 {[[R1:%r[0-9]+]], [[R2:%r[0-9]+]], [[R3:%r[0-9]+]], [[R4:%r[0-9]+]]}, [callee_St4x8_param_0]; 502 ; CHECK: ld.param.v4.u32 {[[R5:%r[0-9]+]], [[R6:%r[0-9]+]], [[R7:%r[0-9]+]], [[R8:%r[0-9]+]]}, [callee_St4x8_param_0+16]; 503 ; CHECK: st.param.v4.b32 [func_retval0], {[[R1]], [[R2]], [[R3]], [[R4]]}; 504 ; CHECK: st.param.v4.b32 [func_retval0+16], {[[R5]], [[R6]], [[R7]], [[R8]]}; 505 ; CHECK-NEXT: ret; 506 %1 = load i32, ptr %in, align 4 507 %arrayidx.1 = getelementptr inbounds [8 x i32], ptr %in, i64 0, i64 1 508 %2 = load i32, ptr %arrayidx.1, align 4 509 %arrayidx.2 = getelementptr inbounds [8 x i32], ptr %in, i64 0, i64 2 510 %3 = load i32, ptr %arrayidx.2, align 4 511 %arrayidx.3 = getelementptr inbounds [8 x i32], ptr %in, i64 0, i64 3 512 %4 = load i32, ptr %arrayidx.3, align 4 513 %arrayidx.4 = getelementptr inbounds [8 x i32], ptr %in, i64 0, i64 4 514 %5 = load i32, ptr %arrayidx.4, align 4 515 %arrayidx.5 = getelementptr inbounds [8 x i32], ptr %in, i64 0, i64 5 516 %6 = load i32, ptr %arrayidx.5, align 4 517 %arrayidx.6 = getelementptr inbounds [8 x i32], ptr %in, i64 0, i64 6 518 %7 = load i32, ptr %arrayidx.6, align 4 519 %arrayidx.7 = getelementptr inbounds [8 x i32], ptr %in, i64 0, i64 7 520 %8 = load i32, ptr %arrayidx.7, align 4 521 %9 = insertvalue [8 x i32] poison, i32 %1, 0 522 %10 = insertvalue [8 x i32] %9, i32 %2, 1 523 %11 = insertvalue [8 x i32] %10, i32 %3, 2 524 %12 = insertvalue [8 x i32] %11, i32 %4, 3 525 %13 = insertvalue [8 x i32] %12, i32 %5, 4 526 %14 = insertvalue [8 x i32] %13, i32 %6, 5 527 %15 = insertvalue [8 x i32] %14, i32 %7, 6 528 %oldret = insertvalue [8 x i32] %15, i32 %8, 7 529 ret [8 x i32] %oldret 530} 531 532define dso_local void @caller_St8x1(ptr nocapture noundef readonly byval(%struct.St8x1) align 8 %in, ptr nocapture noundef writeonly %ret) { 533 ; CHECK-LABEL: .visible .func caller_St8x1( 534 ; CHECK: .param .align 8 .b8 caller_St8x1_param_0[8], 535 ; CHECK: .param .b64 caller_St8x1_param_1 536 ; CHECK: ) 537 ; CHECK: .param .b64 param0; 538 ; CHECK: st.param.b64 [param0], {{%rd[0-9]+}}; 539 ; CHECK: .param .align 16 .b8 retval0[8]; 540 ; CHECK: call.uni (retval0), 541 ; CHECK-NEXT: callee_St8x1, 542 ; CHECK-NEXT: ( 543 ; CHECK-NEXT: param0 544 ; CHECK-NEXT: ); 545 ; CHECK: ld.param.b64 {{%rd[0-9]+}}, [retval0]; 546 %1 = load i64, ptr %in, align 8 547 %call = tail call fastcc [1 x i64] @callee_St8x1(i64 %1) #2 548 %.fca.0.extract = extractvalue [1 x i64] %call, 0 549 store i64 %.fca.0.extract, ptr %ret, align 8 550 ret void 551} 552 553define internal fastcc [1 x i64] @callee_St8x1(i64 %in.0.val) { 554 ; CHECK: .func (.param .align 16 .b8 func_retval0[8]) 555 ; CHECK-LABEL: callee_St8x1( 556 ; CHECK-NEXT: .param .b64 callee_St8x1_param_0 557 ; CHECK: ld.param.u64 [[RD1:%rd[0-9]+]], [callee_St8x1_param_0]; 558 ; CHECK: st.param.b64 [func_retval0], [[RD1]]; 559 ; CHECK-NEXT: ret; 560 %oldret = insertvalue [1 x i64] poison, i64 %in.0.val, 0 561 ret [1 x i64] %oldret 562} 563 564define dso_local void @caller_St8x2(ptr nocapture noundef readonly byval(%struct.St8x2) align 8 %in, ptr nocapture noundef writeonly %ret) { 565 ; CHECK-LABEL: .visible .func caller_St8x2( 566 ; CHECK: .param .align 8 .b8 caller_St8x2_param_0[16], 567 ; CHECK: .param .b64 caller_St8x2_param_1 568 ; CHECK: ) 569 ; CHECK: .param .align 16 .b8 param0[16]; 570 ; CHECK: st.param.v2.b64 [param0], {{{%rd[0-9]+}}, {{%rd[0-9]+}}}; 571 ; CHECK: .param .align 16 .b8 retval0[16]; 572 ; CHECK: call.uni (retval0), 573 ; CHECK-NEXT: callee_St8x2, 574 ; CHECK-NEXT: ( 575 ; CHECK-NEXT: param0 576 ; CHECK-NEXT: ); 577 ; CHECK: ld.param.v2.b64 {{{%rd[0-9]+}}, {{%rd[0-9]+}}}, [retval0]; 578 %call = tail call fastcc [2 x i64] @callee_St8x2(ptr noundef nonnull byval(%struct.St8x2) align 8 %in) #2 579 %.fca.0.extract = extractvalue [2 x i64] %call, 0 580 %.fca.1.extract = extractvalue [2 x i64] %call, 1 581 store i64 %.fca.0.extract, ptr %ret, align 8 582 %ref.tmp.sroa.4.0..sroa_idx = getelementptr inbounds i8, ptr %ret, i64 8 583 store i64 %.fca.1.extract, ptr %ref.tmp.sroa.4.0..sroa_idx, align 8 584 ret void 585} 586 587define internal fastcc [2 x i64] @callee_St8x2(ptr nocapture noundef readonly byval(%struct.St8x2) align 8 %in) { 588 ; CHECK: .func (.param .align 16 .b8 func_retval0[16]) 589 ; CHECK-LABEL: callee_St8x2( 590 ; CHECK-NEXT: .param .align 16 .b8 callee_St8x2_param_0[16] 591 ; CHECK: ld.param.v2.u64 {[[RD1:%rd[0-9]+]], [[RD2:%rd[0-9]+]]}, [callee_St8x2_param_0]; 592 ; CHECK: st.param.v2.b64 [func_retval0], {[[RD1]], [[RD2]]}; 593 ; CHECK-NEXT: ret; 594 %1 = load i64, ptr %in, align 8 595 %arrayidx.1 = getelementptr inbounds [2 x i64], ptr %in, i64 0, i64 1 596 %2 = load i64, ptr %arrayidx.1, align 8 597 %3 = insertvalue [2 x i64] poison, i64 %1, 0 598 %oldret = insertvalue [2 x i64] %3, i64 %2, 1 599 ret [2 x i64] %oldret 600} 601 602define dso_local void @caller_St8x3(ptr nocapture noundef readonly byval(%struct.St8x3) align 8 %in, ptr nocapture noundef writeonly %ret) { 603 ; CHECK-LABEL: .visible .func caller_St8x3( 604 ; CHECK: .param .align 8 .b8 caller_St8x3_param_0[24], 605 ; CHECK: .param .b64 caller_St8x3_param_1 606 ; CHECK: ) 607 ; CHECK: .param .align 16 .b8 param0[24]; 608 ; CHECK: st.param.v2.b64 [param0], {{{%rd[0-9]+}}, {{%rd[0-9]+}}}; 609 ; CHECK: st.param.b64 [param0+16], {{%rd[0-9]+}}; 610 ; CHECK: .param .align 16 .b8 retval0[24]; 611 ; CHECK: call.uni (retval0), 612 ; CHECK-NEXT: callee_St8x3, 613 ; CHECK-NEXT: ( 614 ; CHECK-NEXT: param0 615 ; CHECK-NEXT: ); 616 ; CHECK: ld.param.v2.b64 {{{%rd[0-9]+}}, {{%rd[0-9]+}}}, [retval0]; 617 ; CHECK: ld.param.b64 {{%rd[0-9]+}}, [retval0+16]; 618 %call = tail call fastcc [3 x i64] @callee_St8x3(ptr noundef nonnull byval(%struct.St8x3) align 8 %in) #2 619 %.fca.0.extract = extractvalue [3 x i64] %call, 0 620 %.fca.1.extract = extractvalue [3 x i64] %call, 1 621 %.fca.2.extract = extractvalue [3 x i64] %call, 2 622 store i64 %.fca.0.extract, ptr %ret, align 8 623 %ref.tmp.sroa.4.0..sroa_idx = getelementptr inbounds i8, ptr %ret, i64 8 624 store i64 %.fca.1.extract, ptr %ref.tmp.sroa.4.0..sroa_idx, align 8 625 %ref.tmp.sroa.5.0..sroa_idx = getelementptr inbounds i8, ptr %ret, i64 16 626 store i64 %.fca.2.extract, ptr %ref.tmp.sroa.5.0..sroa_idx, align 8 627 ret void 628} 629 630define internal fastcc [3 x i64] @callee_St8x3(ptr nocapture noundef readonly byval(%struct.St8x3) align 8 %in) { 631 ; CHECK: .func (.param .align 16 .b8 func_retval0[24]) 632 ; CHECK-LABEL: callee_St8x3( 633 ; CHECK-NEXT: .param .align 16 .b8 callee_St8x3_param_0[24] 634 ; CHECK: ld.param.v2.u64 {[[RD1:%rd[0-9]+]], [[RD2:%rd[0-9]+]]}, [callee_St8x3_param_0]; 635 ; CHECK: ld.param.u64 [[RD3:%rd[0-9]+]], [callee_St8x3_param_0+16]; 636 ; CHECK: st.param.v2.b64 [func_retval0], {[[RD1]], [[RD2]]}; 637 ; CHECK: st.param.b64 [func_retval0+16], [[RD3]]; 638 ; CHECK-NEXT: ret; 639 %1 = load i64, ptr %in, align 8 640 %arrayidx.1 = getelementptr inbounds [3 x i64], ptr %in, i64 0, i64 1 641 %2 = load i64, ptr %arrayidx.1, align 8 642 %arrayidx.2 = getelementptr inbounds [3 x i64], ptr %in, i64 0, i64 2 643 %3 = load i64, ptr %arrayidx.2, align 8 644 %4 = insertvalue [3 x i64] poison, i64 %1, 0 645 %5 = insertvalue [3 x i64] %4, i64 %2, 1 646 %oldret = insertvalue [3 x i64] %5, i64 %3, 2 647 ret [3 x i64] %oldret 648} 649 650define dso_local void @caller_St8x4(ptr nocapture noundef readonly byval(%struct.St8x4) align 8 %in, ptr nocapture noundef writeonly %ret) { 651 ; CHECK-LABEL: .visible .func caller_St8x4( 652 ; CHECK: .param .align 8 .b8 caller_St8x4_param_0[32], 653 ; CHECK: .param .b64 caller_St8x4_param_1 654 ; CHECK: ) 655 ; CHECK: .param .align 16 .b8 param0[32]; 656 ; CHECK: st.param.v2.b64 [param0], {{{%rd[0-9]+}}, {{%rd[0-9]+}}}; 657 ; CHECK: st.param.v2.b64 [param0+16], {{{%rd[0-9]+}}, {{%rd[0-9]+}}}; 658 ; CHECK: .param .align 16 .b8 retval0[32]; 659 ; CHECK: call.uni (retval0), 660 ; CHECK-NEXT: callee_St8x4, 661 ; CHECK-NEXT: ( 662 ; CHECK-NEXT: param0 663 ; CHECK-NEXT: ); 664 ; CHECK: ld.param.v2.b64 {{{%rd[0-9]+}}, {{%rd[0-9]+}}}, [retval0]; 665 ; CHECK: ld.param.v2.b64 {{{%rd[0-9]+}}, {{%rd[0-9]+}}}, [retval0+16]; 666 %call = tail call fastcc [4 x i64] @callee_St8x4(ptr noundef nonnull byval(%struct.St8x4) align 8 %in) #2 667 %.fca.0.extract = extractvalue [4 x i64] %call, 0 668 %.fca.1.extract = extractvalue [4 x i64] %call, 1 669 %.fca.2.extract = extractvalue [4 x i64] %call, 2 670 %.fca.3.extract = extractvalue [4 x i64] %call, 3 671 store i64 %.fca.0.extract, ptr %ret, align 8 672 %ref.tmp.sroa.4.0..sroa_idx = getelementptr inbounds i8, ptr %ret, i64 8 673 store i64 %.fca.1.extract, ptr %ref.tmp.sroa.4.0..sroa_idx, align 8 674 %ref.tmp.sroa.5.0..sroa_idx = getelementptr inbounds i8, ptr %ret, i64 16 675 store i64 %.fca.2.extract, ptr %ref.tmp.sroa.5.0..sroa_idx, align 8 676 %ref.tmp.sroa.6.0..sroa_idx = getelementptr inbounds i8, ptr %ret, i64 24 677 store i64 %.fca.3.extract, ptr %ref.tmp.sroa.6.0..sroa_idx, align 8 678 ret void 679} 680 681define internal fastcc [4 x i64] @callee_St8x4(ptr nocapture noundef readonly byval(%struct.St8x4) align 8 %in) { 682 ; CHECK: .func (.param .align 16 .b8 func_retval0[32]) 683 ; CHECK-LABEL: callee_St8x4( 684 ; CHECK-NEXT: .param .align 16 .b8 callee_St8x4_param_0[32] 685 ; CHECK: ld.param.v2.u64 {[[RD1:%rd[0-9]+]], [[RD2:%rd[0-9]+]]}, [callee_St8x4_param_0]; 686 ; CHECK: ld.param.v2.u64 {[[RD3:%rd[0-9]+]], [[RD4:%rd[0-9]+]]}, [callee_St8x4_param_0+16]; 687 ; CHECK: st.param.v2.b64 [func_retval0], {[[RD1]], [[RD2]]}; 688 ; CHECK: st.param.v2.b64 [func_retval0+16], {[[RD3]], [[RD4]]}; 689 ; CHECK-NEXT: ret; 690 %1 = load i64, ptr %in, align 8 691 %arrayidx.1 = getelementptr inbounds [4 x i64], ptr %in, i64 0, i64 1 692 %2 = load i64, ptr %arrayidx.1, align 8 693 %arrayidx.2 = getelementptr inbounds [4 x i64], ptr %in, i64 0, i64 2 694 %3 = load i64, ptr %arrayidx.2, align 8 695 %arrayidx.3 = getelementptr inbounds [4 x i64], ptr %in, i64 0, i64 3 696 %4 = load i64, ptr %arrayidx.3, align 8 697 %5 = insertvalue [4 x i64] poison, i64 %1, 0 698 %6 = insertvalue [4 x i64] %5, i64 %2, 1 699 %7 = insertvalue [4 x i64] %6, i64 %3, 2 700 %oldret = insertvalue [4 x i64] %7, i64 %4, 3 701 ret [4 x i64] %oldret 702} 703 704; Section 2 - checking that function argument (including retval) vectorization is done with private linkage. 705 706define private fastcc [4 x i32] @callee_St4x4_private(ptr nocapture noundef readonly byval(%struct.St4x4) align 4 %in) { 707 ; CHECK: .func (.param .align 16 .b8 func_retval0[16]) 708 ; CHECK-LABEL: callee_St4x4_private( 709 ; CHECK-NEXT: .param .align 16 .b8 callee_St4x4_private_param_0[16] 710 ; CHECK: ld.param.v4.u32 {[[R1:%r[0-9]+]], [[R2:%r[0-9]+]], [[R3:%r[0-9]+]], [[R4:%r[0-9]+]]}, [callee_St4x4_private_param_0]; 711 ; CHECK: st.param.v4.b32 [func_retval0], {[[R1]], [[R2]], [[R3]], [[R4]]}; 712 ; CHECK-NEXT: ret; 713 %1 = load i32, ptr %in, align 4 714 %arrayidx.1 = getelementptr inbounds [4 x i32], ptr %in, i64 0, i64 1 715 %2 = load i32, ptr %arrayidx.1, align 4 716 %arrayidx.2 = getelementptr inbounds [4 x i32], ptr %in, i64 0, i64 2 717 %3 = load i32, ptr %arrayidx.2, align 4 718 %arrayidx.3 = getelementptr inbounds [4 x i32], ptr %in, i64 0, i64 3 719 %4 = load i32, ptr %arrayidx.3, align 4 720 %5 = insertvalue [4 x i32] poison, i32 %1, 0 721 %6 = insertvalue [4 x i32] %5, i32 %2, 1 722 %7 = insertvalue [4 x i32] %6, i32 %3, 2 723 %oldret = insertvalue [4 x i32] %7, i32 %4, 3 724 ret [4 x i32] %oldret 725} 726 727; Section 3 - checking that function argument (including retval) vectorization 728; is NOT done with linkage types other than internal and private. 729 730define external fastcc [4 x i32] @callee_St4x4_external(ptr nocapture noundef readonly byval(%struct.St4x4) align 4 %in) { 731 ; CHECK: .func (.param .align 4 .b8 func_retval0[16]) 732 ; CHECK-LABEL: callee_St4x4_external( 733 ; CHECK-NEXT: .param .align 4 .b8 callee_St4x4_external_param_0[16] 734 ; CHECK: ld.param.u32 [[R1:%r[0-9]+]], [callee_St4x4_external_param_0]; 735 ; CHECK: ld.param.u32 [[R2:%r[0-9]+]], [callee_St4x4_external_param_0+4]; 736 ; CHECK: ld.param.u32 [[R3:%r[0-9]+]], [callee_St4x4_external_param_0+8]; 737 ; CHECK: ld.param.u32 [[R4:%r[0-9]+]], [callee_St4x4_external_param_0+12]; 738 ; CHECK: st.param.b32 [func_retval0], [[R1]]; 739 ; CHECK: st.param.b32 [func_retval0+4], [[R2]]; 740 ; CHECK: st.param.b32 [func_retval0+8], [[R3]]; 741 ; CHECK: st.param.b32 [func_retval0+12], [[R4]]; 742 ; CHECK-NEXT: ret; 743 %1 = load i32, ptr %in, align 4 744 %arrayidx.1 = getelementptr inbounds [4 x i32], ptr %in, i64 0, i64 1 745 %2 = load i32, ptr %arrayidx.1, align 4 746 %arrayidx.2 = getelementptr inbounds [4 x i32], ptr %in, i64 0, i64 2 747 %3 = load i32, ptr %arrayidx.2, align 4 748 %arrayidx.3 = getelementptr inbounds [4 x i32], ptr %in, i64 0, i64 3 749 %4 = load i32, ptr %arrayidx.3, align 4 750 %5 = insertvalue [4 x i32] poison, i32 %1, 0 751 %6 = insertvalue [4 x i32] %5, i32 %2, 1 752 %7 = insertvalue [4 x i32] %6, i32 %3, 2 753 %oldret = insertvalue [4 x i32] %7, i32 %4, 3 754 ret [4 x i32] %oldret 755} 756 757