; RUN: opt < %s -passes=loop-vectorize -force-vector-width=2 -force-vector-interleave=1 -S -pass-remarks-analysis=loop-vectorize 2>%t | FileCheck %s
; RUN: cat %t | FileCheck --check-prefix=CHECK-REMARKS %s

target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"

; Tests basic vectorization of homogeneous struct literal returns.

; TODO: Support vectorization in this case.
; CHECK-REMARKS: remark: {{.*}} loop not vectorized: Auto-vectorization of calls that return struct types is not yet supported
define void @struct_return_f32_widen(ptr noalias %in, ptr noalias writeonly %out_a, ptr noalias writeonly %out_b) {
; CHECK-LABEL: define void @struct_return_f32_widen
; CHECK-NOT: vector.body:
entry:
  br label %for.body

for.body:
  ; 1024 iterations: load in[iv], call @foo (attribute #0 maps it to
  ; @fixed_vec_foo), then store field 0 to out_a[iv] and field 1 to out_b[iv].
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %in.gep = getelementptr inbounds float, ptr %in, i64 %iv
  %in_val = load float, ptr %in.gep, align 4
  %call = tail call { float, float } @foo(float %in_val) #0
  %extract_a = extractvalue { float, float } %call, 0
  %extract_b = extractvalue { float, float } %call, 1
  %out_a.gep = getelementptr inbounds float, ptr %out_a, i64 %iv
  store float %extract_a, ptr %out_a.gep, align 4
  %out_b.gep = getelementptr inbounds float, ptr %out_b, i64 %iv
  store float %extract_b, ptr %out_b.gep, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %done = icmp eq i64 %iv.next, 1024
  br i1 %done, label %exit, label %for.body

exit:
  ret void
}

; TODO: Support vectorization in this case.
; CHECK-REMARKS: remark: {{.*}} loop not vectorized: Auto-vectorization of calls that return struct types is not yet supported
define void @struct_return_f64_widen(ptr noalias %in, ptr noalias writeonly %out_a, ptr noalias writeonly %out_b) {
; CHECK-LABEL: define void @struct_return_f64_widen
; CHECK-NOT: vector.body:
entry:
  br label %for.body

for.body:
  ; 1024 iterations: load a double from in[iv], call @bar (attribute #1 maps
  ; it to @fixed_vec_bar), and store each returned field to its own output.
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %in.gep = getelementptr inbounds double, ptr %in, i64 %iv
  %in_val = load double, ptr %in.gep, align 8
  %call = tail call { double, double } @bar(double %in_val) #1
  %extract_a = extractvalue { double, double } %call, 0
  %extract_b = extractvalue { double, double } %call, 1
  %out_a.gep = getelementptr inbounds double, ptr %out_a, i64 %iv
  store double %extract_a, ptr %out_a.gep, align 8
  %out_b.gep = getelementptr inbounds double, ptr %out_b, i64 %iv
  store double %extract_b, ptr %out_b.gep, align 8
  %iv.next = add nuw nsw i64 %iv, 1
  %done = icmp eq i64 %iv.next, 1024
  br i1 %done, label %exit, label %for.body

exit:
  ret void
}

; TODO: Support vectorization in this case.
; CHECK-REMARKS: remark: {{.*}} loop not vectorized: Auto-vectorization of calls that return struct types is not yet supported
define void @struct_return_f32_replicate(ptr noalias %in, ptr noalias writeonly %out_a, ptr noalias writeonly %out_b) {
; CHECK-LABEL: define void @struct_return_f32_replicate
; CHECK-NOT: vector.body:
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %in.gep = getelementptr inbounds float, ptr %in, i64 %iv
  %in_val = load float, ptr %in.gep, align 4
  ; Attribute #3 only names a scalable masked variant, so there is no
  ; fixed-size vector mapping for this call (replication is used instead).
  %call = tail call { float, float } @foo(float %in_val) #3
  %extract_a = extractvalue { float, float } %call, 0
  %extract_b = extractvalue { float, float } %call, 1
  %out_a.gep = getelementptr inbounds float, ptr %out_a, i64 %iv
  store float %extract_a, ptr %out_a.gep, align 4
  %out_b.gep = getelementptr inbounds float, ptr %out_b, i64 %iv
  store float %extract_b, ptr %out_b.gep, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %done = icmp eq i64 %iv.next, 1024
  br i1 %done, label %exit, label %for.body

exit:
  ret void
}

; TODO: Support vectorization in this case.
; CHECK-REMARKS: remark: {{.*}} loop not vectorized: Auto-vectorization of calls that return struct types is not yet supported
define void @struct_return_f32_widen_rt_checks(ptr %in, ptr writeonly %out_a, ptr writeonly %out_b) {
; CHECK-LABEL: define void @struct_return_f32_widen_rt_checks
; CHECK-NOT: vector.body:
entry:
  br label %for.body

for.body:
  ; None of the pointer arguments is marked noalias here; otherwise the loop
  ; body is the float/@foo/#0 pattern: load, call, split the struct, store.
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %in.gep = getelementptr inbounds float, ptr %in, i64 %iv
  %in_val = load float, ptr %in.gep, align 4
  %call = tail call { float, float } @foo(float %in_val) #0
  %extract_a = extractvalue { float, float } %call, 0
  %extract_b = extractvalue { float, float } %call, 1
  %out_a.gep = getelementptr inbounds float, ptr %out_a, i64 %iv
  store float %extract_a, ptr %out_a.gep, align 4
  %out_b.gep = getelementptr inbounds float, ptr %out_b, i64 %iv
  store float %extract_b, ptr %out_b.gep, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %done = icmp eq i64 %iv.next, 1024
  br i1 %done, label %exit, label %for.body

exit:
  ret void
}

; TODO: Allow mixed-struct type vectorization and mark overflow intrinsics as trivially vectorizable.
; CHECK-REMARKS: remark: {{.*}} loop not vectorized: call instruction cannot be vectorized
define void @test_overflow_intrinsic(ptr noalias readonly %in, ptr noalias writeonly %out_a, ptr noalias writeonly %out_b) {
; CHECK-LABEL: define void @test_overflow_intrinsic
; CHECK-NOT: vector.body:
; CHECK-NOT: @llvm.sadd.with.overflow.v{{.+}}i32
entry:
  br label %for.body

for.body:
  ; 1024 iterations: compute in[iv] + in[iv] with signed-overflow detection,
  ; store the i32 sum to out_a[iv] and the overflow bit (as i8) to out_b[iv].
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  ; The element is loaded as i32, so index the input with the i32 type too.
  %in.gep = getelementptr inbounds i32, ptr %in, i64 %iv
  %in_val = load i32, ptr %in.gep, align 4
  %call = tail call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %in_val, i32 %in_val)
  %extract_ret = extractvalue { i32, i1 } %call, 0
  %extract_overflow = extractvalue { i32, i1 } %call, 1
  %zext_overflow = zext i1 %extract_overflow to i8
  %out_a.gep = getelementptr inbounds i32, ptr %out_a, i64 %iv
  store i32 %extract_ret, ptr %out_a.gep, align 4
  %out_b.gep = getelementptr inbounds i8, ptr %out_b, i64 %iv
  ; out_b advances one byte per iteration, so only byte alignment can be
  ; promised for this store.
  store i8 %zext_overflow, ptr %out_b.gep, align 1
  %iv.next = add nuw nsw i64 %iv, 1
  %done = icmp eq i64 %iv.next, 1024
  br i1 %done, label %exit, label %for.body

exit:
  ret void
}

; TODO: Support vectorization in this case.
; CHECK-REMARKS: remark: {{.*}} loop not vectorized: Auto-vectorization of calls that return struct types is not yet supported
define void @struct_return_i32_three_results_widen(ptr noalias %in, ptr noalias writeonly %out_a) {
; CHECK-LABEL: define void @struct_return_i32_three_results_widen
; CHECK-NOT: vector.body:
entry:
  br label %for.body

for.body:
  ; @qux returns three i32 fields (attribute #5 maps it to @fixed_vec_qux);
  ; only field 0 is extracted and stored.
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %in.gep = getelementptr inbounds i32, ptr %in, i64 %iv
  %in_val = load i32, ptr %in.gep, align 4
  %call = tail call { i32, i32, i32 } @qux(i32 %in_val) #5
  %extract_a = extractvalue { i32, i32, i32 } %call, 0
  %out_a.gep = getelementptr inbounds i32, ptr %out_a, i64 %iv
  store i32 %extract_a, ptr %out_a.gep, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %done = icmp eq i64 %iv.next, 1024
  br i1 %done, label %exit, label %for.body

exit:
  ret void
}

; Negative test. Widening structs of vectors is not supported.
; A "-COUNT" suffix is only a directive when followed by a repeat count, so
; the remark expectation below uses the plain form to make sure it is checked.
; CHECK-REMARKS: remark: {{.*}} loop not vectorized: instruction return type cannot be vectorized
define void @negative_struct_of_vectors(ptr noalias %in, ptr noalias writeonly %out_a, ptr noalias writeonly %out_b) {
; CHECK-LABEL: define void @negative_struct_of_vectors
; CHECK-NOT: vector.body:
entry:
  br label %for.body

for.body:
  ; Every value in the loop is already a <1 x float> vector: the load, the
  ; struct fields returned by the call, and both stores.
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %in.gep = getelementptr inbounds float, ptr %in, i64 %iv
  %in_val = load <1 x float>, ptr %in.gep, align 4
  ; @foo_vectors is the declaration whose signature matches this call
  ; (@foo takes and returns scalar floats).
  %call = tail call { <1 x float>, <1 x float> } @foo_vectors(<1 x float> %in_val) #0
  %extract_a = extractvalue { <1 x float>, <1 x float> } %call, 0
  %extract_b = extractvalue { <1 x float>, <1 x float> } %call, 1
  %out_a.gep = getelementptr inbounds float, ptr %out_a, i64 %iv
  store <1 x float> %extract_a, ptr %out_a.gep, align 4
  %out_b.gep = getelementptr inbounds float, ptr %out_b, i64 %iv
  store <1 x float> %extract_b, ptr %out_b.gep, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %done = icmp eq i64 %iv.next, 1024
  br i1 %done, label %exit, label %for.body

exit:
  ret void
}

; Negative test. Widening structs with mixed element types is not supported.
; A "-COUNT" suffix is only a directive when followed by a repeat count, so
; the remark expectation below uses the plain form to make sure it is checked.
; CHECK-REMARKS: remark: {{.*}} loop not vectorized: instruction return type cannot be vectorized
define void @negative_mixed_element_type_struct_return(ptr noalias %in, ptr noalias writeonly %out_a, ptr noalias writeonly %out_b) {
; CHECK-LABEL: define void @negative_mixed_element_type_struct_return
; CHECK-NOT: vector.body:
; CHECK-NOT: call {{.*}} @fixed_vec_baz
entry:
  br label %for.body

for.body:
  ; @baz returns { float, i32 }: the two fields have different element types.
  ; Attribute #2 maps @baz to @fixed_vec_baz.
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %in.gep = getelementptr inbounds float, ptr %in, i64 %iv
  %in_val = load float, ptr %in.gep, align 4
  %call = tail call { float, i32 } @baz(float %in_val) #2
  %extract_a = extractvalue { float, i32 } %call, 0
  %extract_b = extractvalue { float, i32 } %call, 1
  %out_a.gep = getelementptr inbounds float, ptr %out_a, i64 %iv
  store float %extract_a, ptr %out_a.gep, align 4
  %out_b.gep = getelementptr inbounds i32, ptr %out_b, i64 %iv
  store i32 %extract_b, ptr %out_b.gep, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %done = icmp eq i64 %iv.next, 1024
  br i1 %done, label %exit, label %for.body

exit:
  ret void
}

%named_struct = type { double, double }

; Negative test. Widening non-literal structs is not supported.
; A "-COUNT" suffix is only a directive when followed by a repeat count, so
; the remark expectation below uses the plain form to make sure it is checked.
; CHECK-REMARKS: remark: {{.*}} loop not vectorized: instruction return type cannot be vectorized
define void @negative_named_struct_return(ptr noalias readonly %in, ptr noalias writeonly %out_a, ptr noalias writeonly %out_b) {
; CHECK-LABEL: define void @negative_named_struct_return
; CHECK-NOT: vector.body:
; CHECK-NOT: call {{.*}} @fixed_vec_bar
entry:
  br label %for.body

for.body:
  ; @bar_named returns the named type %named_struct rather than a struct
  ; literal. Attribute #4 maps @bar_named to @fixed_vec_bar.
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %in.gep = getelementptr inbounds double, ptr %in, i64 %iv
  %in_val = load double, ptr %in.gep, align 8
  %call = tail call %named_struct @bar_named(double %in_val) #4
  %extract_a = extractvalue %named_struct %call, 0
  %extract_b = extractvalue %named_struct %call, 1
  %out_a.gep = getelementptr inbounds double, ptr %out_a, i64 %iv
  store double %extract_a, ptr %out_a.gep, align 8
  %out_b.gep = getelementptr inbounds double, ptr %out_b, i64 %iv
  store double %extract_b, ptr %out_b.gep, align 8
  %iv.next = add nuw nsw i64 %iv, 1
  %done = icmp eq i64 %iv.next, 1024
  br i1 %done, label %exit, label %for.body

exit:
  ret void
}

; Negative test. Nested homogeneous structs are not supported.
; CHECK-REMARKS-COUNT: remark: {{.*}} loop not vectorized: instruction return type cannot be vectorized
; NOTE(review): the remark line above has a "-COUNT" suffix with no repeat
; count; FileCheck appears to skip such a line, so this expectation is
; probably never verified. Before enabling it, confirm which remark is really
; emitted: the call uses attribute #0, whose mapping names @foo rather than
; @foo_nested_struct -- TODO confirm.
define void @negative_nested_struct(ptr noalias %in, ptr noalias writeonly %out_a, ptr noalias writeonly %out_b) {
; CHECK-LABEL: define void @negative_nested_struct
; CHECK-NOT: vector.body:
entry:
  br label %for.body

for.body:
  ; The call returns a struct whose single field is itself { float, float };
  ; the inner struct is extracted first and then split into its two floats.
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %in.gep = getelementptr inbounds float, ptr %in, i64 %iv
  %in_val = load float, ptr %in.gep, align 4
  %call = tail call { { float, float } } @foo_nested_struct(float %in_val) #0
  %extract_inner = extractvalue { { float, float } } %call, 0
  %extract_a = extractvalue { float, float } %extract_inner, 0
  %extract_b = extractvalue { float, float } %extract_inner, 1
  %out_a.gep = getelementptr inbounds float, ptr %out_a, i64 %iv
  store float %extract_a, ptr %out_a.gep, align 4
  %out_b.gep = getelementptr inbounds float, ptr %out_b, i64 %iv
  store float %extract_b, ptr %out_b.gep, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %done = icmp eq i64 %iv.next, 1024
  br i1 %done, label %exit, label %for.body

exit:
  ret void
}

; Negative test. The second element of the struct cannot be widened.
; CHECK-REMARKS-COUNT: remark: {{.*}} loop not vectorized: instruction return type cannot be vectorized
; NOTE(review): the remark line above has a "-COUNT" suffix with no repeat
; count; FileCheck appears to skip such a line, so this expectation is
; probably never verified. Before enabling it, confirm which remark is really
; emitted: the call uses attribute #0, whose mapping names @foo rather than
; @foo_one_non_widenable_element -- TODO confirm.
define void @negative_non_widenable_element(ptr noalias %in, ptr noalias writeonly %out_a) {
; CHECK-LABEL: define void @negative_non_widenable_element
; CHECK-NOT: vector.body:
entry:
  br label %for.body

for.body:
  ; The returned struct is { float, [1 x float] }; only the float in field 0
  ; is extracted and stored, the array field is never used.
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %in.gep = getelementptr inbounds float, ptr %in, i64 %iv
  %in_val = load float, ptr %in.gep, align 4
  %call = tail call { float, [1 x float] } @foo_one_non_widenable_element(float %in_val) #0
  %extract_a = extractvalue { float, [1 x float] } %call, 0
  %out_a.gep = getelementptr inbounds float, ptr %out_a, i64 %iv
  store float %extract_a, ptr %out_a.gep, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %done = icmp eq i64 %iv.next, 1024
  br i1 %done, label %exit, label %for.body

exit:
  ret void
}

; Negative test. Homogeneous structs of arrays are not supported.
; CHECK-REMARKS-COUNT: remark: {{.*}} loop not vectorized: instruction return type cannot be vectorized
; NOTE(review): the remark line above has a "-COUNT" suffix with no repeat
; count; FileCheck appears to skip such a line, so this expectation is
; probably never verified. Before enabling it, confirm which remark is really
; emitted: the call uses attribute #0, whose mapping names @foo rather than
; @foo_arrays -- TODO confirm.
define void @negative_struct_array_elements(ptr noalias %in, ptr noalias writeonly %out_a, ptr noalias writeonly %out_b) {
; CHECK-LABEL: define void @negative_struct_array_elements
; CHECK-NOT: vector.body:
entry:
  br label %for.body

for.body:
  ; The call returns { [2 x float] }; the array is pulled out of the struct
  ; and its two elements are stored to out_a[iv] and out_b[iv].
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %in.gep = getelementptr inbounds float, ptr %in, i64 %iv
  %in_val = load float, ptr %in.gep, align 4
  %call = tail call { [2 x float] } @foo_arrays(float %in_val) #0
  %extract_inner = extractvalue { [2 x float] } %call, 0
  %extract_a = extractvalue [2 x float] %extract_inner, 0
  %extract_b = extractvalue [2 x float] %extract_inner, 1
  %out_a.gep = getelementptr inbounds float, ptr %out_a, i64 %iv
  store float %extract_a, ptr %out_a.gep, align 4
  %out_b.gep = getelementptr inbounds float, ptr %out_b, i64 %iv
  store float %extract_b, ptr %out_b.gep, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %done = icmp eq i64 %iv.next, 1024
  br i1 %done, label %exit, label %for.body

exit:
  ret void
}

; Negative test. Widening struct loads is not supported.
; CHECK-REMARKS: remark: {{.*}} loop not vectorized: instruction return type cannot be vectorized
define void @negative_struct_load(ptr noalias %in, ptr noalias writeonly %out_a, ptr noalias writeonly %out_b) {
; CHECK-LABEL: define void @negative_struct_load
; CHECK-NOT: vector.body:
entry:
  br label %for.body

for.body:
  ; No call in this loop: the { float, float } value comes straight from a
  ; load of in[iv] and is then split into the two output arrays.
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %in.gep = getelementptr inbounds { float, float }, ptr %in, i64 %iv
  %pair = load { float, float }, ptr %in.gep, align 8
  %extract_a = extractvalue { float, float } %pair, 0
  %extract_b = extractvalue { float, float } %pair, 1
  %out_a.gep = getelementptr inbounds float, ptr %out_a, i64 %iv
  store float %extract_a, ptr %out_a.gep, align 4
  %out_b.gep = getelementptr inbounds float, ptr %out_b, i64 %iv
  store float %extract_b, ptr %out_b.gep, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %done = icmp eq i64 %iv.next, 1024
  br i1 %done, label %exit, label %for.body

exit:
  ret void
}

; Negative test. Widening struct stores is not supported.
; CHECK-REMARKS: remark: {{.*}} loop not vectorized: instruction return type cannot be vectorized
define void @negative_struct_return_store_struct(ptr noalias %in, ptr noalias writeonly %out) {
; CHECK-LABEL: define void @negative_struct_return_store_struct
; CHECK-NOT: vector.body:
entry:
  br label %for.body

for.body:
  ; The struct returned by @foo is stored to out[iv] as a whole value; it is
  ; never taken apart with extractvalue.
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %in.gep = getelementptr inbounds { float, float }, ptr %in, i64 %iv
  %in_val = load float, ptr %in.gep, align 4
  %call = tail call { float, float } @foo(float %in_val) #0
  %out.gep = getelementptr inbounds { float, float }, ptr %out, i64 %iv
  store { float, float } %call, ptr %out.gep, align 8
  %iv.next = add nuw nsw i64 %iv, 1
  %done = icmp eq i64 %iv.next, 1024
  br i1 %done, label %exit, label %for.body

exit:
  ret void
}

; Scalar callees used by the loops in this file.
declare { float, float } @foo(float)
declare { double, double } @bar(double)
declare { float, i32 } @baz(float)
declare %named_struct @bar_named(double)
declare { { float, float } } @foo_nested_struct(float)
declare { [2 x float] } @foo_arrays(float)
declare { float, [1 x float] } @foo_one_non_widenable_element(float)
declare { <1 x float>, <1 x float> } @foo_vectors(<1 x float>)
declare { i32, i32, i32 } @qux(i32)

; Two-lane fixed-width variants named by the attribute groups below.
declare { <2 x float>, <2 x float> } @fixed_vec_foo(<2 x float>)
declare { <2 x double>, <2 x double> } @fixed_vec_bar(<2 x double>)
declare { <2 x float>, <2 x i32> } @fixed_vec_baz(<2 x float>)
declare { <2 x i32>, <2 x i32>, <2 x i32> } @fixed_vec_qux(<2 x i32>)

; Scalable variant taking an extra mask operand, named by attribute #3.
declare { <vscale x 4 x float>, <vscale x 4 x float> } @scalable_vec_masked_foo(<vscale x 4 x float>, <vscale x 4 x i1>)

; Call-site attribute groups: each ties a scalar callee name to one of the
; vector variants declared above.
attributes #0 = { nounwind "vector-function-abi-variant"="_ZGVnN2v_foo(fixed_vec_foo)" }
attributes #1 = { nounwind "vector-function-abi-variant"="_ZGVnN2v_bar(fixed_vec_bar)" }
attributes #2 = { nounwind "vector-function-abi-variant"="_ZGVnN2v_baz(fixed_vec_baz)" }
attributes #3 = { nounwind "vector-function-abi-variant"="_ZGVsMxv_foo(scalable_vec_masked_foo)" }
attributes #4 = { nounwind "vector-function-abi-variant"="_ZGVnN2v_bar_named(fixed_vec_bar)" }
attributes #5 = { nounwind "vector-function-abi-variant"="_ZGVnN2v_qux(fixed_vec_qux)" }