; RUN: opt -passes=loop-vectorize -mtriple=arm64-apple-iphones -force-vector-width=4 -force-vector-interleave=1 %s -S | FileCheck %s

; Exercises the loop vectorizer on loops whose only memory access carries
; !nontemporal metadata, with the vector width forced to 4 and interleaving
; forced to 1.
;
; Expected outcome per element type, as encoded by the directives below:
;   * i8, half, i16, i32, i64, double, i128 : a vector.body block is created and
;     the widened load/store keeps its !nontemporal metadata.
;   * i4, i33, i40, i256                    : no vector.body block is created.
;
; NOTE(review): the OS component of the triple is spelled "iphones"; confirm
; whether "iphoneos" was intended before changing it.

; Vectors with i4 elements may not be legal with nontemporal stores.
define void @test_i4_store(ptr %ddst) {
; CHECK-LABEL: define void @test_i4_store(
; CHECK-NOT: vector.body:
; CHECK: ret void
;
entry:
  br label %for.body

for.body:                                         ; preds = %entry, %for.body
  %i = phi i32 [ 0, %entry ], [ %add, %for.body ]
  %ddst.addr = phi ptr [ %ddst, %entry ], [ %incdec.ptr, %for.body ]
  %incdec.ptr = getelementptr inbounds i4, ptr %ddst.addr, i64 1
  store i4 10, ptr %ddst.addr, align 4, !nontemporal !8
  %add = add nuw nsw i32 %i, 4
  %cmp = icmp ult i32 %i, 4092
  br i1 %cmp, label %for.body, label %for.cond.cleanup

for.cond.cleanup:                                 ; preds = %for.body
  ret void
}

; One nontemporal i8 store per iteration; expected to become a <4 x i8> store.
define void @test_i8_store(ptr %ddst) {
; CHECK-LABEL: define void @test_i8_store(
; CHECK: vector.body:
; CHECK: store <4 x i8> {{.*}} !nontemporal !0
; CHECK: br
;
entry:
  br label %for.body

for.body:                                         ; preds = %entry, %for.body
  %i = phi i32 [ 0, %entry ], [ %add, %for.body ]
  %ddst.addr = phi ptr [ %ddst, %entry ], [ %incdec.ptr, %for.body ]
  %incdec.ptr = getelementptr inbounds i8, ptr %ddst.addr, i64 1
  store i8 10, ptr %ddst.addr, align 4, !nontemporal !8
  %add = add nuw nsw i32 %i, 4
  %cmp = icmp ult i32 %i, 4092
  br i1 %cmp, label %for.body, label %for.cond.cleanup

for.cond.cleanup:                                 ; preds = %for.body
  ret void
}

; One nontemporal half store per iteration; expected to become a <4 x half>
; store.
define void @test_half_store(ptr %ddst) {
; CHECK-LABEL: define void @test_half_store(
; CHECK: vector.body:
; CHECK: store <4 x half> {{.*}} !nontemporal !0
; CHECK: br
;
entry:
  br label %for.body

for.body:                                         ; preds = %entry, %for.body
  %i = phi i32 [ 0, %entry ], [ %add, %for.body ]
  %ddst.addr = phi ptr [ %ddst, %entry ], [ %incdec.ptr, %for.body ]
  %incdec.ptr = getelementptr inbounds half, ptr %ddst.addr, i64 1
  store half 10.0, ptr %ddst.addr, align 4, !nontemporal !8
  %add = add nuw nsw i32 %i, 4
  %cmp = icmp ult i32 %i, 4092
  br i1 %cmp, label %for.body, label %for.cond.cleanup

for.cond.cleanup:                                 ; preds = %for.body
  ret void
}

; One nontemporal i16 store per iteration; expected to become a <4 x i16> store.
define void @test_i16_store(ptr %ddst) {
; CHECK-LABEL: define void @test_i16_store(
; CHECK: vector.body:
; CHECK: store <4 x i16> {{.*}} !nontemporal !0
; CHECK: br
;
entry:
  br label %for.body

for.body:                                         ; preds = %entry, %for.body
  %i = phi i32 [ 0, %entry ], [ %add, %for.body ]
  %ddst.addr = phi ptr [ %ddst, %entry ], [ %incdec.ptr, %for.body ]
  %incdec.ptr = getelementptr inbounds i16, ptr %ddst.addr, i64 1
  store i16 10, ptr %ddst.addr, align 4, !nontemporal !8
  %add = add nuw nsw i32 %i, 4
  %cmp = icmp ult i32 %i, 4092
  br i1 %cmp, label %for.body, label %for.cond.cleanup

for.cond.cleanup:                                 ; preds = %for.body
  ret void
}

; Four nontemporal i32 stores to consecutive elements per iteration; the
; expected output contains a <16 x i32> nontemporal store.
define void @test_i32_store(ptr nocapture %ddst) {
; CHECK-LABEL: define void @test_i32_store(
; CHECK: vector.body:
; CHECK: store <16 x i32> {{.*}} !nontemporal !0
; CHECK: br
;
entry:
  br label %for.body

for.body:                                         ; preds = %entry, %for.body
  %i = phi i32 [ 0, %entry ], [ %add, %for.body ]
  %ddst.addr = phi ptr [ %ddst, %entry ], [ %incdec.ptr3, %for.body ]
  %incdec.ptr = getelementptr inbounds i32, ptr %ddst.addr, i64 1
  store i32 10, ptr %ddst.addr, align 4, !nontemporal !8
  %incdec.ptr1 = getelementptr inbounds i32, ptr %ddst.addr, i64 2
  store i32 20, ptr %incdec.ptr, align 4, !nontemporal !8
  %incdec.ptr2 = getelementptr inbounds i32, ptr %ddst.addr, i64 3
  store i32 30, ptr %incdec.ptr1, align 4, !nontemporal !8
  %incdec.ptr3 = getelementptr inbounds i32, ptr %ddst.addr, i64 4
  store i32 40, ptr %incdec.ptr2, align 4, !nontemporal !8
  %add = add nuw nsw i32 %i, 4
  %cmp = icmp ult i32 %i, 4092
  br i1 %cmp, label %for.body, label %for.cond.cleanup

for.cond.cleanup:                                 ; preds = %for.body
  ret void
}

; Four nontemporal i33 stores per iteration; expected to stay scalar.
define void @test_i33_store(ptr nocapture %ddst) {
; CHECK-LABEL: define void @test_i33_store(
; CHECK-NOT: vector.body:
; CHECK: ret
;
entry:
  br label %for.body

for.body:                                         ; preds = %entry, %for.body
  %i = phi i32 [ 0, %entry ], [ %add, %for.body ]
  %ddst.addr = phi ptr [ %ddst, %entry ], [ %incdec.ptr3, %for.body ]
  %incdec.ptr = getelementptr inbounds i33, ptr %ddst.addr, i64 1
  store i33 10, ptr %ddst.addr, align 4, !nontemporal !8
  %incdec.ptr1 = getelementptr inbounds i33, ptr %ddst.addr, i64 2
  store i33 20, ptr %incdec.ptr, align 4, !nontemporal !8
  %incdec.ptr2 = getelementptr inbounds i33, ptr %ddst.addr, i64 3
  store i33 30, ptr %incdec.ptr1, align 4, !nontemporal !8
  %incdec.ptr3 = getelementptr inbounds i33, ptr %ddst.addr, i64 4
  store i33 40, ptr %incdec.ptr2, align 4, !nontemporal !8
  %add = add nuw nsw i32 %i, 3
  %cmp = icmp ult i32 %i, 4092
  br i1 %cmp, label %for.body, label %for.cond.cleanup

for.cond.cleanup:                                 ; preds = %for.body
  ret void
}

; Four nontemporal i40 stores per iteration; expected to stay scalar.
define void @test_i40_store(ptr nocapture %ddst) {
; CHECK-LABEL: define void @test_i40_store(
; CHECK-NOT: vector.body:
; CHECK: ret
;
entry:
  br label %for.body

for.body:                                         ; preds = %entry, %for.body
  %i = phi i32 [ 0, %entry ], [ %add, %for.body ]
  %ddst.addr = phi ptr [ %ddst, %entry ], [ %incdec.ptr3, %for.body ]
  %incdec.ptr = getelementptr inbounds i40, ptr %ddst.addr, i64 1
  store i40 10, ptr %ddst.addr, align 4, !nontemporal !8
  %incdec.ptr1 = getelementptr inbounds i40, ptr %ddst.addr, i64 2
  store i40 20, ptr %incdec.ptr, align 4, !nontemporal !8
  %incdec.ptr2 = getelementptr inbounds i40, ptr %ddst.addr, i64 3
  store i40 30, ptr %incdec.ptr1, align 4, !nontemporal !8
  %incdec.ptr3 = getelementptr inbounds i40, ptr %ddst.addr, i64 4
  store i40 40, ptr %incdec.ptr2, align 4, !nontemporal !8
  %add = add nuw nsw i32 %i, 3
  %cmp = icmp ult i32 %i, 4092
  br i1 %cmp, label %for.body, label %for.cond.cleanup

for.cond.cleanup:                                 ; preds = %for.body
  ret void
}

; One nontemporal i64 store per iteration; expected to become a <4 x i64> store.
define void @test_i64_store(ptr nocapture %ddst) local_unnamed_addr {
; CHECK-LABEL: define void @test_i64_store(
; CHECK: vector.body:
; CHECK: store <4 x i64> {{.*}} !nontemporal !0
; CHECK: br
;
entry:
  br label %for.body

for.body:                                         ; preds = %entry, %for.body
  %i = phi i32 [ 0, %entry ], [ %add, %for.body ]
  %ddst.addr = phi ptr [ %ddst, %entry ], [ %incdec.ptr, %for.body ]
  %incdec.ptr = getelementptr inbounds i64, ptr %ddst.addr, i64 1
  store i64 10, ptr %ddst.addr, align 4, !nontemporal !8
  %add = add nuw nsw i32 %i, 4
  %cmp = icmp ult i32 %i, 4092
  br i1 %cmp, label %for.body, label %for.cond.cleanup

for.cond.cleanup:                                 ; preds = %for.body
  ret void
}

; One nontemporal double store per iteration; expected to become a
; <4 x double> store.
define void @test_double_store(ptr %ddst) {
; CHECK-LABEL: define void @test_double_store(
; CHECK: vector.body:
; CHECK: store <4 x double> {{.*}} !nontemporal !0
; CHECK: br
;
entry:
  br label %for.body

for.body:                                         ; preds = %entry, %for.body
  %i = phi i32 [ 0, %entry ], [ %add, %for.body ]
  %ddst.addr = phi ptr [ %ddst, %entry ], [ %incdec.ptr, %for.body ]
  %incdec.ptr = getelementptr inbounds double, ptr %ddst.addr, i64 1
  store double 10.0, ptr %ddst.addr, align 4, !nontemporal !8
  %add = add nuw nsw i32 %i, 4
  %cmp = icmp ult i32 %i, 4092
  br i1 %cmp, label %for.body, label %for.cond.cleanup

for.cond.cleanup:                                 ; preds = %for.body
  ret void
}

; One nontemporal i128 store per iteration; expected to become a <4 x i128>
; store.
define void @test_i128_store(ptr %ddst) {
; CHECK-LABEL: define void @test_i128_store(
; CHECK: vector.body:
; CHECK: store <4 x i128> {{.*}} !nontemporal !0
; CHECK: br
;
entry:
  br label %for.body

for.body:                                         ; preds = %entry, %for.body
  %i = phi i32 [ 0, %entry ], [ %add, %for.body ]
  %ddst.addr = phi ptr [ %ddst, %entry ], [ %incdec.ptr, %for.body ]
  %incdec.ptr = getelementptr inbounds i128, ptr %ddst.addr, i64 1
  store i128 10, ptr %ddst.addr, align 4, !nontemporal !8
  %add = add nuw nsw i32 %i, 4
  %cmp = icmp ult i32 %i, 4092
  br i1 %cmp, label %for.body, label %for.cond.cleanup

for.cond.cleanup:                                 ; preds = %for.body
  ret void
}

; One nontemporal i256 store per iteration; expected to stay scalar.
define void @test_i256_store(ptr %ddst) {
; CHECK-LABEL: define void @test_i256_store(
; CHECK-NOT: vector.body:
; CHECK: ret void
;
entry:
  br label %for.body

for.body:                                         ; preds = %entry, %for.body
  %i = phi i32 [ 0, %entry ], [ %add, %for.body ]
  %ddst.addr = phi ptr [ %ddst, %entry ], [ %incdec.ptr, %for.body ]
  %incdec.ptr = getelementptr inbounds i256, ptr %ddst.addr, i64 1
  store i256 10, ptr %ddst.addr, align 4, !nontemporal !8
  %add = add nuw nsw i32 %i, 4
  %cmp = icmp ult i32 %i, 4092
  br i1 %cmp, label %for.body, label %for.cond.cleanup

for.cond.cleanup:                                 ; preds = %for.body
  ret void
}

; Integer add reduction over nontemporal i4 loads; expected to stay scalar.
define i4 @test_i4_load(ptr %ddst) {
; CHECK-LABEL: define i4 @test_i4_load
; CHECK-NOT: vector.body:
; CHECK: ret i4 %{{.*}}
;
entry:
  br label %for.body

for.body:                                         ; preds = %entry, %for.body
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
  %acc.08 = phi i4 [ 0, %entry ], [ %add, %for.body ]
  %arrayidx = getelementptr inbounds i4, ptr %ddst, i64 %indvars.iv
  %l = load i4, ptr %arrayidx, align 1, !nontemporal !8
  %add = add i4 %l, %acc.08
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %exitcond.not = icmp eq i64 %indvars.iv.next, 4092
  br i1 %exitcond.not, label %for.cond.cleanup, label %for.body

for.cond.cleanup:                                 ; preds = %for.body
  ret i4 %add
}

; Integer add reduction over nontemporal i8 loads; expected to use a <4 x i8>
; nontemporal load.
define i8 @test_load_i8(ptr %ddst) {
; CHECK-LABEL: @test_load_i8(
; CHECK: vector.body:
; CHECK: load <4 x i8>, ptr {{.*}}, align 1, !nontemporal !0
; CHECK: ret i8 %{{.*}}
;
entry:
  br label %for.body

for.body:                                         ; preds = %entry, %for.body
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
  %acc.08 = phi i8 [ 0, %entry ], [ %add, %for.body ]
  %arrayidx = getelementptr inbounds i8, ptr %ddst, i64 %indvars.iv
  %l = load i8, ptr %arrayidx, align 1, !nontemporal !8
  %add = add i8 %l, %acc.08
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %exitcond.not = icmp eq i64 %indvars.iv.next, 4092
  br i1 %exitcond.not, label %for.cond.cleanup, label %for.body

for.cond.cleanup:                                 ; preds = %for.body
  ret i8 %add
}

; fadd reduction over nontemporal half loads; expected to use a <4 x half>
; nontemporal load.
define half @test_half_load(ptr %ddst) {
; CHECK-LABEL: @test_half_load
; CHECK: vector.body:
; CHECK: load <4 x half>, ptr {{.*}}, align 2, !nontemporal !0
; CHECK: ret half %{{.*}}
;
entry:
  br label %for.body

for.body:                                         ; preds = %entry, %for.body
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
  %acc.08 = phi half [ 0.0, %entry ], [ %add, %for.body ]
  %arrayidx = getelementptr inbounds half, ptr %ddst, i64 %indvars.iv
  %l = load half, ptr %arrayidx, align 2, !nontemporal !8
  %add = fadd half %l, %acc.08
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %exitcond.not = icmp eq i64 %indvars.iv.next, 4092
  br i1 %exitcond.not, label %for.cond.cleanup, label %for.body

for.cond.cleanup:                                 ; preds = %for.body
  ret half %add
}

; Integer add reduction over nontemporal i16 loads; expected to use a
; <4 x i16> nontemporal load.
define i16 @test_i16_load(ptr %ddst) {
; CHECK-LABEL: @test_i16_load
; CHECK: vector.body:
; CHECK: load <4 x i16>, ptr {{.*}}, align 2, !nontemporal !0
; CHECK: ret i16 %{{.*}}
;
entry:
  br label %for.body

for.body:                                         ; preds = %entry, %for.body
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
  %acc.08 = phi i16 [ 0, %entry ], [ %add, %for.body ]
  %arrayidx = getelementptr inbounds i16, ptr %ddst, i64 %indvars.iv
  %l = load i16, ptr %arrayidx, align 2, !nontemporal !8
  %add = add i16 %l, %acc.08
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %exitcond.not = icmp eq i64 %indvars.iv.next, 4092
  br i1 %exitcond.not, label %for.cond.cleanup, label %for.body

for.cond.cleanup:                                 ; preds = %for.body
  ret i16 %add
}

; Integer add reduction over nontemporal i32 loads; expected to use a
; <4 x i32> nontemporal load.
define i32 @test_i32_load(ptr %ddst) {
; CHECK-LABEL: @test_i32_load
; CHECK: vector.body:
; CHECK: load <4 x i32>, ptr {{.*}}, align 4, !nontemporal !0
; CHECK: ret i32 %{{.*}}
;
entry:
  br label %for.body

for.body:                                         ; preds = %entry, %for.body
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
  %acc.08 = phi i32 [ 0, %entry ], [ %add, %for.body ]
  %arrayidx = getelementptr inbounds i32, ptr %ddst, i64 %indvars.iv
  %l = load i32, ptr %arrayidx, align 4, !nontemporal !8
  %add = add i32 %l, %acc.08
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %exitcond.not = icmp eq i64 %indvars.iv.next, 4092
  br i1 %exitcond.not, label %for.cond.cleanup, label %for.body

for.cond.cleanup:                                 ; preds = %for.body
  ret i32 %add
}

; Integer add reduction over nontemporal i33 loads; expected to stay scalar.
define i33 @test_i33_load(ptr %ddst) {
; CHECK-LABEL: @test_i33_load
; CHECK-NOT: vector.body:
; CHECK: ret i33 %{{.*}}
;
entry:
  br label %for.body

for.body:                                         ; preds = %entry, %for.body
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
  %acc.08 = phi i33 [ 0, %entry ], [ %add, %for.body ]
  %arrayidx = getelementptr inbounds i33, ptr %ddst, i64 %indvars.iv
  %l = load i33, ptr %arrayidx, align 4, !nontemporal !8
  %add = add i33 %l, %acc.08
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %exitcond.not = icmp eq i64 %indvars.iv.next, 4092
  br i1 %exitcond.not, label %for.cond.cleanup, label %for.body

for.cond.cleanup:                                 ; preds = %for.body
  ret i33 %add
}

; Integer add reduction over nontemporal i40 loads; expected to stay scalar.
define i40 @test_i40_load(ptr %ddst) {
; CHECK-LABEL: @test_i40_load
; CHECK-NOT: vector.body:
; CHECK: ret i40 %{{.*}}
;
entry:
  br label %for.body

for.body:                                         ; preds = %entry, %for.body
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
  %acc.08 = phi i40 [ 0, %entry ], [ %add, %for.body ]
  %arrayidx = getelementptr inbounds i40, ptr %ddst, i64 %indvars.iv
  %l = load i40, ptr %arrayidx, align 4, !nontemporal !8
  %add = add i40 %l, %acc.08
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %exitcond.not = icmp eq i64 %indvars.iv.next, 4092
  br i1 %exitcond.not, label %for.cond.cleanup, label %for.body

for.cond.cleanup:                                 ; preds = %for.body
  ret i40 %add
}

; Integer add reduction over nontemporal i64 loads; expected to use a
; <4 x i64> nontemporal load.
define i64 @test_i64_load(ptr %ddst) {
; CHECK-LABEL: @test_i64_load
; CHECK: vector.body:
; CHECK: load <4 x i64>, ptr {{.*}}, align 4, !nontemporal !0
; CHECK: ret i64 %{{.*}}
;
entry:
  br label %for.body

for.body:                                         ; preds = %entry, %for.body
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
  %acc.08 = phi i64 [ 0, %entry ], [ %add, %for.body ]
  %arrayidx = getelementptr inbounds i64, ptr %ddst, i64 %indvars.iv
  %l = load i64, ptr %arrayidx, align 4, !nontemporal !8
  %add = add i64 %l, %acc.08
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %exitcond.not = icmp eq i64 %indvars.iv.next, 4092
  br i1 %exitcond.not, label %for.cond.cleanup, label %for.body

for.cond.cleanup:                                 ; preds = %for.body
  ret i64 %add
}

; fadd reduction over nontemporal double loads; expected to use a
; <4 x double> nontemporal load.
define double @test_double_load(ptr %ddst) {
; CHECK-LABEL: @test_double_load
; CHECK: vector.body:
; CHECK: load <4 x double>, ptr {{.*}}, align 4, !nontemporal !0
; CHECK: ret double %{{.*}}
;
entry:
  br label %for.body

for.body:                                         ; preds = %entry, %for.body
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
  %acc.08 = phi double [ 0.0, %entry ], [ %add, %for.body ]
  %arrayidx = getelementptr inbounds double, ptr %ddst, i64 %indvars.iv
  %l = load double, ptr %arrayidx, align 4, !nontemporal !8
  %add = fadd double %l, %acc.08
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %exitcond.not = icmp eq i64 %indvars.iv.next, 4092
  br i1 %exitcond.not, label %for.cond.cleanup, label %for.body

for.cond.cleanup:                                 ; preds = %for.body
  ret double %add
}

; Integer add reduction over nontemporal i128 loads; expected to use a
; <4 x i128> nontemporal load.
define i128 @test_i128_load(ptr %ddst) {
; CHECK-LABEL: @test_i128_load
; CHECK: vector.body:
; CHECK: load <4 x i128>, ptr {{.*}}, align 4, !nontemporal !0
; CHECK: ret i128 %{{.*}}
;
entry:
  br label %for.body

for.body:                                         ; preds = %entry, %for.body
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
  %acc.08 = phi i128 [ 0, %entry ], [ %add, %for.body ]
  %arrayidx = getelementptr inbounds i128, ptr %ddst, i64 %indvars.iv
  %l = load i128, ptr %arrayidx, align 4, !nontemporal !8
  %add = add i128 %l, %acc.08
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %exitcond.not = icmp eq i64 %indvars.iv.next, 4092
  br i1 %exitcond.not, label %for.cond.cleanup, label %for.body

for.cond.cleanup:                                 ; preds = %for.body
  ret i128 %add
}

; Integer add reduction over nontemporal i256 loads; expected to stay scalar.
define i256 @test_256_load(ptr %ddst) {
; CHECK-LABEL: @test_256_load
; CHECK-NOT: vector.body:
; CHECK: ret i256 %{{.*}}
;
entry:
  br label %for.body

for.body:                                         ; preds = %entry, %for.body
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
  %acc.08 = phi i256 [ 0, %entry ], [ %add, %for.body ]
  %arrayidx = getelementptr inbounds i256, ptr %ddst, i64 %indvars.iv
  %l = load i256, ptr %arrayidx, align 4, !nontemporal !8
  %add = add i256 %l, %acc.08
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %exitcond.not = icmp eq i64 %indvars.iv.next, 4092
  br i1 %exitcond.not, label %for.cond.cleanup, label %for.body

for.cond.cleanup:                                 ; preds = %for.body
  ret i256 %add
}

; Operand of every !nontemporal attachment above.
!8 = !{i32 1}