1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py 2; RUN: llc < %s -mtriple=ve -mattr=+vpu | FileCheck %s 3 4;;; Test store instructions 5;;; 6;;; Note: 7;;; We test store instructions using general stack, stack with dynamic 8;;; allocation, stack with dynamic allocation and alignment, and stack 9;;; with dynamic allocation, alignment, and spill. 10;;; 11;;; First test using a stack for a leaf function. 12;;; 13;;; | | Higher address 14;;; |----------------------------------------------| <- old sp 15;;; | Local variables of fixed size | 16;;; |----------------------------------------------| <- sp 17;;; | | Lower address 18;;; 19;;; Access local variable using sp (%s11). In addition, please remember 20;;; that stack is aligned by 16 bytes. 21;;; 22;;; Second test using a general stack. 23;;; 24;;; | | Higher address 25;;; |----------------------------------------------| 26;;; | Parameter area for this function | 27;;; |----------------------------------------------| 28;;; | Register save area (RSA) for this function | 29;;; |----------------------------------------------| 30;;; | Return address for this function | 31;;; |----------------------------------------------| 32;;; | Frame pointer for this function | 33;;; |----------------------------------------------| <- fp(=old sp) 34;;; | Local variables of fixed size | 35;;; |----------------------------------------------| 36;;; |.variable-sized.local.variables.(VLAs)........| 37;;; |..............................................| 38;;; |..............................................| 39;;; |----------------------------------------------| <- returned by alloca 40;;; | Parameter area for callee | 41;;; |----------------------------------------------| 42;;; | Register save area (RSA) for callee | 43;;; |----------------------------------------------| 44;;; | Return address for callee | 45;;; |----------------------------------------------| 46;;; | Frame pointer for callee | 47;;; 
|----------------------------------------------| <- sp 48;;; | | Lower address 49;;; 50;;; Access local variable using fp (%s9) since the size of VLA is not 51;;; known. At the beginning of the functions, allocates 240 + data 52;;; bytes. 240 means RSA+RA+FP (=176) + Parameter (=64). 53;;; 54;;; Third test using a general stack. 55;;; 56;;; | | Higher address 57;;; |----------------------------------------------| 58;;; | Parameter area for this function | 59;;; |----------------------------------------------| 60;;; | Register save area (RSA) for this function | 61;;; |----------------------------------------------| 62;;; | Return address for this function | 63;;; |----------------------------------------------| 64;;; | Frame pointer for this function | 65;;; |----------------------------------------------| <- fp(=old sp) 66;;; |.empty.space.to.make.part.below.aligned.in....| 67;;; |.case.it.needs.more.than.the.standard.16-byte.| (size of this area is 68;;; |.alignment....................................| unknown at compile time) 69;;; |----------------------------------------------| 70;;; | Local variables of fixed size including spill| 71;;; | slots | 72;;; |----------------------------------------------| <- bp(not defined by ABI, 73;;; |.variable-sized.local.variables.(VLAs)........| LLVM chooses SX17) 74;;; |..............................................| (size of this area is 75;;; |..............................................| unknown at compile time) 76;;; |----------------------------------------------| <- stack top (returned by 77;;; | Parameter area for callee | alloca) 78;;; |----------------------------------------------| 79;;; | Register save area (RSA) for callee | 80;;; |----------------------------------------------| 81;;; | Return address for callee | 82;;; |----------------------------------------------| 83;;; | Frame pointer for callee | 84;;; |----------------------------------------------| <- sp 85;;; | | Lower address 86;;; 87;;; Access local 
variable using bp (%s17) since the size of alignment 88;;; and VLA are not known. At the beginning of the functions, allocates 89;;; pad(240 + data + align) bytes. Then, access data through bp + pad(240) 90;;; since this address doesn't change even if VLA is dynamically allocated. 91;;; 92;;; Fourth test using a general stack with some spills. 93;;; 94 95; Function Attrs: argmemonly nofree nounwind 96define fastcc void @store__vm256_stk(<256 x i1> noundef %0) { 97; CHECK-LABEL: store__vm256_stk: 98; CHECK: # %bb.0: 99; CHECK-NEXT: st %s9, (, %s11) 100; CHECK-NEXT: st %s10, 8(, %s11) 101; CHECK-NEXT: or %s9, 0, %s11 102; CHECK-NEXT: lea %s11, -224(, %s11) 103; CHECK-NEXT: and %s11, %s11, (59)1 104; CHECK-NEXT: brge.l.t %s11, %s8, .LBB0_2 105; CHECK-NEXT: # %bb.1: 106; CHECK-NEXT: ld %s61, 24(, %s14) 107; CHECK-NEXT: or %s62, 0, %s0 108; CHECK-NEXT: lea %s63, 315 109; CHECK-NEXT: shm.l %s63, (%s61) 110; CHECK-NEXT: shm.l %s8, 8(%s61) 111; CHECK-NEXT: shm.l %s11, 16(%s61) 112; CHECK-NEXT: monc 113; CHECK-NEXT: or %s0, 0, %s62 114; CHECK-NEXT: .LBB0_2: 115; CHECK-NEXT: svm %s16, %vm1, 0 116; CHECK-NEXT: st %s16, 192(, %s11) 117; CHECK-NEXT: svm %s16, %vm1, 1 118; CHECK-NEXT: st %s16, 200(, %s11) 119; CHECK-NEXT: svm %s16, %vm1, 2 120; CHECK-NEXT: st %s16, 208(, %s11) 121; CHECK-NEXT: svm %s16, %vm1, 3 122; CHECK-NEXT: st %s16, 216(, %s11) 123; CHECK-NEXT: or %s11, 0, %s9 124; CHECK-NEXT: ld %s10, 8(, %s11) 125; CHECK-NEXT: ld %s9, (, %s11) 126; CHECK-NEXT: b.l.t (, %s10) 127 %2 = alloca <256 x i1>, align 32 128 call void @llvm.lifetime.start.p0(i64 32, ptr nonnull %2) 129 store volatile <256 x i1> %0, ptr %2, align 32, !tbaa !3 130 call void @llvm.lifetime.end.p0(i64 32, ptr nonnull %2) 131 ret void 132} 133 134; Function Attrs: argmemonly mustprogress nocallback nofree nosync nounwind willreturn 135declare void @llvm.lifetime.start.p0(i64 immarg, ptr nocapture) 136 137; Function Attrs: argmemonly mustprogress nocallback nofree nosync nounwind willreturn 138declare 
void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture) 139 140; Function Attrs: argmemonly nofree nounwind 141define fastcc void @store__vm256_stk_big_fit(<256 x i1> noundef %0, i64 noundef %1) { 142; CHECK-LABEL: store__vm256_stk_big_fit: 143; CHECK: # %bb.0: 144; CHECK-NEXT: st %s9, (, %s11) 145; CHECK-NEXT: st %s10, 8(, %s11) 146; CHECK-NEXT: or %s9, 0, %s11 147; CHECK-NEXT: lea %s11, -2147483648(, %s11) 148; CHECK-NEXT: and %s11, %s11, (59)1 149; CHECK-NEXT: brge.l %s11, %s8, .LBB1_4 150; CHECK-NEXT: # %bb.3: 151; CHECK-NEXT: ld %s61, 24(, %s14) 152; CHECK-NEXT: or %s62, 0, %s0 153; CHECK-NEXT: lea %s63, 315 154; CHECK-NEXT: shm.l %s63, (%s61) 155; CHECK-NEXT: shm.l %s8, 8(%s61) 156; CHECK-NEXT: shm.l %s11, 16(%s61) 157; CHECK-NEXT: monc 158; CHECK-NEXT: or %s0, 0, %s62 159; CHECK-NEXT: .LBB1_4: 160; CHECK-NEXT: svm %s16, %vm1, 0 161; CHECK-NEXT: st %s16, 2147483616(, %s11) 162; CHECK-NEXT: svm %s16, %vm1, 1 163; CHECK-NEXT: st %s16, 2147483624(, %s11) 164; CHECK-NEXT: svm %s16, %vm1, 2 165; CHECK-NEXT: st %s16, 2147483632(, %s11) 166; CHECK-NEXT: svm %s16, %vm1, 3 167; CHECK-NEXT: st %s16, 2147483640(, %s11) 168; CHECK-NEXT: or %s1, 0, (0)1 169; CHECK-NEXT: lea %s2, 2147483424 170; CHECK-NEXT: .LBB1_1: # =>This Inner Loop Header: Depth=1 171; CHECK-NEXT: st %s0, 192(%s1, %s11) 172; CHECK-NEXT: lea %s1, 8(, %s1) 173; CHECK-NEXT: brne.l %s1, %s2, .LBB1_1 174; CHECK-NEXT: # %bb.2: 175; CHECK-NEXT: or %s11, 0, %s9 176; CHECK-NEXT: ld %s10, 8(, %s11) 177; CHECK-NEXT: ld %s9, (, %s11) 178; CHECK-NEXT: b.l.t (, %s10) 179 %3 = alloca <256 x i1>, align 32 180 %4 = alloca [268435428 x i64], align 8 181 call void @llvm.lifetime.start.p0(i64 32, ptr nonnull %3) 182 call void @llvm.lifetime.start.p0(i64 2147483424, ptr nonnull %4) 183 store volatile <256 x i1> %0, ptr %3, align 32, !tbaa !3 184 br label %6 185 1865: ; preds = %6 187 call void @llvm.lifetime.end.p0(i64 2147483424, ptr nonnull %4) 188 call void @llvm.lifetime.end.p0(i64 32, ptr nonnull %3) 189 ret void 190 
1916: ; preds = %2, %6 192 %7 = phi i64 [ 0, %2 ], [ %9, %6 ] 193 %8 = getelementptr inbounds [268435428 x i64], ptr %4, i64 0, i64 %7 194 store volatile i64 %1, ptr %8, align 8, !tbaa !6 195 %9 = add nuw nsw i64 %7, 1 196 %10 = icmp eq i64 %9, 268435428 197 br i1 %10, label %5, label %6, !llvm.loop !8 198} 199 200; Function Attrs: argmemonly nofree nounwind 201define fastcc void @store__vm256_stk_big(<256 x i1> noundef %0, i64 noundef %1) { 202; CHECK-LABEL: store__vm256_stk_big: 203; CHECK: # %bb.0: 204; CHECK-NEXT: st %s9, (, %s11) 205; CHECK-NEXT: st %s10, 8(, %s11) 206; CHECK-NEXT: or %s9, 0, %s11 207; CHECK-NEXT: lea %s13, 2147483616 208; CHECK-NEXT: and %s13, %s13, (32)0 209; CHECK-NEXT: lea.sl %s11, -1(%s13, %s11) 210; CHECK-NEXT: and %s11, %s11, (59)1 211; CHECK-NEXT: brge.l %s11, %s8, .LBB2_4 212; CHECK-NEXT: # %bb.3: 213; CHECK-NEXT: ld %s61, 24(, %s14) 214; CHECK-NEXT: or %s62, 0, %s0 215; CHECK-NEXT: lea %s63, 315 216; CHECK-NEXT: shm.l %s63, (%s61) 217; CHECK-NEXT: shm.l %s8, 8(%s61) 218; CHECK-NEXT: shm.l %s11, 16(%s61) 219; CHECK-NEXT: monc 220; CHECK-NEXT: or %s0, 0, %s62 221; CHECK-NEXT: .LBB2_4: 222; CHECK-NEXT: lea %s13, -2147483648 223; CHECK-NEXT: and %s13, %s13, (32)0 224; CHECK-NEXT: lea.sl %s13, (%s11, %s13) 225; CHECK-NEXT: svm %s16, %vm1, 0 226; CHECK-NEXT: st %s16, (, %s13) 227; CHECK-NEXT: svm %s16, %vm1, 1 228; CHECK-NEXT: st %s16, 8(, %s13) 229; CHECK-NEXT: svm %s16, %vm1, 2 230; CHECK-NEXT: st %s16, 16(, %s13) 231; CHECK-NEXT: svm %s16, %vm1, 3 232; CHECK-NEXT: st %s16, 24(, %s13) 233; CHECK-NEXT: or %s1, 0, (0)1 234; CHECK-NEXT: lea %s2, 2147483432 235; CHECK-NEXT: .LBB2_1: # =>This Inner Loop Header: Depth=1 236; CHECK-NEXT: st %s0, 216(%s1, %s11) 237; CHECK-NEXT: lea %s1, 8(, %s1) 238; CHECK-NEXT: brne.l %s1, %s2, .LBB2_1 239; CHECK-NEXT: # %bb.2: 240; CHECK-NEXT: or %s11, 0, %s9 241; CHECK-NEXT: ld %s10, 8(, %s11) 242; CHECK-NEXT: ld %s9, (, %s11) 243; CHECK-NEXT: b.l.t (, %s10) 244 %3 = alloca <256 x i1>, align 32 245 %4 = 
alloca [268435429 x i64], align 8 246 call void @llvm.lifetime.start.p0(i64 32, ptr nonnull %3) 247 call void @llvm.lifetime.start.p0(i64 2147483432, ptr nonnull %4) 248 store volatile <256 x i1> %0, ptr %3, align 32, !tbaa !3 249 br label %6 250 2515: ; preds = %6 252 call void @llvm.lifetime.end.p0(i64 2147483432, ptr nonnull %4) 253 call void @llvm.lifetime.end.p0(i64 32, ptr nonnull %3) 254 ret void 255 2566: ; preds = %2, %6 257 %7 = phi i64 [ 0, %2 ], [ %9, %6 ] 258 %8 = getelementptr inbounds [268435429 x i64], ptr %4, i64 0, i64 %7 259 store volatile i64 %1, ptr %8, align 8, !tbaa !6 260 %9 = add nuw nsw i64 %7, 1 261 %10 = icmp eq i64 %9, 268435429 262 br i1 %10, label %5, label %6, !llvm.loop !8 263} 264 265; Function Attrs: argmemonly nofree nounwind 266define fastcc void @store__vm256_stk_big2(<256 x i1> noundef %0, i64 noundef %1) { 267; CHECK-LABEL: store__vm256_stk_big2: 268; CHECK: # %bb.0: 269; CHECK-NEXT: st %s9, (, %s11) 270; CHECK-NEXT: st %s10, 8(, %s11) 271; CHECK-NEXT: or %s9, 0, %s11 272; CHECK-NEXT: lea %s13, 2147483424 273; CHECK-NEXT: and %s13, %s13, (32)0 274; CHECK-NEXT: lea.sl %s11, -1(%s13, %s11) 275; CHECK-NEXT: and %s11, %s11, (59)1 276; CHECK-NEXT: brge.l %s11, %s8, .LBB3_4 277; CHECK-NEXT: # %bb.3: 278; CHECK-NEXT: ld %s61, 24(, %s14) 279; CHECK-NEXT: or %s62, 0, %s0 280; CHECK-NEXT: lea %s63, 315 281; CHECK-NEXT: shm.l %s63, (%s61) 282; CHECK-NEXT: shm.l %s8, 8(%s61) 283; CHECK-NEXT: shm.l %s11, 16(%s61) 284; CHECK-NEXT: monc 285; CHECK-NEXT: or %s0, 0, %s62 286; CHECK-NEXT: .LBB3_4: 287; CHECK-NEXT: lea %s13, -2147483456 288; CHECK-NEXT: and %s13, %s13, (32)0 289; CHECK-NEXT: lea.sl %s13, (%s11, %s13) 290; CHECK-NEXT: svm %s16, %vm1, 0 291; CHECK-NEXT: st %s16, (, %s13) 292; CHECK-NEXT: svm %s16, %vm1, 1 293; CHECK-NEXT: st %s16, 8(, %s13) 294; CHECK-NEXT: svm %s16, %vm1, 2 295; CHECK-NEXT: st %s16, 16(, %s13) 296; CHECK-NEXT: svm %s16, %vm1, 3 297; CHECK-NEXT: st %s16, 24(, %s13) 298; CHECK-NEXT: or %s1, 0, (0)1 299; 
CHECK-NEXT: lea %s2, -2147483648 300; CHECK-NEXT: and %s2, %s2, (32)0 301; CHECK-NEXT: .LBB3_1: # =>This Inner Loop Header: Depth=1 302; CHECK-NEXT: st %s0, 192(%s1, %s11) 303; CHECK-NEXT: lea %s1, 8(, %s1) 304; CHECK-NEXT: brne.l %s1, %s2, .LBB3_1 305; CHECK-NEXT: # %bb.2: 306; CHECK-NEXT: or %s11, 0, %s9 307; CHECK-NEXT: ld %s10, 8(, %s11) 308; CHECK-NEXT: ld %s9, (, %s11) 309; CHECK-NEXT: b.l.t (, %s10) 310 %3 = alloca <256 x i1>, align 32 311 %4 = alloca [268435456 x i64], align 8 312 call void @llvm.lifetime.start.p0(i64 32, ptr nonnull %3) 313 call void @llvm.lifetime.start.p0(i64 2147483648, ptr nonnull %4) 314 store volatile <256 x i1> %0, ptr %3, align 32, !tbaa !3 315 br label %6 316 3175: ; preds = %6 318 call void @llvm.lifetime.end.p0(i64 2147483648, ptr nonnull %4) 319 call void @llvm.lifetime.end.p0(i64 32, ptr nonnull %3) 320 ret void 321 3226: ; preds = %2, %6 323 %7 = phi i64 [ 0, %2 ], [ %9, %6 ] 324 %8 = getelementptr inbounds [268435456 x i64], ptr %4, i64 0, i64 %7 325 store volatile i64 %1, ptr %8, align 8, !tbaa !6 326 %9 = add nuw nsw i64 %7, 1 327 %10 = icmp eq i64 %9, 268435456 328 br i1 %10, label %5, label %6, !llvm.loop !10 329} 330 331; Function Attrs: argmemonly nofree nounwind 332define fastcc void @store__vm256_stk_dyn(<256 x i1> noundef %0, i64 noundef %1) { 333; CHECK-LABEL: store__vm256_stk_dyn: 334; CHECK: # %bb.0: 335; CHECK-NEXT: st %s9, (, %s11) 336; CHECK-NEXT: st %s10, 8(, %s11) 337; CHECK-NEXT: or %s9, 0, %s11 338; CHECK-NEXT: lea %s11, -272(, %s11) 339; CHECK-NEXT: brge.l.t %s11, %s8, .LBB4_2 340; CHECK-NEXT: # %bb.1: 341; CHECK-NEXT: ld %s61, 24(, %s14) 342; CHECK-NEXT: or %s62, 0, %s0 343; CHECK-NEXT: lea %s63, 315 344; CHECK-NEXT: shm.l %s63, (%s61) 345; CHECK-NEXT: shm.l %s8, 8(%s61) 346; CHECK-NEXT: shm.l %s11, 16(%s61) 347; CHECK-NEXT: monc 348; CHECK-NEXT: or %s0, 0, %s62 349; CHECK-NEXT: .LBB4_2: 350; CHECK-NEXT: sll %s0, %s0, 5 351; CHECK-NEXT: lea %s1, __ve_grow_stack@lo 352; CHECK-NEXT: and %s1, %s1, (32)0 
353; CHECK-NEXT: lea.sl %s12, __ve_grow_stack@hi(, %s1) 354; CHECK-NEXT: bsic %s10, (, %s12) 355; CHECK-NEXT: lea %s0, 240(, %s11) 356; CHECK-NEXT: svm %s1, %vm1, 3 357; CHECK-NEXT: st %s1, 24(, %s0) 358; CHECK-NEXT: svm %s1, %vm1, 2 359; CHECK-NEXT: st %s1, 16(, %s0) 360; CHECK-NEXT: svm %s1, %vm1, 1 361; CHECK-NEXT: st %s1, 8(, %s0) 362; CHECK-NEXT: svm %s1, %vm1, 0 363; CHECK-NEXT: st %s1, (, %s0) 364; CHECK-NEXT: svm %s16, %vm1, 0 365; CHECK-NEXT: st %s16, -32(, %s9) 366; CHECK-NEXT: svm %s16, %vm1, 1 367; CHECK-NEXT: st %s16, -24(, %s9) 368; CHECK-NEXT: svm %s16, %vm1, 2 369; CHECK-NEXT: st %s16, -16(, %s9) 370; CHECK-NEXT: svm %s16, %vm1, 3 371; CHECK-NEXT: st %s16, -8(, %s9) 372; CHECK-NEXT: or %s11, 0, %s9 373; CHECK-NEXT: ld %s10, 8(, %s11) 374; CHECK-NEXT: ld %s9, (, %s11) 375; CHECK-NEXT: b.l.t (, %s10) 376 %3 = alloca <256 x i1>, align 8 377 call void @llvm.lifetime.start.p0(i64 32, ptr nonnull %3) 378 %4 = alloca <256 x i1>, i64 %1, align 8 379 store volatile <256 x i1> %0, ptr %4, align 32, !tbaa !3 380 store volatile <256 x i1> %0, ptr %3, align 32, !tbaa !3 381 call void @llvm.lifetime.end.p0(i64 32, ptr nonnull %3) 382 ret void 383} 384 385; Function Attrs: argmemonly nofree nounwind 386define fastcc void @store__vm256_stk_dyn_align(<256 x i1> noundef %0, i64 noundef %1) { 387; CHECK-LABEL: store__vm256_stk_dyn_align: 388; CHECK: # %bb.0: 389; CHECK-NEXT: st %s9, (, %s11) 390; CHECK-NEXT: st %s10, 8(, %s11) 391; CHECK-NEXT: st %s17, 40(, %s11) 392; CHECK-NEXT: or %s9, 0, %s11 393; CHECK-NEXT: lea %s11, -288(, %s11) 394; CHECK-NEXT: and %s11, %s11, (59)1 395; CHECK-NEXT: or %s17, 0, %s11 396; CHECK-NEXT: brge.l.t %s11, %s8, .LBB5_2 397; CHECK-NEXT: # %bb.1: 398; CHECK-NEXT: ld %s61, 24(, %s14) 399; CHECK-NEXT: or %s62, 0, %s0 400; CHECK-NEXT: lea %s63, 315 401; CHECK-NEXT: shm.l %s63, (%s61) 402; CHECK-NEXT: shm.l %s8, 8(%s61) 403; CHECK-NEXT: shm.l %s11, 16(%s61) 404; CHECK-NEXT: monc 405; CHECK-NEXT: or %s0, 0, %s62 406; CHECK-NEXT: .LBB5_2: 407; 
CHECK-NEXT: sll %s0, %s0, 5 408; CHECK-NEXT: lea %s1, __ve_grow_stack@lo 409; CHECK-NEXT: and %s1, %s1, (32)0 410; CHECK-NEXT: lea.sl %s12, __ve_grow_stack@hi(, %s1) 411; CHECK-NEXT: bsic %s10, (, %s12) 412; CHECK-NEXT: lea %s0, 240(, %s11) 413; CHECK-NEXT: svm %s1, %vm1, 3 414; CHECK-NEXT: st %s1, 24(, %s0) 415; CHECK-NEXT: svm %s1, %vm1, 2 416; CHECK-NEXT: st %s1, 16(, %s0) 417; CHECK-NEXT: svm %s1, %vm1, 1 418; CHECK-NEXT: st %s1, 8(, %s0) 419; CHECK-NEXT: svm %s1, %vm1, 0 420; CHECK-NEXT: st %s1, (, %s0) 421; CHECK-NEXT: svm %s16, %vm1, 0 422; CHECK-NEXT: st %s16, 256(, %s17) 423; CHECK-NEXT: svm %s16, %vm1, 1 424; CHECK-NEXT: st %s16, 264(, %s17) 425; CHECK-NEXT: svm %s16, %vm1, 2 426; CHECK-NEXT: st %s16, 272(, %s17) 427; CHECK-NEXT: svm %s16, %vm1, 3 428; CHECK-NEXT: st %s16, 280(, %s17) 429; CHECK-NEXT: or %s11, 0, %s9 430; CHECK-NEXT: ld %s17, 40(, %s11) 431; CHECK-NEXT: ld %s10, 8(, %s11) 432; CHECK-NEXT: ld %s9, (, %s11) 433; CHECK-NEXT: b.l.t (, %s10) 434 %3 = alloca <256 x i1>, align 32 435 call void @llvm.lifetime.start.p0(i64 32, ptr nonnull %3) 436 %4 = alloca <256 x i1>, i64 %1, align 8 437 store volatile <256 x i1> %0, ptr %4, align 32, !tbaa !3 438 store volatile <256 x i1> %0, ptr %3, align 32, !tbaa !3 439 call void @llvm.lifetime.end.p0(i64 32, ptr nonnull %3) 440 ret void 441} 442 443; Function Attrs: argmemonly nofree nounwind 444define fastcc void @store__vm256_stk_dyn_align2(<256 x i1> noundef %0, i64 noundef %1) { 445; CHECK-LABEL: store__vm256_stk_dyn_align2: 446; CHECK: # %bb.0: 447; CHECK-NEXT: st %s9, (, %s11) 448; CHECK-NEXT: st %s10, 8(, %s11) 449; CHECK-NEXT: st %s17, 40(, %s11) 450; CHECK-NEXT: or %s9, 0, %s11 451; CHECK-NEXT: lea %s11, -320(, %s11) 452; CHECK-NEXT: and %s11, %s11, (58)1 453; CHECK-NEXT: or %s17, 0, %s11 454; CHECK-NEXT: brge.l.t %s11, %s8, .LBB6_2 455; CHECK-NEXT: # %bb.1: 456; CHECK-NEXT: ld %s61, 24(, %s14) 457; CHECK-NEXT: or %s62, 0, %s0 458; CHECK-NEXT: lea %s63, 315 459; CHECK-NEXT: shm.l %s63, (%s61) 460; 
CHECK-NEXT: shm.l %s8, 8(%s61) 461; CHECK-NEXT: shm.l %s11, 16(%s61) 462; CHECK-NEXT: monc 463; CHECK-NEXT: or %s0, 0, %s62 464; CHECK-NEXT: .LBB6_2: 465; CHECK-NEXT: sll %s0, %s0, 5 466; CHECK-NEXT: lea %s1, __ve_grow_stack@lo 467; CHECK-NEXT: and %s1, %s1, (32)0 468; CHECK-NEXT: lea.sl %s12, __ve_grow_stack@hi(, %s1) 469; CHECK-NEXT: bsic %s10, (, %s12) 470; CHECK-NEXT: lea %s0, 240(, %s11) 471; CHECK-NEXT: svm %s1, %vm1, 3 472; CHECK-NEXT: st %s1, 24(, %s0) 473; CHECK-NEXT: svm %s1, %vm1, 2 474; CHECK-NEXT: st %s1, 16(, %s0) 475; CHECK-NEXT: svm %s1, %vm1, 1 476; CHECK-NEXT: st %s1, 8(, %s0) 477; CHECK-NEXT: svm %s1, %vm1, 0 478; CHECK-NEXT: st %s1, (, %s0) 479; CHECK-NEXT: svm %s16, %vm1, 0 480; CHECK-NEXT: st %s16, 288(, %s17) 481; CHECK-NEXT: svm %s16, %vm1, 1 482; CHECK-NEXT: st %s16, 296(, %s17) 483; CHECK-NEXT: svm %s16, %vm1, 2 484; CHECK-NEXT: st %s16, 304(, %s17) 485; CHECK-NEXT: svm %s16, %vm1, 3 486; CHECK-NEXT: st %s16, 312(, %s17) 487; CHECK-NEXT: svm %s16, %vm1, 0 488; CHECK-NEXT: st %s16, 256(, %s17) 489; CHECK-NEXT: svm %s16, %vm1, 1 490; CHECK-NEXT: st %s16, 264(, %s17) 491; CHECK-NEXT: svm %s16, %vm1, 2 492; CHECK-NEXT: st %s16, 272(, %s17) 493; CHECK-NEXT: svm %s16, %vm1, 3 494; CHECK-NEXT: st %s16, 280(, %s17) 495; CHECK-NEXT: or %s11, 0, %s9 496; CHECK-NEXT: ld %s17, 40(, %s11) 497; CHECK-NEXT: ld %s10, 8(, %s11) 498; CHECK-NEXT: ld %s9, (, %s11) 499; CHECK-NEXT: b.l.t (, %s10) 500 %3 = alloca <256 x i1>, align 32 501 %4 = alloca <256 x i1>, align 64 502 call void @llvm.lifetime.start.p0(i64 32, ptr nonnull %3) 503 %5 = alloca <256 x i1>, i64 %1, align 8 504 store volatile <256 x i1> %0, ptr %5, align 32, !tbaa !3 505 store volatile <256 x i1> %0, ptr %3, align 32, !tbaa !3 506 call void @llvm.lifetime.start.p0(i64 32, ptr nonnull %4) 507 store volatile <256 x i1> %0, ptr %4, align 64, !tbaa !3 508 call void @llvm.lifetime.end.p0(i64 32, ptr nonnull %4) 509 call void @llvm.lifetime.end.p0(i64 32, ptr nonnull %3) 510 ret void 511} 512 513; 
Function Attrs: nounwind 514define fastcc void @store__vm256_stk_dyn_align_spill(<256 x i1> noundef %0, i64 noundef %1) { 515; CHECK-LABEL: store__vm256_stk_dyn_align_spill: 516; CHECK: # %bb.0: 517; CHECK-NEXT: st %s9, (, %s11) 518; CHECK-NEXT: st %s10, 8(, %s11) 519; CHECK-NEXT: st %s17, 40(, %s11) 520; CHECK-NEXT: or %s9, 0, %s11 521; CHECK-NEXT: lea %s11, -320(, %s11) 522; CHECK-NEXT: and %s11, %s11, (59)1 523; CHECK-NEXT: or %s17, 0, %s11 524; CHECK-NEXT: brge.l.t %s11, %s8, .LBB7_2 525; CHECK-NEXT: # %bb.1: 526; CHECK-NEXT: ld %s61, 24(, %s14) 527; CHECK-NEXT: or %s62, 0, %s0 528; CHECK-NEXT: lea %s63, 315 529; CHECK-NEXT: shm.l %s63, (%s61) 530; CHECK-NEXT: shm.l %s8, 8(%s61) 531; CHECK-NEXT: shm.l %s11, 16(%s61) 532; CHECK-NEXT: monc 533; CHECK-NEXT: or %s0, 0, %s62 534; CHECK-NEXT: .LBB7_2: 535; CHECK-NEXT: st %s18, 48(, %s9) # 8-byte Folded Spill 536; CHECK-NEXT: st %s19, 56(, %s9) # 8-byte Folded Spill 537; CHECK-NEXT: or %s18, 0, %s0 538; CHECK-NEXT: svm %s16, %vm1, 0 539; CHECK-NEXT: st %s16, 256(, %s17) 540; CHECK-NEXT: svm %s16, %vm1, 1 541; CHECK-NEXT: st %s16, 264(, %s17) 542; CHECK-NEXT: svm %s16, %vm1, 2 543; CHECK-NEXT: st %s16, 272(, %s17) 544; CHECK-NEXT: svm %s16, %vm1, 3 545; CHECK-NEXT: st %s16, 280(, %s17) # 32-byte Folded Spill 546; CHECK-NEXT: sll %s0, %s0, 5 547; CHECK-NEXT: lea %s1, __ve_grow_stack@lo 548; CHECK-NEXT: and %s1, %s1, (32)0 549; CHECK-NEXT: lea.sl %s12, __ve_grow_stack@hi(, %s1) 550; CHECK-NEXT: bsic %s10, (, %s12) 551; CHECK-NEXT: lea %s19, 240(, %s11) 552; CHECK-NEXT: lea %s0, dummy@lo 553; CHECK-NEXT: and %s0, %s0, (32)0 554; CHECK-NEXT: lea.sl %s12, dummy@hi(, %s0) 555; CHECK-NEXT: bsic %s10, (, %s12) 556; CHECK-NEXT: lea %s0, pass@lo 557; CHECK-NEXT: and %s0, %s0, (32)0 558; CHECK-NEXT: lea.sl %s12, pass@hi(, %s0) 559; CHECK-NEXT: or %s0, 0, %s18 560; CHECK-NEXT: bsic %s10, (, %s12) 561; CHECK-NEXT: ld %s16, 256(, %s17) 562; CHECK-NEXT: lvm %vm1, 0, %s16 563; CHECK-NEXT: ld %s16, 264(, %s17) 564; CHECK-NEXT: lvm 
%vm1, 1, %s16 565; CHECK-NEXT: ld %s16, 272(, %s17) 566; CHECK-NEXT: lvm %vm1, 2, %s16 567; CHECK-NEXT: ld %s16, 280(, %s17) # 32-byte Folded Reload 568; CHECK-NEXT: lvm %vm1, 3, %s16 569; CHECK-NEXT: svm %s0, %vm1, 3 570; CHECK-NEXT: st %s0, 24(, %s19) 571; CHECK-NEXT: svm %s0, %vm1, 2 572; CHECK-NEXT: st %s0, 16(, %s19) 573; CHECK-NEXT: svm %s0, %vm1, 1 574; CHECK-NEXT: st %s0, 8(, %s19) 575; CHECK-NEXT: svm %s0, %vm1, 0 576; CHECK-NEXT: st %s0, (, %s19) 577; CHECK-NEXT: svm %s16, %vm1, 0 578; CHECK-NEXT: st %s16, 288(, %s17) 579; CHECK-NEXT: svm %s16, %vm1, 1 580; CHECK-NEXT: st %s16, 296(, %s17) 581; CHECK-NEXT: svm %s16, %vm1, 2 582; CHECK-NEXT: st %s16, 304(, %s17) 583; CHECK-NEXT: svm %s16, %vm1, 3 584; CHECK-NEXT: st %s16, 312(, %s17) 585; CHECK-NEXT: ld %s19, 56(, %s9) # 8-byte Folded Reload 586; CHECK-NEXT: ld %s18, 48(, %s9) # 8-byte Folded Reload 587; CHECK-NEXT: or %s11, 0, %s9 588; CHECK-NEXT: ld %s17, 40(, %s11) 589; CHECK-NEXT: ld %s10, 8(, %s11) 590; CHECK-NEXT: ld %s9, (, %s11) 591; CHECK-NEXT: b.l.t (, %s10) 592 %3 = alloca <256 x i1>, align 32 593 call void @llvm.lifetime.start.p0(i64 32, ptr nonnull %3) 594 %4 = alloca <256 x i1>, i64 %1, align 8 595 tail call fastcc void @dummy() 596 tail call fastcc void @pass(i64 noundef %1) 597 store volatile <256 x i1> %0, ptr %4, align 32, !tbaa !3 598 store volatile <256 x i1> %0, ptr %3, align 32, !tbaa !3 599 call void @llvm.lifetime.end.p0(i64 32, ptr nonnull %3) 600 ret void 601} 602 603declare fastcc void @dummy() 604 605declare fastcc void @pass(i64 noundef) 606 607; Function Attrs: argmemonly nofree nounwind 608define fastcc void @store__vm512_stk(<512 x i1> noundef %0) { 609; CHECK-LABEL: store__vm512_stk: 610; CHECK: # %bb.0: 611; CHECK-NEXT: st %s9, (, %s11) 612; CHECK-NEXT: st %s10, 8(, %s11) 613; CHECK-NEXT: or %s9, 0, %s11 614; CHECK-NEXT: lea %s11, -256(, %s11) 615; CHECK-NEXT: and %s11, %s11, (58)1 616; CHECK-NEXT: brge.l.t %s11, %s8, .LBB8_2 617; CHECK-NEXT: # %bb.1: 618; CHECK-NEXT: ld 
%s61, 24(, %s14) 619; CHECK-NEXT: or %s62, 0, %s0 620; CHECK-NEXT: lea %s63, 315 621; CHECK-NEXT: shm.l %s63, (%s61) 622; CHECK-NEXT: shm.l %s8, 8(%s61) 623; CHECK-NEXT: shm.l %s11, 16(%s61) 624; CHECK-NEXT: monc 625; CHECK-NEXT: or %s0, 0, %s62 626; CHECK-NEXT: .LBB8_2: 627; CHECK-NEXT: svm %s16, %vm3, 0 628; CHECK-NEXT: st %s16, 192(, %s11) 629; CHECK-NEXT: svm %s16, %vm3, 1 630; CHECK-NEXT: st %s16, 200(, %s11) 631; CHECK-NEXT: svm %s16, %vm3, 2 632; CHECK-NEXT: st %s16, 208(, %s11) 633; CHECK-NEXT: svm %s16, %vm3, 3 634; CHECK-NEXT: st %s16, 216(, %s11) 635; CHECK-NEXT: svm %s16, %vm2, 0 636; CHECK-NEXT: st %s16, 224(, %s11) 637; CHECK-NEXT: svm %s16, %vm2, 1 638; CHECK-NEXT: st %s16, 232(, %s11) 639; CHECK-NEXT: svm %s16, %vm2, 2 640; CHECK-NEXT: st %s16, 240(, %s11) 641; CHECK-NEXT: svm %s16, %vm2, 3 642; CHECK-NEXT: st %s16, 248(, %s11) 643; CHECK-NEXT: or %s11, 0, %s9 644; CHECK-NEXT: ld %s10, 8(, %s11) 645; CHECK-NEXT: ld %s9, (, %s11) 646; CHECK-NEXT: b.l.t (, %s10) 647 %2 = alloca <512 x i1>, align 64 648 call void @llvm.lifetime.start.p0(i64 64, ptr nonnull %2) 649 store volatile <512 x i1> %0, ptr %2, align 64, !tbaa !3 650 call void @llvm.lifetime.end.p0(i64 64, ptr nonnull %2) 651 ret void 652} 653 654; Function Attrs: argmemonly nofree nounwind 655define fastcc void @store__vm512_stk_bc(<512 x i1> noundef %0) { 656; CHECK-LABEL: store__vm512_stk_bc: 657; CHECK: # %bb.0: 658; CHECK-NEXT: st %s9, (, %s11) 659; CHECK-NEXT: st %s10, 8(, %s11) 660; CHECK-NEXT: or %s9, 0, %s11 661; CHECK-NEXT: lea %s11, -320(, %s11) 662; CHECK-NEXT: and %s11, %s11, (58)1 663; CHECK-NEXT: brge.l.t %s11, %s8, .LBB9_2 664; CHECK-NEXT: # %bb.1: 665; CHECK-NEXT: ld %s61, 24(, %s14) 666; CHECK-NEXT: or %s62, 0, %s0 667; CHECK-NEXT: lea %s63, 315 668; CHECK-NEXT: shm.l %s63, (%s61) 669; CHECK-NEXT: shm.l %s8, 8(%s61) 670; CHECK-NEXT: shm.l %s11, 16(%s61) 671; CHECK-NEXT: monc 672; CHECK-NEXT: or %s0, 0, %s62 673; CHECK-NEXT: .LBB9_2: 674; CHECK-NEXT: svm %s16, %vm3, 0 675; 
CHECK-NEXT: st %s16, 192(, %s11) 676; CHECK-NEXT: svm %s16, %vm3, 1 677; CHECK-NEXT: st %s16, 200(, %s11) 678; CHECK-NEXT: svm %s16, %vm3, 2 679; CHECK-NEXT: st %s16, 208(, %s11) 680; CHECK-NEXT: svm %s16, %vm3, 3 681; CHECK-NEXT: st %s16, 216(, %s11) 682; CHECK-NEXT: svm %s16, %vm2, 0 683; CHECK-NEXT: st %s16, 224(, %s11) 684; CHECK-NEXT: svm %s16, %vm2, 1 685; CHECK-NEXT: st %s16, 232(, %s11) 686; CHECK-NEXT: svm %s16, %vm2, 2 687; CHECK-NEXT: st %s16, 240(, %s11) 688; CHECK-NEXT: svm %s16, %vm2, 3 689; CHECK-NEXT: st %s16, 248(, %s11) 690; CHECK-NEXT: ld %s0, 192(, %s11) 691; CHECK-NEXT: ld %s1, 200(, %s11) 692; CHECK-NEXT: ld %s2, 208(, %s11) 693; CHECK-NEXT: ld %s3, 216(, %s11) 694; CHECK-NEXT: ld %s4, 248(, %s11) 695; CHECK-NEXT: ld %s5, 240(, %s11) 696; CHECK-NEXT: ld %s6, 232(, %s11) 697; CHECK-NEXT: ld %s7, 224(, %s11) 698; CHECK-NEXT: st %s4, 312(, %s11) 699; CHECK-NEXT: st %s5, 304(, %s11) 700; CHECK-NEXT: st %s6, 296(, %s11) 701; CHECK-NEXT: st %s7, 288(, %s11) 702; CHECK-NEXT: st %s3, 280(, %s11) 703; CHECK-NEXT: st %s2, 272(, %s11) 704; CHECK-NEXT: st %s1, 264(, %s11) 705; CHECK-NEXT: st %s0, 256(, %s11) 706; CHECK-NEXT: or %s11, 0, %s9 707; CHECK-NEXT: ld %s10, 8(, %s11) 708; CHECK-NEXT: ld %s9, (, %s11) 709; CHECK-NEXT: b.l.t (, %s10) 710 %2 = alloca i512, align 64 711 %3 = bitcast <512 x i1> %0 to i512 712 call void @llvm.lifetime.start.p0(i64 64, ptr nonnull %2) 713 store volatile i512 %3, ptr %2, align 64, !tbaa !3 714 call void @llvm.lifetime.end.p0(i64 64, ptr nonnull %2) 715 ret void 716} 717 718; Function Attrs: argmemonly nofree nounwind 719define fastcc void @store__vm512_stk_big(<512 x i1> noundef %0, i64 noundef %1) { 720; CHECK-LABEL: store__vm512_stk_big: 721; CHECK: # %bb.0: 722; CHECK-NEXT: st %s9, (, %s11) 723; CHECK-NEXT: st %s10, 8(, %s11) 724; CHECK-NEXT: or %s9, 0, %s11 725; CHECK-NEXT: lea %s13, 2147483392 726; CHECK-NEXT: and %s13, %s13, (32)0 727; CHECK-NEXT: lea.sl %s11, -1(%s13, %s11) 728; CHECK-NEXT: and %s11, %s11, (58)1 
729; CHECK-NEXT: brge.l %s11, %s8, .LBB10_4 730; CHECK-NEXT: # %bb.3: 731; CHECK-NEXT: ld %s61, 24(, %s14) 732; CHECK-NEXT: or %s62, 0, %s0 733; CHECK-NEXT: lea %s63, 315 734; CHECK-NEXT: shm.l %s63, (%s61) 735; CHECK-NEXT: shm.l %s8, 8(%s61) 736; CHECK-NEXT: shm.l %s11, 16(%s61) 737; CHECK-NEXT: monc 738; CHECK-NEXT: or %s0, 0, %s62 739; CHECK-NEXT: .LBB10_4: 740; CHECK-NEXT: lea %s13, -2147483456 741; CHECK-NEXT: and %s13, %s13, (32)0 742; CHECK-NEXT: lea.sl %s13, (%s11, %s13) 743; CHECK-NEXT: svm %s16, %vm3, 0 744; CHECK-NEXT: st %s16, (, %s13) 745; CHECK-NEXT: svm %s16, %vm3, 1 746; CHECK-NEXT: st %s16, 8(, %s13) 747; CHECK-NEXT: svm %s16, %vm3, 2 748; CHECK-NEXT: st %s16, 16(, %s13) 749; CHECK-NEXT: svm %s16, %vm3, 3 750; CHECK-NEXT: st %s16, 24(, %s13) 751; CHECK-NEXT: svm %s16, %vm2, 0 752; CHECK-NEXT: st %s16, 32(, %s13) 753; CHECK-NEXT: svm %s16, %vm2, 1 754; CHECK-NEXT: st %s16, 40(, %s13) 755; CHECK-NEXT: svm %s16, %vm2, 2 756; CHECK-NEXT: st %s16, 48(, %s13) 757; CHECK-NEXT: svm %s16, %vm2, 3 758; CHECK-NEXT: st %s16, 56(, %s13) 759; CHECK-NEXT: or %s1, 0, (0)1 760; CHECK-NEXT: lea %s2, 2147483640 761; CHECK-NEXT: .LBB10_1: # =>This Inner Loop Header: Depth=1 762; CHECK-NEXT: st %s0, 200(%s1, %s11) 763; CHECK-NEXT: lea %s1, 8(, %s1) 764; CHECK-NEXT: brne.l %s1, %s2, .LBB10_1 765; CHECK-NEXT: # %bb.2: 766; CHECK-NEXT: or %s11, 0, %s9 767; CHECK-NEXT: ld %s10, 8(, %s11) 768; CHECK-NEXT: ld %s9, (, %s11) 769; CHECK-NEXT: b.l.t (, %s10) 770 %3 = alloca <512 x i1>, align 64 771 %4 = alloca [268435455 x i64], align 8 772 call void @llvm.lifetime.start.p0(i64 64, ptr nonnull %3) 773 call void @llvm.lifetime.start.p0(i64 2147483640, ptr nonnull %4) 774 store volatile <512 x i1> %0, ptr %3, align 64, !tbaa !3 775 br label %6 776 7775: ; preds = %6 778 call void @llvm.lifetime.end.p0(i64 2147483640, ptr nonnull %4) 779 call void @llvm.lifetime.end.p0(i64 64, ptr nonnull %3) 780 ret void 781 7826: ; preds = %2, %6 783 %7 = phi i64 [ 0, %2 ], [ %9, %6 ] 784 %8 = 
getelementptr inbounds [268435455 x i64], ptr %4, i64 0, i64 %7
  store volatile i64 %1, ptr %8, align 8, !tbaa !6
  %9 = add nuw nsw i64 %7, 1
  %10 = icmp eq i64 %9, 268435455
  br i1 %10, label %5, label %6, !llvm.loop !11
}

;;; Stores a <512 x i1> mask to a fixed, 64-byte-aligned stack slot in a frame
;;; that also holds a [268435456 x i64] array (2147483648 bytes according to
;;; the lifetime markers), then writes %1 to every array element in a loop.
;;; The expected prologue forms the stack-pointer adjustment with a
;;; lea / and / lea.sl sequence, and the mask is written as eight 8-byte
;;; svm/st pairs taken from %vm3 and then %vm2.
; Function Attrs: argmemonly nofree nounwind
define fastcc void @store__vm512_stk_big2(<512 x i1> noundef %0, i64 noundef %1) {
; CHECK-LABEL: store__vm512_stk_big2:
; CHECK: # %bb.0:
; CHECK-NEXT: st %s9, (, %s11)
; CHECK-NEXT: st %s10, 8(, %s11)
; CHECK-NEXT: or %s9, 0, %s11
; CHECK-NEXT: lea %s13, 2147483392
; CHECK-NEXT: and %s13, %s13, (32)0
; CHECK-NEXT: lea.sl %s11, -1(%s13, %s11)
; CHECK-NEXT: and %s11, %s11, (58)1
; CHECK-NEXT: brge.l %s11, %s8, .LBB11_4
; CHECK-NEXT: # %bb.3:
; CHECK-NEXT: ld %s61, 24(, %s14)
; CHECK-NEXT: or %s62, 0, %s0
; CHECK-NEXT: lea %s63, 315
; CHECK-NEXT: shm.l %s63, (%s61)
; CHECK-NEXT: shm.l %s8, 8(%s61)
; CHECK-NEXT: shm.l %s11, 16(%s61)
; CHECK-NEXT: monc
; CHECK-NEXT: or %s0, 0, %s62
; CHECK-NEXT: .LBB11_4:
; CHECK-NEXT: lea %s13, -2147483456
; CHECK-NEXT: and %s13, %s13, (32)0
; CHECK-NEXT: lea.sl %s13, (%s11, %s13)
; CHECK-NEXT: svm %s16, %vm3, 0
; CHECK-NEXT: st %s16, (, %s13)
; CHECK-NEXT: svm %s16, %vm3, 1
; CHECK-NEXT: st %s16, 8(, %s13)
; CHECK-NEXT: svm %s16, %vm3, 2
; CHECK-NEXT: st %s16, 16(, %s13)
; CHECK-NEXT: svm %s16, %vm3, 3
; CHECK-NEXT: st %s16, 24(, %s13)
; CHECK-NEXT: svm %s16, %vm2, 0
; CHECK-NEXT: st %s16, 32(, %s13)
; CHECK-NEXT: svm %s16, %vm2, 1
; CHECK-NEXT: st %s16, 40(, %s13)
; CHECK-NEXT: svm %s16, %vm2, 2
; CHECK-NEXT: st %s16, 48(, %s13)
; CHECK-NEXT: svm %s16, %vm2, 3
; CHECK-NEXT: st %s16, 56(, %s13)
; CHECK-NEXT: or %s1, 0, (0)1
; CHECK-NEXT: lea %s2, -2147483648
; CHECK-NEXT: and %s2, %s2, (32)0
; CHECK-NEXT: .LBB11_1: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: st %s0, 192(%s1, %s11)
; CHECK-NEXT: lea %s1, 8(, %s1)
; CHECK-NEXT: brne.l %s1, %s2, .LBB11_1
; CHECK-NEXT: # %bb.2:
; CHECK-NEXT: or %s11, 0, %s9
; CHECK-NEXT: ld %s10, 8(, %s11)
; CHECK-NEXT: ld %s9, (, %s11)
; CHECK-NEXT: b.l.t (, %s10)
  ; Fixed-size locals: the mask slot (%3) and the large i64 array (%4).
  %3 = alloca <512 x i1>, align 64
  %4 = alloca [268435456 x i64], align 8
  call void @llvm.lifetime.start.p0(i64 64, ptr nonnull %3)
  call void @llvm.lifetime.start.p0(i64 2147483648, ptr nonnull %4)
  ; Volatile store of the mask argument into the fixed slot.
  store volatile <512 x i1> %0, ptr %3, align 64, !tbaa !3
  br label %6

; Loop exit: end both lifetimes and return.
5:                                                ; preds = %6
  call void @llvm.lifetime.end.p0(i64 2147483648, ptr nonnull %4)
  call void @llvm.lifetime.end.p0(i64 64, ptr nonnull %3)
  ret void

; Loop body: store %1 to element %7 of the array and advance the index until
; it reaches 268435456.
6:                                                ; preds = %2, %6
  %7 = phi i64 [ 0, %2 ], [ %9, %6 ]
  %8 = getelementptr inbounds [268435456 x i64], ptr %4, i64 0, i64 %7
  store volatile i64 %1, ptr %8, align 8, !tbaa !6
  %9 = add nuw nsw i64 %7, 1
  %10 = icmp eq i64 %9, 268435456
  br i1 %10, label %5, label %6, !llvm.loop !12
}

;;; Stores the mask first to a dynamically sized alloca (%1 elements of
;;; <512 x i1>) and then to a fixed, 64-byte-aligned slot. The expected code
;;; shifts %s0 left by 6 before calling __ve_grow_stack, addresses the dynamic
;;; area from 240(, %s11), and addresses the fixed slot through %s17 at
;;; offsets 256 to 312.
; Function Attrs: argmemonly nofree nounwind
define fastcc void @store__vm512_stk_dyn(<512 x i1> noundef %0, i64 noundef %1) {
; CHECK-LABEL: store__vm512_stk_dyn:
; CHECK: # %bb.0:
; CHECK-NEXT: st %s9, (, %s11)
; CHECK-NEXT: st %s10, 8(, %s11)
; CHECK-NEXT: st %s17, 40(, %s11)
; CHECK-NEXT: or %s9, 0, %s11
; CHECK-NEXT: lea %s11, -320(, %s11)
; CHECK-NEXT: and %s11, %s11, (58)1
; CHECK-NEXT: or %s17, 0, %s11
; CHECK-NEXT: brge.l.t %s11, %s8, .LBB12_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: ld %s61, 24(, %s14)
; CHECK-NEXT: or %s62, 0, %s0
; CHECK-NEXT: lea %s63, 315
; CHECK-NEXT: shm.l %s63, (%s61)
; CHECK-NEXT: shm.l %s8, 8(%s61)
; CHECK-NEXT: shm.l %s11, 16(%s61)
; CHECK-NEXT: monc
; CHECK-NEXT: or %s0, 0, %s62
; CHECK-NEXT: .LBB12_2:
; CHECK-NEXT: sll %s0, %s0, 6
; CHECK-NEXT: lea %s1, __ve_grow_stack@lo
; CHECK-NEXT: and %s1, %s1, (32)0
; CHECK-NEXT: lea.sl %s12, __ve_grow_stack@hi(, %s1)
; CHECK-NEXT: bsic %s10, (, %s12)
; CHECK-NEXT: lea %s0, 240(, %s11)
; CHECK-NEXT: svm %s1, %vm2, 3
; CHECK-NEXT: st %s1, 56(, %s0)
; CHECK-NEXT: svm %s1, %vm2, 2
; CHECK-NEXT: st %s1, 48(, %s0)
; CHECK-NEXT: svm %s1, %vm2, 1
; CHECK-NEXT: st %s1, 40(, %s0)
; CHECK-NEXT: svm %s1, %vm2, 0
; CHECK-NEXT: st %s1, 32(, %s0)
; CHECK-NEXT: svm %s1, %vm3, 3
; CHECK-NEXT: st %s1, 24(, %s0)
; CHECK-NEXT: svm %s1, %vm3, 2
; CHECK-NEXT: st %s1, 16(, %s0)
; CHECK-NEXT: svm %s1, %vm3, 1
; CHECK-NEXT: st %s1, 8(, %s0)
; CHECK-NEXT: svm %s1, %vm3, 0
; CHECK-NEXT: st %s1, (, %s0)
; CHECK-NEXT: svm %s16, %vm3, 0
; CHECK-NEXT: st %s16, 256(, %s17)
; CHECK-NEXT: svm %s16, %vm3, 1
; CHECK-NEXT: st %s16, 264(, %s17)
; CHECK-NEXT: svm %s16, %vm3, 2
; CHECK-NEXT: st %s16, 272(, %s17)
; CHECK-NEXT: svm %s16, %vm3, 3
; CHECK-NEXT: st %s16, 280(, %s17)
; CHECK-NEXT: svm %s16, %vm2, 0
; CHECK-NEXT: st %s16, 288(, %s17)
; CHECK-NEXT: svm %s16, %vm2, 1
; CHECK-NEXT: st %s16, 296(, %s17)
; CHECK-NEXT: svm %s16, %vm2, 2
; CHECK-NEXT: st %s16, 304(, %s17)
; CHECK-NEXT: svm %s16, %vm2, 3
; CHECK-NEXT: st %s16, 312(, %s17)
; CHECK-NEXT: or %s11, 0, %s9
; CHECK-NEXT: ld %s17, 40(, %s11)
; CHECK-NEXT: ld %s10, 8(, %s11)
; CHECK-NEXT: ld %s9, (, %s11)
; CHECK-NEXT: b.l.t (, %s10)
  %3 = alloca <512 x i1>, align 64
  call void @llvm.lifetime.start.p0(i64 64, ptr nonnull %3)
  ; Dynamically sized allocation: %1 elements of <512 x i1>.
  ; NOTE(review): %4 is allocated with align 8, but the store through it
  ; below states align 64 -- confirm this mismatch is intended.
  %4 = alloca <512 x i1>, i64 %1, align 8
  store volatile <512 x i1> %0, ptr %4, align 64, !tbaa !3
  store volatile <512 x i1> %0, ptr %3, align 64, !tbaa !3
  call void @llvm.lifetime.end.p0(i64 64, ptr nonnull %3)
  ret void
}

;;; Same shape as store__vm512_stk_dyn, except that the fixed slot %3 is only
;;; 32-byte aligned; the expected prologue masks %s11 with (59)1 instead of
;;; (58)1.
; Function Attrs: argmemonly nofree nounwind
define fastcc void @store__vm512_stk_dyn_align(<512 x i1> noundef %0, i64 noundef %1) {
; CHECK-LABEL: store__vm512_stk_dyn_align:
; CHECK: # %bb.0:
; CHECK-NEXT: st %s9, (, %s11)
; CHECK-NEXT: st %s10, 8(, %s11)
; CHECK-NEXT: st %s17, 40(, %s11)
; CHECK-NEXT: or %s9, 0, %s11
; CHECK-NEXT: lea %s11, -320(, %s11)
; CHECK-NEXT: and %s11, %s11, (59)1
; CHECK-NEXT: or %s17, 0, %s11
; CHECK-NEXT: brge.l.t %s11, %s8, .LBB13_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: ld %s61, 24(, %s14)
; CHECK-NEXT: or %s62, 0, %s0
; CHECK-NEXT: lea %s63, 315
; CHECK-NEXT: shm.l %s63, (%s61)
; CHECK-NEXT: shm.l %s8, 8(%s61)
; CHECK-NEXT: shm.l %s11, 16(%s61)
; CHECK-NEXT: monc
; CHECK-NEXT: or %s0, 0, %s62
; CHECK-NEXT: .LBB13_2:
; CHECK-NEXT: sll %s0, %s0, 6
; CHECK-NEXT: lea %s1, __ve_grow_stack@lo
; CHECK-NEXT: and %s1, %s1, (32)0
; CHECK-NEXT: lea.sl %s12, __ve_grow_stack@hi(, %s1)
; CHECK-NEXT: bsic %s10, (, %s12)
; CHECK-NEXT: lea %s0, 240(, %s11)
; CHECK-NEXT: svm %s1, %vm2, 3
; CHECK-NEXT: st %s1, 56(, %s0)
; CHECK-NEXT: svm %s1, %vm2, 2
; CHECK-NEXT: st %s1, 48(, %s0)
; CHECK-NEXT: svm %s1, %vm2, 1
; CHECK-NEXT: st %s1, 40(, %s0)
; CHECK-NEXT: svm %s1, %vm2, 0
; CHECK-NEXT: st %s1, 32(, %s0)
; CHECK-NEXT: svm %s1, %vm3, 3
; CHECK-NEXT: st %s1, 24(, %s0)
; CHECK-NEXT: svm %s1, %vm3, 2
; CHECK-NEXT: st %s1, 16(, %s0)
; CHECK-NEXT: svm %s1, %vm3, 1
; CHECK-NEXT: st %s1, 8(, %s0)
; CHECK-NEXT: svm %s1, %vm3, 0
; CHECK-NEXT: st %s1, (, %s0)
; CHECK-NEXT: svm %s16, %vm3, 0
; CHECK-NEXT: st %s16, 256(, %s17)
; CHECK-NEXT: svm %s16, %vm3, 1
; CHECK-NEXT: st %s16, 264(, %s17)
; CHECK-NEXT: svm %s16, %vm3, 2
; CHECK-NEXT: st %s16, 272(, %s17)
; CHECK-NEXT: svm %s16, %vm3, 3
; CHECK-NEXT: st %s16, 280(, %s17)
; CHECK-NEXT: svm %s16, %vm2, 0
; CHECK-NEXT: st %s16, 288(, %s17)
; CHECK-NEXT: svm %s16, %vm2, 1
; CHECK-NEXT: st %s16, 296(, %s17)
; CHECK-NEXT: svm %s16, %vm2, 2
; CHECK-NEXT: st %s16, 304(, %s17)
; CHECK-NEXT: svm %s16, %vm2, 3
; CHECK-NEXT: st %s16, 312(, %s17)
; CHECK-NEXT: or %s11, 0, %s9
; CHECK-NEXT: ld %s17, 40(, %s11)
; CHECK-NEXT: ld %s10, 8(, %s11)
; CHECK-NEXT: ld %s9, (, %s11)
; CHECK-NEXT: b.l.t (, %s10)
  ; Fixed slot with 32-byte alignment.
  %3 = alloca <512 x i1>, align 32
  call void @llvm.lifetime.start.p0(i64 64, ptr nonnull %3)
  ; NOTE(review): %4 is allocated with align 8, but the store through it
  ; below states align 64 -- confirm this mismatch is intended.
  %4 = alloca <512 x i1>, i64 %1, align 8
  store volatile <512 x i1> %0, ptr %4, align 64, !tbaa !3
  store volatile <512 x i1> %0, ptr %3, align 32, !tbaa !3
  call void @llvm.lifetime.end.p0(i64 64, ptr nonnull %3)
  ret void
}

;;; Uses two fixed slots (%3 with align 32, %4 with align 64) plus a dynamic
;;; alloca of %1 bytes. The mask is stored to the dynamic area, then to %3,
;;; then to %4. The expected code rounds %s0 up to a multiple of 16
;;; (lea 15 / and -16) before calling __ve_grow_stack and lowers %s11 by 384
;;; bytes in the prologue.
; Function Attrs: argmemonly nofree nounwind
define fastcc void @store__vm512_stk_dyn_align2(<512 x i1> noundef %0, i64 noundef %1) {
; CHECK-LABEL: store__vm512_stk_dyn_align2:
; CHECK: # %bb.0:
; CHECK-NEXT: st %s9, (, %s11)
; CHECK-NEXT: st %s10, 8(, %s11)
; CHECK-NEXT: st %s17, 40(, %s11)
; CHECK-NEXT: or %s9, 0, %s11
; CHECK-NEXT: lea %s11, -384(, %s11)
; CHECK-NEXT: and %s11, %s11, (58)1
; CHECK-NEXT: or %s17, 0, %s11
; CHECK-NEXT: brge.l.t %s11, %s8, .LBB14_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: ld %s61, 24(, %s14)
; CHECK-NEXT: or %s62, 0, %s0
; CHECK-NEXT: lea %s63, 315
; CHECK-NEXT: shm.l %s63, (%s61)
; CHECK-NEXT: shm.l %s8, 8(%s61)
; CHECK-NEXT: shm.l %s11, 16(%s61)
; CHECK-NEXT: monc
; CHECK-NEXT: or %s0, 0, %s62
; CHECK-NEXT: .LBB14_2:
; CHECK-NEXT: lea %s0, 15(, %s0)
; CHECK-NEXT: and %s0, -16, %s0
; CHECK-NEXT: lea %s1, __ve_grow_stack@lo
; CHECK-NEXT: and %s1, %s1, (32)0
; CHECK-NEXT: lea.sl %s12, __ve_grow_stack@hi(, %s1)
; CHECK-NEXT: bsic %s10, (, %s12)
; CHECK-NEXT: lea %s0, 240(, %s11)
; CHECK-NEXT: svm %s1, %vm2, 3
; CHECK-NEXT: st %s1, 56(, %s0)
; CHECK-NEXT: svm %s1, %vm2, 2
; CHECK-NEXT: st %s1, 48(, %s0)
; CHECK-NEXT: svm %s1, %vm2, 1
; CHECK-NEXT: st %s1, 40(, %s0)
; CHECK-NEXT: svm %s1, %vm2, 0
; CHECK-NEXT: st %s1, 32(, %s0)
; CHECK-NEXT: svm %s1, %vm3, 3
; CHECK-NEXT: st %s1, 24(, %s0)
; CHECK-NEXT: svm %s1, %vm3, 2
; CHECK-NEXT: st %s1, 16(, %s0)
; CHECK-NEXT: svm %s1, %vm3, 1
; CHECK-NEXT: st %s1, 8(, %s0)
; CHECK-NEXT: svm %s1, %vm3, 0
; CHECK-NEXT: st %s1, (, %s0)
; CHECK-NEXT: svm %s16, %vm3, 0
; CHECK-NEXT: st %s16, 320(, %s17)
; CHECK-NEXT: svm %s16, %vm3, 1
; CHECK-NEXT: st %s16, 328(, %s17)
; CHECK-NEXT: svm %s16, %vm3, 2
; CHECK-NEXT: st %s16, 336(, %s17)
; CHECK-NEXT: svm %s16, %vm3, 3
; CHECK-NEXT: st %s16, 344(, %s17)
; CHECK-NEXT: svm %s16, %vm2, 0
; CHECK-NEXT: st %s16, 352(, %s17)
; CHECK-NEXT: svm %s16, %vm2, 1
; CHECK-NEXT: st %s16, 360(, %s17)
; CHECK-NEXT: svm %s16, %vm2, 2
; CHECK-NEXT: st %s16, 368(, %s17)
; CHECK-NEXT: svm %s16, %vm2, 3
; CHECK-NEXT: st %s16, 376(, %s17)
; CHECK-NEXT: svm %s16, %vm3, 0
; CHECK-NEXT: st %s16, 256(, %s17)
; CHECK-NEXT: svm %s16, %vm3, 1
; CHECK-NEXT: st %s16, 264(, %s17)
; CHECK-NEXT: svm %s16, %vm3, 2
; CHECK-NEXT: st %s16, 272(, %s17)
; CHECK-NEXT: svm %s16, %vm3, 3
; CHECK-NEXT: st %s16, 280(, %s17)
; CHECK-NEXT: svm %s16, %vm2, 0
; CHECK-NEXT: st %s16, 288(, %s17)
; CHECK-NEXT: svm %s16, %vm2, 1
; CHECK-NEXT: st %s16, 296(, %s17)
; CHECK-NEXT: svm %s16, %vm2, 2
; CHECK-NEXT: st %s16, 304(, %s17)
; CHECK-NEXT: svm %s16, %vm2, 3
; CHECK-NEXT: st %s16, 312(, %s17)
; CHECK-NEXT: or %s11, 0, %s9
; CHECK-NEXT: ld %s17, 40(, %s11)
; CHECK-NEXT: ld %s10, 8(, %s11)
; CHECK-NEXT: ld %s9, (, %s11)
; CHECK-NEXT: b.l.t (, %s10)
  ; Two fixed slots with different alignment requirements.
  %3 = alloca <512 x i1>, align 32
  %4 = alloca <512 x i1>, align 64
  call void @llvm.lifetime.start.p0(i64 64, ptr nonnull %3)
  ; Dynamically sized byte buffer of %1 bytes.
  ; NOTE(review): %5 is allocated with align 8, but the store through it
  ; below states align 64 -- confirm this mismatch is intended.
  %5 = alloca i8, i64 %1, align 8
  store volatile <512 x i1> %0, ptr %5, align 64, !tbaa !3
  store volatile <512 x i1> %0, ptr %3, align 32, !tbaa !3
  call void @llvm.lifetime.start.p0(i64 64, ptr nonnull %4)
  store volatile <512 x i1> %0, ptr %4, align 64, !tbaa !3
  call void @llvm.lifetime.end.p0(i64 64, ptr nonnull %4)
  call void @llvm.lifetime.end.p0(i64 64, ptr nonnull %3)
  ret void
}

;;; Like store__vm512_stk_dyn_align, but @dummy and @pass are called between
;;; the dynamic alloca and the stores. The expected code writes the mask to
;;; 256-312(, %s17) before the calls (annotated "64-byte Folded Spill"),
;;; reloads it into %vm3/%vm2 afterwards, and then stores it to the dynamic
;;; area (based at %s19) and to the fixed slot at 320-376(, %s17). %s18 and
;;; %s19 are saved at 48(, %s9) and 56(, %s9) and restored before returning.
; Function Attrs: nounwind
define fastcc void @store__vm512_stk_dyn_align_spill(<512 x i1> noundef %0, i64 noundef %1) {
; CHECK-LABEL: store__vm512_stk_dyn_align_spill:
; CHECK: # %bb.0:
; CHECK-NEXT: st %s9, (, %s11)
; CHECK-NEXT: st %s10, 8(, %s11)
; CHECK-NEXT: st %s17, 40(, %s11)
; CHECK-NEXT: or %s9, 0, %s11
; CHECK-NEXT: lea %s11, -384(, %s11)
; CHECK-NEXT: and %s11, %s11, (59)1
; CHECK-NEXT: or %s17, 0, %s11
; CHECK-NEXT: brge.l.t %s11, %s8, .LBB15_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: ld %s61, 24(, %s14)
; CHECK-NEXT: or %s62, 0, %s0
; CHECK-NEXT: lea %s63, 315
; CHECK-NEXT: shm.l %s63, (%s61)
; CHECK-NEXT: shm.l %s8, 8(%s61)
; CHECK-NEXT: shm.l %s11, 16(%s61)
; CHECK-NEXT: monc
; CHECK-NEXT: or %s0, 0, %s62
; CHECK-NEXT: .LBB15_2:
; CHECK-NEXT: st %s18, 48(, %s9) # 8-byte Folded Spill
; CHECK-NEXT: st %s19, 56(, %s9) # 8-byte Folded Spill
; CHECK-NEXT: or %s18, 0, %s0
; CHECK-NEXT: svm %s16, %vm3, 0
; CHECK-NEXT: st %s16, 256(, %s17)
; CHECK-NEXT: svm %s16, %vm3, 1
; CHECK-NEXT: st %s16, 264(, %s17)
; CHECK-NEXT: svm %s16, %vm3, 2
; CHECK-NEXT: st %s16, 272(, %s17)
; CHECK-NEXT: svm %s16, %vm3, 3
; CHECK-NEXT: st %s16, 280(, %s17)
; CHECK-NEXT: svm %s16, %vm2, 0
; CHECK-NEXT: st %s16, 288(, %s17)
; CHECK-NEXT: svm %s16, %vm2, 1
; CHECK-NEXT: st %s16, 296(, %s17)
; CHECK-NEXT: svm %s16, %vm2, 2
; CHECK-NEXT: st %s16, 304(, %s17)
; CHECK-NEXT: svm %s16, %vm2, 3
; CHECK-NEXT: st %s16, 312(, %s17) # 64-byte Folded Spill
; CHECK-NEXT: sll %s0, %s0, 6
; CHECK-NEXT: lea %s1, __ve_grow_stack@lo
; CHECK-NEXT: and %s1, %s1, (32)0
; CHECK-NEXT: lea.sl %s12, __ve_grow_stack@hi(, %s1)
; CHECK-NEXT: bsic %s10, (, %s12)
; CHECK-NEXT: lea %s19, 240(, %s11)
; CHECK-NEXT: lea %s0, dummy@lo
; CHECK-NEXT: and %s0, %s0, (32)0
; CHECK-NEXT: lea.sl %s12, dummy@hi(, %s0)
; CHECK-NEXT: bsic %s10, (, %s12)
; CHECK-NEXT: lea %s0, pass@lo
; CHECK-NEXT: and %s0, %s0, (32)0
; CHECK-NEXT: lea.sl %s12, pass@hi(, %s0)
; CHECK-NEXT: or %s0, 0, %s18
; CHECK-NEXT: bsic %s10, (, %s12)
; CHECK-NEXT: # implicit-def: $vmp1
; CHECK-NEXT: ld %s16, 256(, %s17)
; CHECK-NEXT: lvm %vm3, 0, %s16
; CHECK-NEXT: ld %s16, 264(, %s17)
; CHECK-NEXT: lvm %vm3, 1, %s16
; CHECK-NEXT: ld %s16, 272(, %s17)
; CHECK-NEXT: lvm %vm3, 2, %s16
; CHECK-NEXT: ld %s16, 280(, %s17)
; CHECK-NEXT: lvm %vm3, 3, %s16
; CHECK-NEXT: ld %s16, 288(, %s17)
; CHECK-NEXT: lvm %vm2, 0, %s16
; CHECK-NEXT: ld %s16, 296(, %s17)
; CHECK-NEXT: lvm %vm2, 1, %s16
; CHECK-NEXT: ld %s16, 304(, %s17)
; CHECK-NEXT: lvm %vm2, 2, %s16
; CHECK-NEXT: ld %s16, 312(, %s17) # 64-byte Folded Reload
; CHECK-NEXT: lvm %vm2, 3, %s16
; CHECK-NEXT: svm %s0, %vm2, 3
; CHECK-NEXT: st %s0, 56(, %s19)
; CHECK-NEXT: svm %s0, %vm2, 2
; CHECK-NEXT: st %s0, 48(, %s19)
; CHECK-NEXT: svm %s0, %vm2, 1
; CHECK-NEXT: st %s0, 40(, %s19)
; CHECK-NEXT: svm %s0, %vm2, 0
; CHECK-NEXT: st %s0, 32(, %s19)
; CHECK-NEXT: svm %s0, %vm3, 3
; CHECK-NEXT: st %s0, 24(, %s19)
; CHECK-NEXT: svm %s0, %vm3, 2
; CHECK-NEXT: st %s0, 16(, %s19)
; CHECK-NEXT: svm %s0, %vm3, 1
; CHECK-NEXT: st %s0, 8(, %s19)
; CHECK-NEXT: svm %s0, %vm3, 0
; CHECK-NEXT: st %s0, (, %s19)
; CHECK-NEXT: svm %s16, %vm3, 0
; CHECK-NEXT: st %s16, 320(, %s17)
; CHECK-NEXT: svm %s16, %vm3, 1
; CHECK-NEXT: st %s16, 328(, %s17)
; CHECK-NEXT: svm %s16, %vm3, 2
; CHECK-NEXT: st %s16, 336(, %s17)
; CHECK-NEXT: svm %s16, %vm3, 3
; CHECK-NEXT: st %s16, 344(, %s17)
; CHECK-NEXT: svm %s16, %vm2, 0
; CHECK-NEXT: st %s16, 352(, %s17)
; CHECK-NEXT: svm %s16, %vm2, 1
; CHECK-NEXT: st %s16, 360(, %s17)
; CHECK-NEXT: svm %s16, %vm2, 2
; CHECK-NEXT: st %s16, 368(, %s17)
; CHECK-NEXT: svm %s16, %vm2, 3
; CHECK-NEXT: st %s16, 376(, %s17)
; CHECK-NEXT: ld %s19, 56(, %s9) # 8-byte Folded Reload
; CHECK-NEXT: ld %s18, 48(, %s9) # 8-byte Folded Reload
; CHECK-NEXT: or %s11, 0, %s9
; CHECK-NEXT: ld %s17, 40(, %s11)
; CHECK-NEXT: ld %s10, 8(, %s11)
; CHECK-NEXT: ld %s9, (, %s11)
; CHECK-NEXT: b.l.t (, %s10)
  %3 = alloca <512 x i1>, align 32
  call void @llvm.lifetime.start.p0(i64 64, ptr nonnull %3)
  ; NOTE(review): %4 is allocated with align 8, but the store through it
  ; below states align 64 -- confirm this mismatch is intended.
  %4 = alloca <512 x i1>, i64 %1, align 8
  ; The two calls sit between the allocation and the stores, so %0 and %1 are
  ; still needed after them.
  tail call fastcc void @dummy()
  tail call fastcc void @pass(i64 noundef %1)
  store volatile <512 x i1> %0, ptr %4, align 64, !tbaa !3
  store volatile <512 x i1> %0, ptr %3, align 32, !tbaa !3
  call void @llvm.lifetime.end.p0(i64 64, ptr nonnull %3)
  ret void
}

; Producer identification string.
!2 = !{!"clang version 15.0.0 (git@kaz7.github.com:sx-aurora-dev/llvm-project.git 6c510cbf7e17baa380bf8a181c3b43145fd50980)"}
; TBAA descriptors: !3 is attached to the <512 x i1> stores and !6 to the i64
; stores above.
!3 = !{!4, !4, i64 0}
!4 = !{!"omnipotent char", !5, i64 0}
!5 = !{!"Simple C/C++ TBAA"}
!6 = !{!7, !7, i64 0}
!7 = !{!"long", !4, i64 0}
; Distinct loop IDs for the !llvm.loop attachments; each one references !9
; (llvm.loop.mustprogress).
!8 = distinct !{!8, !9}
!9 = !{!"llvm.loop.mustprogress"}
!10 = distinct !{!10, !9}
!11 = distinct !{!11, !9}
!12 = distinct !{!12, !9}