1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py 2; RUN: llc < %s -mtriple=ve -mattr=+vpu | FileCheck %s 3 4;;; Test load instructions 5;;; 6;;; Note: 7;;; We test load instructions using general stack, stack with dynamic 8;;; allocation, stack with dynamic allocation and alignment, and stack 9;;; with dynamic allocation, alignment, and spill. 10;;; 11;;; First test using a stack for a leaf function. 12;;; 13;;; | | Higher address 14;;; |----------------------------------------------| <- old sp 15;;; | Local variables of fixed size | 16;;; |----------------------------------------------| <- sp 17;;; | | Lower address 18;;; 19;;; Access local variable using sp (%s11). In addition, please remember 20;;; that the stack is aligned to 16 bytes. 21;;; 22;;; Second test using a general stack. 23;;; 24;;; | | Higher address 25;;; |----------------------------------------------| 26;;; | Parameter area for this function | 27;;; |----------------------------------------------| 28;;; | Register save area (RSA) for this function | 29;;; |----------------------------------------------| 30;;; | Return address for this function | 31;;; |----------------------------------------------| 32;;; | Frame pointer for this function | 33;;; |----------------------------------------------| <- fp(=old sp) 34;;; | Local variables of fixed size | 35;;; |----------------------------------------------| 36;;; |.variable-sized.local.variables.(VLAs)........| 37;;; |..............................................| 38;;; |..............................................| 39;;; |----------------------------------------------| <- returned by alloca 40;;; | Parameter area for callee | 41;;; |----------------------------------------------| 42;;; | Register save area (RSA) for callee | 43;;; |----------------------------------------------| 44;;; | Return address for callee | 45;;; |----------------------------------------------| 46;;; | Frame pointer for callee | 47;;; 
|----------------------------------------------| <- sp 48;;; | | Lower address 49;;; 50;;; Access local variable using fp (%s9) since the size of VLA is not 51;;; known. At the beginning of the functions, allocates 240 + data 52;;; bytes. 240 means RSA+RA+FP (=176) + Parameter (=64). 53;;; 54;;; Third test using a general stack. 55;;; 56;;; | | Higher address 57;;; |----------------------------------------------| 58;;; | Parameter area for this function | 59;;; |----------------------------------------------| 60;;; | Register save area (RSA) for this function | 61;;; |----------------------------------------------| 62;;; | Return address for this function | 63;;; |----------------------------------------------| 64;;; | Frame pointer for this function | 65;;; |----------------------------------------------| <- fp(=old sp) 66;;; |.empty.space.to.make.part.below.aligned.in....| 67;;; |.case.it.needs.more.than.the.standard.16-byte.| (size of this area is 68;;; |.alignment....................................| unknown at compile time) 69;;; |----------------------------------------------| 70;;; | Local variables of fixed size including spill| 71;;; | slots | 72;;; |----------------------------------------------| <- bp(not defined by ABI, 73;;; |.variable-sized.local.variables.(VLAs)........| LLVM chooses SX17) 74;;; |..............................................| (size of this area is 75;;; |..............................................| unknown at compile time) 76;;; |----------------------------------------------| <- stack top (returned by 77;;; | Parameter area for callee | alloca) 78;;; |----------------------------------------------| 79;;; | Register save area (RSA) for callee | 80;;; |----------------------------------------------| 81;;; | Return address for callee | 82;;; |----------------------------------------------| 83;;; | Frame pointer for callee | 84;;; |----------------------------------------------| <- sp 85;;; | | Lower address 86;;; 87;;; Access local 
variable using bp (%s17) since the size of alignment 88;;; and VLA are not known. At the beginning of the functions, allocates 89;;; pad(240 + data + align) bytes. Then, access data through bp + pad(240) 90;;; since this address doesn't change even if VLA is dynamically allocated. 91;;; 92;;; Fourth test using a general stack with some spills. 93;;; 94 ;;; load__vm256_stk: fixed-size stack case. Allocates one 32-byte-aligned <256 x i1> slot, reads it back with a volatile load between lifetime.start/lifetime.end markers, and returns the loaded value. The CHECK lines expect the slot to be read through %s11 at offsets 192..216 as four 8-byte ld + lvm pairs into %vm1. 95; Function Attrs: argmemonly mustprogress nofree nounwind willreturn 96define fastcc <256 x i1> @load__vm256_stk() { 97; CHECK-LABEL: load__vm256_stk: 98; CHECK: # %bb.0: 99; CHECK-NEXT: st %s9, (, %s11) 100; CHECK-NEXT: st %s10, 8(, %s11) 101; CHECK-NEXT: or %s9, 0, %s11 102; CHECK-NEXT: lea %s11, -224(, %s11) 103; CHECK-NEXT: and %s11, %s11, (59)1 104; CHECK-NEXT: brge.l.t %s11, %s8, .LBB0_2 105; CHECK-NEXT: # %bb.1: 106; CHECK-NEXT: ld %s61, 24(, %s14) 107; CHECK-NEXT: or %s62, 0, %s0 108; CHECK-NEXT: lea %s63, 315 109; CHECK-NEXT: shm.l %s63, (%s61) 110; CHECK-NEXT: shm.l %s8, 8(%s61) 111; CHECK-NEXT: shm.l %s11, 16(%s61) 112; CHECK-NEXT: monc 113; CHECK-NEXT: or %s0, 0, %s62 114; CHECK-NEXT: .LBB0_2: 115; CHECK-NEXT: ld %s16, 192(, %s11) 116; CHECK-NEXT: lvm %vm1, 0, %s16 117; CHECK-NEXT: ld %s16, 200(, %s11) 118; CHECK-NEXT: lvm %vm1, 1, %s16 119; CHECK-NEXT: ld %s16, 208(, %s11) 120; CHECK-NEXT: lvm %vm1, 2, %s16 121; CHECK-NEXT: ld %s16, 216(, %s11) 122; CHECK-NEXT: lvm %vm1, 3, %s16 123; CHECK-NEXT: or %s11, 0, %s9 124; CHECK-NEXT: ld %s10, 8(, %s11) 125; CHECK-NEXT: ld %s9, (, %s11) 126; CHECK-NEXT: b.l.t (, %s10) 127 %1 = alloca <256 x i1>, align 32 128 call void @llvm.lifetime.start.p0(i64 32, ptr nonnull %1) 129 %2 = load volatile <256 x i1>, ptr %1, align 32 130 call void @llvm.lifetime.end.p0(i64 32, ptr nonnull %1) 131 ret <256 x i1> %2 132} 133 134; Function Attrs: argmemonly mustprogress nocallback nofree nosync nounwind willreturn 135declare void @llvm.lifetime.start.p0(i64 immarg, ptr nocapture) 136 137; Function Attrs: argmemonly mustprogress nocallback nofree nosync nounwind willreturn 
138declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture) 139 140; Function Attrs: argmemonly nofree nounwind 141define fastcc <256 x i1> @load__vm256_stk_big_fit() { 142; CHECK-LABEL: load__vm256_stk_big_fit: 143; CHECK: # %bb.0: 144; CHECK-NEXT: st %s9, (, %s11) 145; CHECK-NEXT: st %s10, 8(, %s11) 146; CHECK-NEXT: or %s9, 0, %s11 147; CHECK-NEXT: lea %s11, -2147483648(, %s11) 148; CHECK-NEXT: and %s11, %s11, (59)1 149; CHECK-NEXT: brge.l %s11, %s8, .LBB1_4 150; CHECK-NEXT: # %bb.3: 151; CHECK-NEXT: ld %s61, 24(, %s14) 152; CHECK-NEXT: or %s62, 0, %s0 153; CHECK-NEXT: lea %s63, 315 154; CHECK-NEXT: shm.l %s63, (%s61) 155; CHECK-NEXT: shm.l %s8, 8(%s61) 156; CHECK-NEXT: shm.l %s11, 16(%s61) 157; CHECK-NEXT: monc 158; CHECK-NEXT: or %s0, 0, %s62 159; CHECK-NEXT: .LBB1_4: 160; CHECK-NEXT: ld %s16, 2147483616(, %s11) 161; CHECK-NEXT: lvm %vm1, 0, %s16 162; CHECK-NEXT: ld %s16, 2147483624(, %s11) 163; CHECK-NEXT: lvm %vm1, 1, %s16 164; CHECK-NEXT: ld %s16, 2147483632(, %s11) 165; CHECK-NEXT: lvm %vm1, 2, %s16 166; CHECK-NEXT: ld %s16, 2147483640(, %s11) 167; CHECK-NEXT: lvm %vm1, 3, %s16 168; CHECK-NEXT: or %s0, 0, (0)1 169; CHECK-NEXT: lea %s1, 2147483424 170; CHECK-NEXT: .LBB1_1: # =>This Inner Loop Header: Depth=1 171; CHECK-NEXT: ld %s2, 192(%s0, %s11) 172; CHECK-NEXT: lea %s0, 8(, %s0) 173; CHECK-NEXT: brne.l %s0, %s1, .LBB1_1 174; CHECK-NEXT: # %bb.2: 175; CHECK-NEXT: or %s11, 0, %s9 176; CHECK-NEXT: ld %s10, 8(, %s11) 177; CHECK-NEXT: ld %s9, (, %s11) 178; CHECK-NEXT: b.l.t (, %s10) 179 %1 = alloca <256 x i1>, align 32 180 %2 = alloca [268435428 x i64], align 8 181 call void @llvm.lifetime.start.p0(i64 32, ptr nonnull %1) 182 call void @llvm.lifetime.start.p0(i64 2147483424, ptr nonnull %2) 183 %3 = load volatile <256 x i1>, ptr %1, align 32 184 br label %5 185 1864: ; preds = %5 187 call void @llvm.lifetime.end.p0(i64 2147483424, ptr nonnull %2) 188 call void @llvm.lifetime.end.p0(i64 32, ptr nonnull %1) 189 ret <256 x i1> %3 190 1915: ; preds = %0, %5 
192 %6 = phi i64 [ 0, %0 ], [ %9, %5 ] 193 %7 = getelementptr inbounds [268435428 x i64], ptr %2, i64 0, i64 %6 194 %8 = load volatile i64, ptr %7, align 8, !tbaa !3 195 %9 = add nuw nsw i64 %6, 1 196 %10 = icmp eq i64 %9, 268435428 197 br i1 %10, label %4, label %5, !llvm.loop !7 198} 199 200; Function Attrs: argmemonly nofree nounwind 201define fastcc <256 x i1> @load__vm256_stk_big() { 202; CHECK-LABEL: load__vm256_stk_big: 203; CHECK: # %bb.0: 204; CHECK-NEXT: st %s9, (, %s11) 205; CHECK-NEXT: st %s10, 8(, %s11) 206; CHECK-NEXT: or %s9, 0, %s11 207; CHECK-NEXT: lea %s13, 2147483616 208; CHECK-NEXT: and %s13, %s13, (32)0 209; CHECK-NEXT: lea.sl %s11, -1(%s13, %s11) 210; CHECK-NEXT: and %s11, %s11, (59)1 211; CHECK-NEXT: brge.l %s11, %s8, .LBB2_4 212; CHECK-NEXT: # %bb.3: 213; CHECK-NEXT: ld %s61, 24(, %s14) 214; CHECK-NEXT: or %s62, 0, %s0 215; CHECK-NEXT: lea %s63, 315 216; CHECK-NEXT: shm.l %s63, (%s61) 217; CHECK-NEXT: shm.l %s8, 8(%s61) 218; CHECK-NEXT: shm.l %s11, 16(%s61) 219; CHECK-NEXT: monc 220; CHECK-NEXT: or %s0, 0, %s62 221; CHECK-NEXT: .LBB2_4: 222; CHECK-NEXT: lea %s13, -2147483648 223; CHECK-NEXT: and %s13, %s13, (32)0 224; CHECK-NEXT: lea.sl %s13, (%s11, %s13) 225; CHECK-NEXT: ld %s16, (, %s13) 226; CHECK-NEXT: lvm %vm1, 0, %s16 227; CHECK-NEXT: ld %s16, 8(, %s13) 228; CHECK-NEXT: lvm %vm1, 1, %s16 229; CHECK-NEXT: ld %s16, 16(, %s13) 230; CHECK-NEXT: lvm %vm1, 2, %s16 231; CHECK-NEXT: ld %s16, 24(, %s13) 232; CHECK-NEXT: lvm %vm1, 3, %s16 233; CHECK-NEXT: or %s0, 0, (0)1 234; CHECK-NEXT: lea %s1, 2147483432 235; CHECK-NEXT: .LBB2_1: # =>This Inner Loop Header: Depth=1 236; CHECK-NEXT: ld %s2, 216(%s0, %s11) 237; CHECK-NEXT: lea %s0, 8(, %s0) 238; CHECK-NEXT: brne.l %s0, %s1, .LBB2_1 239; CHECK-NEXT: # %bb.2: 240; CHECK-NEXT: or %s11, 0, %s9 241; CHECK-NEXT: ld %s10, 8(, %s11) 242; CHECK-NEXT: ld %s9, (, %s11) 243; CHECK-NEXT: b.l.t (, %s10) 244 %1 = alloca <256 x i1>, align 32 245 %2 = alloca [268435429 x i64], align 8 246 call void 
@llvm.lifetime.start.p0(i64 32, ptr nonnull %1) 247 call void @llvm.lifetime.start.p0(i64 2147483432, ptr nonnull %2) 248 %3 = load volatile <256 x i1>, ptr %1, align 32 249 br label %5 250 2514: ; preds = %5 252 call void @llvm.lifetime.end.p0(i64 2147483432, ptr nonnull %2) 253 call void @llvm.lifetime.end.p0(i64 32, ptr nonnull %1) 254 ret <256 x i1> %3 255 2565: ; preds = %0, %5 257 %6 = phi i64 [ 0, %0 ], [ %9, %5 ] 258 %7 = getelementptr inbounds [268435429 x i64], ptr %2, i64 0, i64 %6 259 %8 = load volatile i64, ptr %7, align 8, !tbaa !3 260 %9 = add nuw nsw i64 %6, 1 261 %10 = icmp eq i64 %9, 268435429 262 br i1 %10, label %4, label %5, !llvm.loop !9 263} 264 265; Function Attrs: argmemonly nofree nounwind 266define fastcc <256 x i1> @load__vm256_stk_big2() { 267; CHECK-LABEL: load__vm256_stk_big2: 268; CHECK: # %bb.0: 269; CHECK-NEXT: st %s9, (, %s11) 270; CHECK-NEXT: st %s10, 8(, %s11) 271; CHECK-NEXT: or %s9, 0, %s11 272; CHECK-NEXT: lea %s13, 2147483424 273; CHECK-NEXT: and %s13, %s13, (32)0 274; CHECK-NEXT: lea.sl %s11, -1(%s13, %s11) 275; CHECK-NEXT: and %s11, %s11, (59)1 276; CHECK-NEXT: brge.l %s11, %s8, .LBB3_4 277; CHECK-NEXT: # %bb.3: 278; CHECK-NEXT: ld %s61, 24(, %s14) 279; CHECK-NEXT: or %s62, 0, %s0 280; CHECK-NEXT: lea %s63, 315 281; CHECK-NEXT: shm.l %s63, (%s61) 282; CHECK-NEXT: shm.l %s8, 8(%s61) 283; CHECK-NEXT: shm.l %s11, 16(%s61) 284; CHECK-NEXT: monc 285; CHECK-NEXT: or %s0, 0, %s62 286; CHECK-NEXT: .LBB3_4: 287; CHECK-NEXT: lea %s13, -2147483456 288; CHECK-NEXT: and %s13, %s13, (32)0 289; CHECK-NEXT: lea.sl %s13, (%s11, %s13) 290; CHECK-NEXT: ld %s16, (, %s13) 291; CHECK-NEXT: lvm %vm1, 0, %s16 292; CHECK-NEXT: ld %s16, 8(, %s13) 293; CHECK-NEXT: lvm %vm1, 1, %s16 294; CHECK-NEXT: ld %s16, 16(, %s13) 295; CHECK-NEXT: lvm %vm1, 2, %s16 296; CHECK-NEXT: ld %s16, 24(, %s13) 297; CHECK-NEXT: lvm %vm1, 3, %s16 298; CHECK-NEXT: or %s0, 0, (0)1 299; CHECK-NEXT: lea %s1, -2147483648 300; CHECK-NEXT: and %s1, %s1, (32)0 301; CHECK-NEXT: 
.LBB3_1: # =>This Inner Loop Header: Depth=1 302; CHECK-NEXT: ld %s2, 192(%s0, %s11) 303; CHECK-NEXT: lea %s0, 8(, %s0) 304; CHECK-NEXT: brne.l %s0, %s1, .LBB3_1 305; CHECK-NEXT: # %bb.2: 306; CHECK-NEXT: or %s11, 0, %s9 307; CHECK-NEXT: ld %s10, 8(, %s11) 308; CHECK-NEXT: ld %s9, (, %s11) 309; CHECK-NEXT: b.l.t (, %s10) 310 %1 = alloca <256 x i1>, align 32 311 %2 = alloca [268435456 x i64], align 8 312 call void @llvm.lifetime.start.p0(i64 32, ptr nonnull %1) 313 call void @llvm.lifetime.start.p0(i64 2147483648, ptr nonnull %2) 314 %3 = load volatile <256 x i1>, ptr %1, align 32 315 br label %5 316 3174: ; preds = %5 318 call void @llvm.lifetime.end.p0(i64 2147483648, ptr nonnull %2) 319 call void @llvm.lifetime.end.p0(i64 32, ptr nonnull %1) 320 ret <256 x i1> %3 321 3225: ; preds = %0, %5 323 %6 = phi i64 [ 0, %0 ], [ %9, %5 ] 324 %7 = getelementptr inbounds [268435456 x i64], ptr %2, i64 0, i64 %6 325 %8 = load volatile i64, ptr %7, align 8, !tbaa !3 326 %9 = add nuw nsw i64 %6, 1 327 %10 = icmp eq i64 %9, 268435456 328 br i1 %10, label %4, label %5, !llvm.loop !10 329} 330 331; Function Attrs: argmemonly mustprogress nofree nounwind willreturn 332define fastcc <256 x i1> @load__vm256_stk_dyn(i64 noundef %0) { 333; CHECK-LABEL: load__vm256_stk_dyn: 334; CHECK: # %bb.0: 335; CHECK-NEXT: st %s9, (, %s11) 336; CHECK-NEXT: st %s10, 8(, %s11) 337; CHECK-NEXT: or %s9, 0, %s11 338; CHECK-NEXT: lea %s11, -272(, %s11) 339; CHECK-NEXT: brge.l.t %s11, %s8, .LBB4_2 340; CHECK-NEXT: # %bb.1: 341; CHECK-NEXT: ld %s61, 24(, %s14) 342; CHECK-NEXT: or %s62, 0, %s0 343; CHECK-NEXT: lea %s63, 315 344; CHECK-NEXT: shm.l %s63, (%s61) 345; CHECK-NEXT: shm.l %s8, 8(%s61) 346; CHECK-NEXT: shm.l %s11, 16(%s61) 347; CHECK-NEXT: monc 348; CHECK-NEXT: or %s0, 0, %s62 349; CHECK-NEXT: .LBB4_2: 350; CHECK-NEXT: sll %s0, %s0, 5 351; CHECK-NEXT: lea %s1, __ve_grow_stack@lo 352; CHECK-NEXT: and %s1, %s1, (32)0 353; CHECK-NEXT: lea.sl %s12, __ve_grow_stack@hi(, %s1) 354; CHECK-NEXT: bsic %s10, 
(, %s12) 355; CHECK-NEXT: lea %s0, 240(, %s11) 356; CHECK-NEXT: ld %s1, 24(, %s0) 357; CHECK-NEXT: ld %s1, 16(, %s0) 358; CHECK-NEXT: ld %s1, 8(, %s0) 359; CHECK-NEXT: ld %s0, (, %s0) 360; CHECK-NEXT: ld %s16, -32(, %s9) 361; CHECK-NEXT: lvm %vm1, 0, %s16 362; CHECK-NEXT: ld %s16, -24(, %s9) 363; CHECK-NEXT: lvm %vm1, 1, %s16 364; CHECK-NEXT: ld %s16, -16(, %s9) 365; CHECK-NEXT: lvm %vm1, 2, %s16 366; CHECK-NEXT: ld %s16, -8(, %s9) 367; CHECK-NEXT: lvm %vm1, 3, %s16 368; CHECK-NEXT: or %s11, 0, %s9 369; CHECK-NEXT: ld %s10, 8(, %s11) 370; CHECK-NEXT: ld %s9, (, %s11) 371; CHECK-NEXT: b.l.t (, %s10) 372 %2 = alloca <256 x i1>, align 8 373 call void @llvm.lifetime.start.p0(i64 32, ptr nonnull %2) 374 %3 = alloca <256 x i1>, i64 %0, align 8 375 %4 = load volatile <256 x i1>, ptr %3, align 32 376 %5 = load volatile <256 x i1>, ptr %2, align 32 377 call void @llvm.lifetime.end.p0(i64 32, ptr nonnull %2) 378 ret <256 x i1> %5 379} 380 381; Function Attrs: argmemonly mustprogress nofree nounwind willreturn 382define fastcc <256 x i1> @load__vm256_stk_dyn_align(i64 noundef %0) { 383; CHECK-LABEL: load__vm256_stk_dyn_align: 384; CHECK: # %bb.0: 385; CHECK-NEXT: st %s9, (, %s11) 386; CHECK-NEXT: st %s10, 8(, %s11) 387; CHECK-NEXT: st %s17, 40(, %s11) 388; CHECK-NEXT: or %s9, 0, %s11 389; CHECK-NEXT: lea %s11, -288(, %s11) 390; CHECK-NEXT: and %s11, %s11, (59)1 391; CHECK-NEXT: or %s17, 0, %s11 392; CHECK-NEXT: brge.l.t %s11, %s8, .LBB5_2 393; CHECK-NEXT: # %bb.1: 394; CHECK-NEXT: ld %s61, 24(, %s14) 395; CHECK-NEXT: or %s62, 0, %s0 396; CHECK-NEXT: lea %s63, 315 397; CHECK-NEXT: shm.l %s63, (%s61) 398; CHECK-NEXT: shm.l %s8, 8(%s61) 399; CHECK-NEXT: shm.l %s11, 16(%s61) 400; CHECK-NEXT: monc 401; CHECK-NEXT: or %s0, 0, %s62 402; CHECK-NEXT: .LBB5_2: 403; CHECK-NEXT: sll %s0, %s0, 5 404; CHECK-NEXT: lea %s1, __ve_grow_stack@lo 405; CHECK-NEXT: and %s1, %s1, (32)0 406; CHECK-NEXT: lea.sl %s12, __ve_grow_stack@hi(, %s1) 407; CHECK-NEXT: bsic %s10, (, %s12) 408; CHECK-NEXT: lea 
%s0, 240(, %s11) 409; CHECK-NEXT: ld %s1, 24(, %s0) 410; CHECK-NEXT: ld %s1, 16(, %s0) 411; CHECK-NEXT: ld %s1, 8(, %s0) 412; CHECK-NEXT: ld %s0, (, %s0) 413; CHECK-NEXT: ld %s16, 256(, %s17) 414; CHECK-NEXT: lvm %vm1, 0, %s16 415; CHECK-NEXT: ld %s16, 264(, %s17) 416; CHECK-NEXT: lvm %vm1, 1, %s16 417; CHECK-NEXT: ld %s16, 272(, %s17) 418; CHECK-NEXT: lvm %vm1, 2, %s16 419; CHECK-NEXT: ld %s16, 280(, %s17) 420; CHECK-NEXT: lvm %vm1, 3, %s16 421; CHECK-NEXT: or %s11, 0, %s9 422; CHECK-NEXT: ld %s17, 40(, %s11) 423; CHECK-NEXT: ld %s10, 8(, %s11) 424; CHECK-NEXT: ld %s9, (, %s11) 425; CHECK-NEXT: b.l.t (, %s10) 426 %2 = alloca <256 x i1>, align 32 427 call void @llvm.lifetime.start.p0(i64 32, ptr nonnull %2) 428 %3 = alloca <256 x i1>, i64 %0, align 8 429 %4 = load volatile <256 x i1>, ptr %3, align 32 430 %5 = load volatile <256 x i1>, ptr %2, align 32 431 call void @llvm.lifetime.end.p0(i64 32, ptr nonnull %2) 432 ret <256 x i1> %5 433} 434 435; Function Attrs: argmemonly mustprogress nofree nounwind willreturn 436define fastcc <256 x i1> @load__vm256_stk_dyn_align2(i64 noundef %0) { 437; CHECK-LABEL: load__vm256_stk_dyn_align2: 438; CHECK: # %bb.0: 439; CHECK-NEXT: st %s9, (, %s11) 440; CHECK-NEXT: st %s10, 8(, %s11) 441; CHECK-NEXT: st %s17, 40(, %s11) 442; CHECK-NEXT: or %s9, 0, %s11 443; CHECK-NEXT: lea %s11, -320(, %s11) 444; CHECK-NEXT: and %s11, %s11, (58)1 445; CHECK-NEXT: or %s17, 0, %s11 446; CHECK-NEXT: brge.l.t %s11, %s8, .LBB6_2 447; CHECK-NEXT: # %bb.1: 448; CHECK-NEXT: ld %s61, 24(, %s14) 449; CHECK-NEXT: or %s62, 0, %s0 450; CHECK-NEXT: lea %s63, 315 451; CHECK-NEXT: shm.l %s63, (%s61) 452; CHECK-NEXT: shm.l %s8, 8(%s61) 453; CHECK-NEXT: shm.l %s11, 16(%s61) 454; CHECK-NEXT: monc 455; CHECK-NEXT: or %s0, 0, %s62 456; CHECK-NEXT: .LBB6_2: 457; CHECK-NEXT: sll %s0, %s0, 5 458; CHECK-NEXT: lea %s1, __ve_grow_stack@lo 459; CHECK-NEXT: and %s1, %s1, (32)0 460; CHECK-NEXT: lea.sl %s12, __ve_grow_stack@hi(, %s1) 461; CHECK-NEXT: bsic %s10, (, %s12) 462; 
CHECK-NEXT: lea %s0, 240(, %s11) 463; CHECK-NEXT: ld %s1, 24(, %s0) 464; CHECK-NEXT: ld %s1, 16(, %s0) 465; CHECK-NEXT: ld %s1, 8(, %s0) 466; CHECK-NEXT: ld %s0, (, %s0) 467; CHECK-NEXT: ld %s16, 288(, %s17) 468; CHECK-NEXT: lvm %vm1, 0, %s16 469; CHECK-NEXT: ld %s16, 296(, %s17) 470; CHECK-NEXT: lvm %vm1, 1, %s16 471; CHECK-NEXT: ld %s16, 304(, %s17) 472; CHECK-NEXT: lvm %vm1, 2, %s16 473; CHECK-NEXT: ld %s16, 312(, %s17) 474; CHECK-NEXT: lvm %vm1, 3, %s16 475; CHECK-NEXT: ld %s16, 256(, %s17) 476; CHECK-NEXT: lvm %vm2, 0, %s16 477; CHECK-NEXT: ld %s16, 264(, %s17) 478; CHECK-NEXT: lvm %vm2, 1, %s16 479; CHECK-NEXT: ld %s16, 272(, %s17) 480; CHECK-NEXT: lvm %vm2, 2, %s16 481; CHECK-NEXT: ld %s16, 280(, %s17) 482; CHECK-NEXT: lvm %vm2, 3, %s16 483; CHECK-NEXT: or %s11, 0, %s9 484; CHECK-NEXT: ld %s17, 40(, %s11) 485; CHECK-NEXT: ld %s10, 8(, %s11) 486; CHECK-NEXT: ld %s9, (, %s11) 487; CHECK-NEXT: b.l.t (, %s10) 488 %2 = alloca <256 x i1>, align 32 489 %3 = alloca <256 x i1>, align 64 490 call void @llvm.lifetime.start.p0(i64 32, ptr nonnull %2) 491 %4 = alloca <256 x i1>, i64 %0, align 8 492 %5 = load volatile <256 x i1>, ptr %4, align 32 493 %6 = load volatile <256 x i1>, ptr %2, align 32 494 call void @llvm.lifetime.start.p0(i64 32, ptr nonnull %3) 495 %7 = load volatile <256 x i1>, ptr %3, align 64 496 call void @llvm.lifetime.end.p0(i64 32, ptr nonnull %3) 497 call void @llvm.lifetime.end.p0(i64 32, ptr nonnull %2) 498 ret <256 x i1> %6 499} 500 501; Function Attrs: nounwind 502define fastcc <256 x i1> @load__vm256_stk_dyn_align_spill(i64 noundef %0) { 503; CHECK-LABEL: load__vm256_stk_dyn_align_spill: 504; CHECK: # %bb.0: 505; CHECK-NEXT: st %s9, (, %s11) 506; CHECK-NEXT: st %s10, 8(, %s11) 507; CHECK-NEXT: st %s17, 40(, %s11) 508; CHECK-NEXT: or %s9, 0, %s11 509; CHECK-NEXT: lea %s11, -320(, %s11) 510; CHECK-NEXT: and %s11, %s11, (59)1 511; CHECK-NEXT: or %s17, 0, %s11 512; CHECK-NEXT: brge.l.t %s11, %s8, .LBB7_2 513; CHECK-NEXT: # %bb.1: 514; CHECK-NEXT: ld 
%s61, 24(, %s14) 515; CHECK-NEXT: or %s62, 0, %s0 516; CHECK-NEXT: lea %s63, 315 517; CHECK-NEXT: shm.l %s63, (%s61) 518; CHECK-NEXT: shm.l %s8, 8(%s61) 519; CHECK-NEXT: shm.l %s11, 16(%s61) 520; CHECK-NEXT: monc 521; CHECK-NEXT: or %s0, 0, %s62 522; CHECK-NEXT: .LBB7_2: 523; CHECK-NEXT: st %s18, 48(, %s9) # 8-byte Folded Spill 524; CHECK-NEXT: or %s18, 0, %s0 525; CHECK-NEXT: lea %s0, 15(, %s0) 526; CHECK-NEXT: and %s0, -16, %s0 527; CHECK-NEXT: lea %s1, __ve_grow_stack@lo 528; CHECK-NEXT: and %s1, %s1, (32)0 529; CHECK-NEXT: lea.sl %s12, __ve_grow_stack@hi(, %s1) 530; CHECK-NEXT: bsic %s10, (, %s12) 531; CHECK-NEXT: lea %s0, 240(, %s11) 532; CHECK-NEXT: ld %s1, 24(, %s0) 533; CHECK-NEXT: ld %s1, 16(, %s0) 534; CHECK-NEXT: ld %s1, 8(, %s0) 535; CHECK-NEXT: ld %s0, (, %s0) 536; CHECK-NEXT: ld %s16, 288(, %s17) 537; CHECK-NEXT: lvm %vm1, 0, %s16 538; CHECK-NEXT: ld %s16, 296(, %s17) 539; CHECK-NEXT: lvm %vm1, 1, %s16 540; CHECK-NEXT: ld %s16, 304(, %s17) 541; CHECK-NEXT: lvm %vm1, 2, %s16 542; CHECK-NEXT: ld %s16, 312(, %s17) 543; CHECK-NEXT: lvm %vm1, 3, %s16 544; CHECK-NEXT: svm %s16, %vm1, 0 545; CHECK-NEXT: st %s16, 256(, %s17) 546; CHECK-NEXT: svm %s16, %vm1, 1 547; CHECK-NEXT: st %s16, 264(, %s17) 548; CHECK-NEXT: svm %s16, %vm1, 2 549; CHECK-NEXT: st %s16, 272(, %s17) 550; CHECK-NEXT: svm %s16, %vm1, 3 551; CHECK-NEXT: st %s16, 280(, %s17) # 32-byte Folded Spill 552; CHECK-NEXT: lea %s0, dummy@lo 553; CHECK-NEXT: and %s0, %s0, (32)0 554; CHECK-NEXT: lea.sl %s12, dummy@hi(, %s0) 555; CHECK-NEXT: bsic %s10, (, %s12) 556; CHECK-NEXT: lea %s0, pass@lo 557; CHECK-NEXT: and %s0, %s0, (32)0 558; CHECK-NEXT: lea.sl %s12, pass@hi(, %s0) 559; CHECK-NEXT: or %s0, 0, %s18 560; CHECK-NEXT: bsic %s10, (, %s12) 561; CHECK-NEXT: ld %s16, 256(, %s17) 562; CHECK-NEXT: lvm %vm1, 0, %s16 563; CHECK-NEXT: ld %s16, 264(, %s17) 564; CHECK-NEXT: lvm %vm1, 1, %s16 565; CHECK-NEXT: ld %s16, 272(, %s17) 566; CHECK-NEXT: lvm %vm1, 2, %s16 567; CHECK-NEXT: ld %s16, 280(, %s17) # 32-byte 
Folded Reload 568; CHECK-NEXT: lvm %vm1, 3, %s16 569; CHECK-NEXT: ld %s18, 48(, %s9) # 8-byte Folded Reload 570; CHECK-NEXT: or %s11, 0, %s9 571; CHECK-NEXT: ld %s17, 40(, %s11) 572; CHECK-NEXT: ld %s10, 8(, %s11) 573; CHECK-NEXT: ld %s9, (, %s11) 574; CHECK-NEXT: b.l.t (, %s10) 575 %2 = alloca <256 x i1>, align 32 576 call void @llvm.lifetime.start.p0(i64 32, ptr nonnull %2) 577 %3 = alloca i8, i64 %0, align 8 578 %4 = load volatile <256 x i1>, ptr %3, align 32 579 %5 = load volatile <256 x i1>, ptr %2, align 32 580 tail call fastcc void @dummy() 581 tail call fastcc void @pass(i64 noundef %0) 582 call void @llvm.lifetime.end.p0(i64 32, ptr nonnull %2) 583 ret <256 x i1> %5 584} 585 586declare fastcc void @dummy() 587 588declare fastcc void @pass(i64 noundef) 589 590; Function Attrs: argmemonly mustprogress nofree nounwind willreturn 591define fastcc <512 x i1> @load__vm512_stk() { 592; CHECK-LABEL: load__vm512_stk: 593; CHECK: # %bb.0: 594; CHECK-NEXT: st %s9, (, %s11) 595; CHECK-NEXT: st %s10, 8(, %s11) 596; CHECK-NEXT: or %s9, 0, %s11 597; CHECK-NEXT: lea %s11, -256(, %s11) 598; CHECK-NEXT: and %s11, %s11, (58)1 599; CHECK-NEXT: brge.l.t %s11, %s8, .LBB8_2 600; CHECK-NEXT: # %bb.1: 601; CHECK-NEXT: ld %s61, 24(, %s14) 602; CHECK-NEXT: or %s62, 0, %s0 603; CHECK-NEXT: lea %s63, 315 604; CHECK-NEXT: shm.l %s63, (%s61) 605; CHECK-NEXT: shm.l %s8, 8(%s61) 606; CHECK-NEXT: shm.l %s11, 16(%s61) 607; CHECK-NEXT: monc 608; CHECK-NEXT: or %s0, 0, %s62 609; CHECK-NEXT: .LBB8_2: 610; CHECK-NEXT: # implicit-def: $vmp1 611; CHECK-NEXT: ld %s16, 192(, %s11) 612; CHECK-NEXT: lvm %vm3, 0, %s16 613; CHECK-NEXT: ld %s16, 200(, %s11) 614; CHECK-NEXT: lvm %vm3, 1, %s16 615; CHECK-NEXT: ld %s16, 208(, %s11) 616; CHECK-NEXT: lvm %vm3, 2, %s16 617; CHECK-NEXT: ld %s16, 216(, %s11) 618; CHECK-NEXT: lvm %vm3, 3, %s16 619; CHECK-NEXT: ld %s16, 224(, %s11) 620; CHECK-NEXT: lvm %vm2, 0, %s16 621; CHECK-NEXT: ld %s16, 232(, %s11) 622; CHECK-NEXT: lvm %vm2, 1, %s16 623; CHECK-NEXT: ld %s16, 
240(, %s11) 624; CHECK-NEXT: lvm %vm2, 2, %s16 625; CHECK-NEXT: ld %s16, 248(, %s11) 626; CHECK-NEXT: lvm %vm2, 3, %s16 627; CHECK-NEXT: or %s11, 0, %s9 628; CHECK-NEXT: ld %s10, 8(, %s11) 629; CHECK-NEXT: ld %s9, (, %s11) 630; CHECK-NEXT: b.l.t (, %s10) 631 %1 = alloca <512 x i1>, align 64 632 call void @llvm.lifetime.start.p0(i64 64, ptr nonnull %1) 633 %2 = load volatile <512 x i1>, ptr %1, align 64 634 call void @llvm.lifetime.end.p0(i64 64, ptr nonnull %1) 635 ret <512 x i1> %2 636} 637 638; Function Attrs: argmemonly nofree nounwind 639define fastcc <512 x i1> @load__vm512_stk_big_fit() { 640; CHECK-LABEL: load__vm512_stk_big_fit: 641; CHECK: # %bb.0: 642; CHECK-NEXT: st %s9, (, %s11) 643; CHECK-NEXT: st %s10, 8(, %s11) 644; CHECK-NEXT: or %s9, 0, %s11 645; CHECK-NEXT: lea %s11, -2147483648(, %s11) 646; CHECK-NEXT: and %s11, %s11, (58)1 647; CHECK-NEXT: brge.l %s11, %s8, .LBB9_4 648; CHECK-NEXT: # %bb.3: 649; CHECK-NEXT: ld %s61, 24(, %s14) 650; CHECK-NEXT: or %s62, 0, %s0 651; CHECK-NEXT: lea %s63, 315 652; CHECK-NEXT: shm.l %s63, (%s61) 653; CHECK-NEXT: shm.l %s8, 8(%s61) 654; CHECK-NEXT: shm.l %s11, 16(%s61) 655; CHECK-NEXT: monc 656; CHECK-NEXT: or %s0, 0, %s62 657; CHECK-NEXT: .LBB9_4: 658; CHECK-NEXT: # implicit-def: $vmp1 659; CHECK-NEXT: ld %s16, 2147483584(, %s11) 660; CHECK-NEXT: lvm %vm3, 0, %s16 661; CHECK-NEXT: ld %s16, 2147483592(, %s11) 662; CHECK-NEXT: lvm %vm3, 1, %s16 663; CHECK-NEXT: ld %s16, 2147483600(, %s11) 664; CHECK-NEXT: lvm %vm3, 2, %s16 665; CHECK-NEXT: ld %s16, 2147483608(, %s11) 666; CHECK-NEXT: lvm %vm3, 3, %s16 667; CHECK-NEXT: ld %s16, 2147483616(, %s11) 668; CHECK-NEXT: lvm %vm2, 0, %s16 669; CHECK-NEXT: ld %s16, 2147483624(, %s11) 670; CHECK-NEXT: lvm %vm2, 1, %s16 671; CHECK-NEXT: ld %s16, 2147483632(, %s11) 672; CHECK-NEXT: lvm %vm2, 2, %s16 673; CHECK-NEXT: ld %s16, 2147483640(, %s11) 674; CHECK-NEXT: lvm %vm2, 3, %s16 675; CHECK-NEXT: or %s0, 0, (0)1 676; CHECK-NEXT: lea %s1, 2147483392 677; CHECK-NEXT: .LBB9_1: # =>This 
Inner Loop Header: Depth=1 678; CHECK-NEXT: ld %s2, 192(%s0, %s11) 679; CHECK-NEXT: lea %s0, 8(, %s0) 680; CHECK-NEXT: brne.l %s0, %s1, .LBB9_1 681; CHECK-NEXT: # %bb.2: 682; CHECK-NEXT: or %s11, 0, %s9 683; CHECK-NEXT: ld %s10, 8(, %s11) 684; CHECK-NEXT: ld %s9, (, %s11) 685; CHECK-NEXT: b.l.t (, %s10) 686 %1 = alloca <512 x i1>, align 64 687 %2 = alloca [268435424 x i64], align 8 688 call void @llvm.lifetime.start.p0(i64 64, ptr nonnull %1) 689 call void @llvm.lifetime.start.p0(i64 2147483392, ptr nonnull %2) 690 %3 = load volatile <512 x i1>, ptr %1, align 64 691 br label %5 692 6934: ; preds = %5 694 call void @llvm.lifetime.end.p0(i64 2147483392, ptr nonnull %2) 695 call void @llvm.lifetime.end.p0(i64 64, ptr nonnull %1) 696 ret <512 x i1> %3 697 6985: ; preds = %0, %5 699 %6 = phi i64 [ 0, %0 ], [ %9, %5 ] 700 %7 = getelementptr inbounds [268435424 x i64], ptr %2, i64 0, i64 %6 701 %8 = load volatile i64, ptr %7, align 8, !tbaa !3 702 %9 = add nuw nsw i64 %6, 1 703 %10 = icmp eq i64 %9, 268435424 704 br i1 %10, label %4, label %5, !llvm.loop !11 705} 706 707; Function Attrs: argmemonly nofree nounwind 708define fastcc <512 x i1> @load__vm512_stk_big() { 709; CHECK-LABEL: load__vm512_stk_big: 710; CHECK: # %bb.0: 711; CHECK-NEXT: st %s9, (, %s11) 712; CHECK-NEXT: st %s10, 8(, %s11) 713; CHECK-NEXT: or %s9, 0, %s11 714; CHECK-NEXT: lea %s13, 2147483584 715; CHECK-NEXT: and %s13, %s13, (32)0 716; CHECK-NEXT: lea.sl %s11, -1(%s13, %s11) 717; CHECK-NEXT: and %s11, %s11, (58)1 718; CHECK-NEXT: brge.l %s11, %s8, .LBB10_4 719; CHECK-NEXT: # %bb.3: 720; CHECK-NEXT: ld %s61, 24(, %s14) 721; CHECK-NEXT: or %s62, 0, %s0 722; CHECK-NEXT: lea %s63, 315 723; CHECK-NEXT: shm.l %s63, (%s61) 724; CHECK-NEXT: shm.l %s8, 8(%s61) 725; CHECK-NEXT: shm.l %s11, 16(%s61) 726; CHECK-NEXT: monc 727; CHECK-NEXT: or %s0, 0, %s62 728; CHECK-NEXT: .LBB10_4: 729; CHECK-NEXT: lea %s13, -2147483648 730; CHECK-NEXT: and %s13, %s13, (32)0 731; CHECK-NEXT: lea.sl %s13, (%s11, %s13) 732; 
CHECK-NEXT: # implicit-def: $vmp1 733; CHECK-NEXT: ld %s16, (, %s13) 734; CHECK-NEXT: lvm %vm3, 0, %s16 735; CHECK-NEXT: ld %s16, 8(, %s13) 736; CHECK-NEXT: lvm %vm3, 1, %s16 737; CHECK-NEXT: ld %s16, 16(, %s13) 738; CHECK-NEXT: lvm %vm3, 2, %s16 739; CHECK-NEXT: ld %s16, 24(, %s13) 740; CHECK-NEXT: lvm %vm3, 3, %s16 741; CHECK-NEXT: ld %s16, 32(, %s13) 742; CHECK-NEXT: lvm %vm2, 0, %s16 743; CHECK-NEXT: ld %s16, 40(, %s13) 744; CHECK-NEXT: lvm %vm2, 1, %s16 745; CHECK-NEXT: ld %s16, 48(, %s13) 746; CHECK-NEXT: lvm %vm2, 2, %s16 747; CHECK-NEXT: ld %s16, 56(, %s13) 748; CHECK-NEXT: lvm %vm2, 3, %s16 749; CHECK-NEXT: or %s0, 0, (0)1 750; CHECK-NEXT: lea %s1, 2147483400 751; CHECK-NEXT: .LBB10_1: # =>This Inner Loop Header: Depth=1 752; CHECK-NEXT: ld %s2, 248(%s0, %s11) 753; CHECK-NEXT: lea %s0, 8(, %s0) 754; CHECK-NEXT: brne.l %s0, %s1, .LBB10_1 755; CHECK-NEXT: # %bb.2: 756; CHECK-NEXT: or %s11, 0, %s9 757; CHECK-NEXT: ld %s10, 8(, %s11) 758; CHECK-NEXT: ld %s9, (, %s11) 759; CHECK-NEXT: b.l.t (, %s10) 760 %1 = alloca <512 x i1>, align 64 761 %2 = alloca [268435425 x i64], align 8 762 call void @llvm.lifetime.start.p0(i64 64, ptr nonnull %1) 763 call void @llvm.lifetime.start.p0(i64 2147483400, ptr nonnull %2) 764 %3 = load volatile <512 x i1>, ptr %1, align 64 765 br label %5 766 7674: ; preds = %5 768 call void @llvm.lifetime.end.p0(i64 2147483400, ptr nonnull %2) 769 call void @llvm.lifetime.end.p0(i64 64, ptr nonnull %1) 770 ret <512 x i1> %3 771 7725: ; preds = %0, %5 773 %6 = phi i64 [ 0, %0 ], [ %9, %5 ] 774 %7 = getelementptr inbounds [268435425 x i64], ptr %2, i64 0, i64 %6 775 %8 = load volatile i64, ptr %7, align 8, !tbaa !3 776 %9 = add nuw nsw i64 %6, 1 777 %10 = icmp eq i64 %9, 268435425 778 br i1 %10, label %4, label %5, !llvm.loop !12 779} 780 781; Function Attrs: argmemonly nofree nounwind 782define fastcc <512 x i1> @load__vm512_stk_big2() { 783; CHECK-LABEL: load__vm512_stk_big2: 784; CHECK: # %bb.0: 785; CHECK-NEXT: st %s9, (, %s11) 786; 
CHECK-NEXT: st %s10, 8(, %s11) 787; CHECK-NEXT: or %s9, 0, %s11 788; CHECK-NEXT: lea %s13, 2147483392 789; CHECK-NEXT: and %s13, %s13, (32)0 790; CHECK-NEXT: lea.sl %s11, -1(%s13, %s11) 791; CHECK-NEXT: and %s11, %s11, (58)1 792; CHECK-NEXT: brge.l %s11, %s8, .LBB11_4 793; CHECK-NEXT: # %bb.3: 794; CHECK-NEXT: ld %s61, 24(, %s14) 795; CHECK-NEXT: or %s62, 0, %s0 796; CHECK-NEXT: lea %s63, 315 797; CHECK-NEXT: shm.l %s63, (%s61) 798; CHECK-NEXT: shm.l %s8, 8(%s61) 799; CHECK-NEXT: shm.l %s11, 16(%s61) 800; CHECK-NEXT: monc 801; CHECK-NEXT: or %s0, 0, %s62 802; CHECK-NEXT: .LBB11_4: 803; CHECK-NEXT: lea %s13, -2147483456 804; CHECK-NEXT: and %s13, %s13, (32)0 805; CHECK-NEXT: lea.sl %s13, (%s11, %s13) 806; CHECK-NEXT: # implicit-def: $vmp1 807; CHECK-NEXT: ld %s16, (, %s13) 808; CHECK-NEXT: lvm %vm3, 0, %s16 809; CHECK-NEXT: ld %s16, 8(, %s13) 810; CHECK-NEXT: lvm %vm3, 1, %s16 811; CHECK-NEXT: ld %s16, 16(, %s13) 812; CHECK-NEXT: lvm %vm3, 2, %s16 813; CHECK-NEXT: ld %s16, 24(, %s13) 814; CHECK-NEXT: lvm %vm3, 3, %s16 815; CHECK-NEXT: ld %s16, 32(, %s13) 816; CHECK-NEXT: lvm %vm2, 0, %s16 817; CHECK-NEXT: ld %s16, 40(, %s13) 818; CHECK-NEXT: lvm %vm2, 1, %s16 819; CHECK-NEXT: ld %s16, 48(, %s13) 820; CHECK-NEXT: lvm %vm2, 2, %s16 821; CHECK-NEXT: ld %s16, 56(, %s13) 822; CHECK-NEXT: lvm %vm2, 3, %s16 823; CHECK-NEXT: or %s0, 0, (0)1 824; CHECK-NEXT: lea %s1, -2147483648 825; CHECK-NEXT: and %s1, %s1, (32)0 826; CHECK-NEXT: .LBB11_1: # =>This Inner Loop Header: Depth=1 827; CHECK-NEXT: ld %s2, 192(%s0, %s11) 828; CHECK-NEXT: lea %s0, 8(, %s0) 829; CHECK-NEXT: brne.l %s0, %s1, .LBB11_1 830; CHECK-NEXT: # %bb.2: 831; CHECK-NEXT: or %s11, 0, %s9 832; CHECK-NEXT: ld %s10, 8(, %s11) 833; CHECK-NEXT: ld %s9, (, %s11) 834; CHECK-NEXT: b.l.t (, %s10) 835 %1 = alloca <512 x i1>, align 64 836 %2 = alloca [268435456 x i64], align 8 837 call void @llvm.lifetime.start.p0(i64 64, ptr nonnull %1) 838 call void @llvm.lifetime.start.p0(i64 2147483648, ptr nonnull %2) 839 %3 = load 
volatile <512 x i1>, ptr %1, align 64
  br label %5

4:                                                ; preds = %5
  call void @llvm.lifetime.end.p0(i64 2147483648, ptr nonnull %2)
  call void @llvm.lifetime.end.p0(i64 64, ptr nonnull %1)
  ret <512 x i1> %3

5:                                                ; preds = %0, %5
  %6 = phi i64 [ 0, %0 ], [ %9, %5 ]
  %7 = getelementptr inbounds [268435456 x i64], ptr %2, i64 0, i64 %6
  %8 = load volatile i64, ptr %7, align 8, !tbaa !3
  %9 = add nuw nsw i64 %6, 1
  %10 = icmp eq i64 %9, 268435456
  br i1 %10, label %4, label %5, !llvm.loop !13
}

;;; Loads a <512 x i1> mask from a fixed-size local (align 64) in a function
;;; that also has a variable-sized alloca of %0 elements.  Both objects are
;;; read with volatile loads; only the value loaded from the fixed-size
;;; local (%5) is returned.
;;; NOTE(review): the variable-sized alloca is declared `align 8` while the
;;; volatile load from it states `align 64` -- confirm this is intended.  The
;;; same pattern appears in the three functions that follow.
; Function Attrs: argmemonly mustprogress nofree nounwind willreturn
define fastcc <512 x i1> @load__vm512_stk_dyn(i64 noundef %0) {
; CHECK-LABEL: load__vm512_stk_dyn:
; CHECK: # %bb.0:
; CHECK-NEXT: st %s9, (, %s11)
; CHECK-NEXT: st %s10, 8(, %s11)
; CHECK-NEXT: st %s17, 40(, %s11)
; CHECK-NEXT: or %s9, 0, %s11
; CHECK-NEXT: lea %s11, -320(, %s11)
; CHECK-NEXT: and %s11, %s11, (58)1
; CHECK-NEXT: or %s17, 0, %s11
; CHECK-NEXT: brge.l.t %s11, %s8, .LBB12_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: ld %s61, 24(, %s14)
; CHECK-NEXT: or %s62, 0, %s0
; CHECK-NEXT: lea %s63, 315
; CHECK-NEXT: shm.l %s63, (%s61)
; CHECK-NEXT: shm.l %s8, 8(%s61)
; CHECK-NEXT: shm.l %s11, 16(%s61)
; CHECK-NEXT: monc
; CHECK-NEXT: or %s0, 0, %s62
; CHECK-NEXT: .LBB12_2:
; CHECK-NEXT: sll %s0, %s0, 6
; CHECK-NEXT: lea %s1, __ve_grow_stack@lo
; CHECK-NEXT: and %s1, %s1, (32)0
; CHECK-NEXT: lea.sl %s12, __ve_grow_stack@hi(, %s1)
; CHECK-NEXT: bsic %s10, (, %s12)
; CHECK-NEXT: lea %s0, 240(, %s11)
; CHECK-NEXT: ld %s1, 56(, %s0)
; CHECK-NEXT: ld %s1, 48(, %s0)
; CHECK-NEXT: ld %s1, 40(, %s0)
; CHECK-NEXT: ld %s1, 32(, %s0)
; CHECK-NEXT: ld %s1, 24(, %s0)
; CHECK-NEXT: ld %s1, 16(, %s0)
; CHECK-NEXT: ld %s1, 8(, %s0)
; CHECK-NEXT: ld %s0, (, %s0)
; CHECK-NEXT: # implicit-def: $vmp1
; CHECK-NEXT: ld %s16, 256(, %s17)
; CHECK-NEXT: lvm %vm3, 0, %s16
; CHECK-NEXT: ld %s16, 264(, %s17)
; CHECK-NEXT: lvm %vm3, 1, %s16
; CHECK-NEXT: ld %s16, 272(, %s17)
; CHECK-NEXT: lvm %vm3, 2, %s16
; CHECK-NEXT: ld %s16, 280(, %s17)
; CHECK-NEXT: lvm %vm3, 3, %s16
; CHECK-NEXT: ld %s16, 288(, %s17)
; CHECK-NEXT: lvm %vm2, 0, %s16
; CHECK-NEXT: ld %s16, 296(, %s17)
; CHECK-NEXT: lvm %vm2, 1, %s16
; CHECK-NEXT: ld %s16, 304(, %s17)
; CHECK-NEXT: lvm %vm2, 2, %s16
; CHECK-NEXT: ld %s16, 312(, %s17)
; CHECK-NEXT: lvm %vm2, 3, %s16
; CHECK-NEXT: or %s11, 0, %s9
; CHECK-NEXT: ld %s17, 40(, %s11)
; CHECK-NEXT: ld %s10, 8(, %s11)
; CHECK-NEXT: ld %s9, (, %s11)
; CHECK-NEXT: b.l.t (, %s10)
  %2 = alloca <512 x i1>, align 64
  call void @llvm.lifetime.start.p0(i64 64, ptr nonnull %2)
  %3 = alloca <512 x i1>, i64 %0, align 8
  %4 = load volatile <512 x i1>, ptr %3, align 64
  %5 = load volatile <512 x i1>, ptr %2, align 64
  call void @llvm.lifetime.end.p0(i64 64, ptr nonnull %2)
  ret <512 x i1> %5
}

;;; Same shape as load__vm512_stk_dyn, except that the fixed-size local and
;;; the volatile load from it use `align 32` instead of `align 64`.  The
;;; value loaded from the fixed-size local (%5) is returned.
; Function Attrs: argmemonly mustprogress nofree nounwind willreturn
define fastcc <512 x i1> @load__vm512_stk_dyn_align(i64 noundef %0) {
; CHECK-LABEL: load__vm512_stk_dyn_align:
; CHECK: # %bb.0:
; CHECK-NEXT: st %s9, (, %s11)
; CHECK-NEXT: st %s10, 8(, %s11)
; CHECK-NEXT: st %s17, 40(, %s11)
; CHECK-NEXT: or %s9, 0, %s11
; CHECK-NEXT: lea %s11, -320(, %s11)
; CHECK-NEXT: and %s11, %s11, (59)1
; CHECK-NEXT: or %s17, 0, %s11
; CHECK-NEXT: brge.l.t %s11, %s8, .LBB13_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: ld %s61, 24(, %s14)
; CHECK-NEXT: or %s62, 0, %s0
; CHECK-NEXT: lea %s63, 315
; CHECK-NEXT: shm.l %s63, (%s61)
; CHECK-NEXT: shm.l %s8, 8(%s61)
; CHECK-NEXT: shm.l %s11, 16(%s61)
; CHECK-NEXT: monc
; CHECK-NEXT: or %s0, 0, %s62
; CHECK-NEXT: .LBB13_2:
; CHECK-NEXT: sll %s0, %s0, 6
; CHECK-NEXT: lea %s1, __ve_grow_stack@lo
; CHECK-NEXT: and %s1, %s1, (32)0
; CHECK-NEXT: lea.sl %s12, __ve_grow_stack@hi(, %s1)
; CHECK-NEXT: bsic %s10, (, %s12)
; CHECK-NEXT: lea %s0, 240(, %s11)
; CHECK-NEXT: ld %s1, 56(, %s0)
; CHECK-NEXT: ld %s1, 48(, %s0)
; CHECK-NEXT: ld %s1, 40(, %s0)
; CHECK-NEXT: ld %s1, 32(, %s0)
; CHECK-NEXT: ld %s1, 24(, %s0)
; CHECK-NEXT: ld %s1, 16(, %s0)
; CHECK-NEXT: ld %s1, 8(, %s0)
; CHECK-NEXT: ld %s0, (, %s0)
; CHECK-NEXT: # implicit-def: $vmp1
; CHECK-NEXT: ld %s16, 256(, %s17)
; CHECK-NEXT: lvm %vm3, 0, %s16
; CHECK-NEXT: ld %s16, 264(, %s17)
; CHECK-NEXT: lvm %vm3, 1, %s16
; CHECK-NEXT: ld %s16, 272(, %s17)
; CHECK-NEXT: lvm %vm3, 2, %s16
; CHECK-NEXT: ld %s16, 280(, %s17)
; CHECK-NEXT: lvm %vm3, 3, %s16
; CHECK-NEXT: ld %s16, 288(, %s17)
; CHECK-NEXT: lvm %vm2, 0, %s16
; CHECK-NEXT: ld %s16, 296(, %s17)
; CHECK-NEXT: lvm %vm2, 1, %s16
; CHECK-NEXT: ld %s16, 304(, %s17)
; CHECK-NEXT: lvm %vm2, 2, %s16
; CHECK-NEXT: ld %s16, 312(, %s17)
; CHECK-NEXT: lvm %vm2, 3, %s16
; CHECK-NEXT: or %s11, 0, %s9
; CHECK-NEXT: ld %s17, 40(, %s11)
; CHECK-NEXT: ld %s10, 8(, %s11)
; CHECK-NEXT: ld %s9, (, %s11)
; CHECK-NEXT: b.l.t (, %s10)
  %2 = alloca <512 x i1>, align 32
  call void @llvm.lifetime.start.p0(i64 64, ptr nonnull %2)
  %3 = alloca <512 x i1>, i64 %0, align 8
  %4 = load volatile <512 x i1>, ptr %3, align 64
  %5 = load volatile <512 x i1>, ptr %2, align 32
  call void @llvm.lifetime.end.p0(i64 64, ptr nonnull %2)
  ret <512 x i1> %5
}

;;; Two fixed-size <512 x i1> locals (%2 with align 32, %3 with align 64)
;;; plus a variable-sized alloca of %0 elements.  All three objects are read
;;; with volatile loads; the value loaded from the align-32 local (%6) is
;;; returned and the other two loaded values are unused.
; Function Attrs: argmemonly mustprogress nofree nounwind willreturn
define fastcc <512 x i1> @load__vm512_stk_dyn_align2(i64 noundef %0) {
; CHECK-LABEL: load__vm512_stk_dyn_align2:
; CHECK: # %bb.0:
; CHECK-NEXT: st %s9, (, %s11)
; CHECK-NEXT: st %s10, 8(, %s11)
; CHECK-NEXT: st %s17, 40(, %s11)
; CHECK-NEXT: or %s9, 0, %s11
; CHECK-NEXT: lea %s11, -384(, %s11)
; CHECK-NEXT: and %s11, %s11, (58)1
; CHECK-NEXT: or %s17, 0, %s11
; CHECK-NEXT: brge.l.t %s11, %s8, .LBB14_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: ld %s61, 24(, %s14)
; CHECK-NEXT: or %s62, 0, %s0
; CHECK-NEXT: lea %s63, 315
; CHECK-NEXT: shm.l %s63, (%s61)
; CHECK-NEXT: shm.l %s8, 8(%s61)
; CHECK-NEXT: shm.l %s11, 16(%s61)
; CHECK-NEXT: monc
; CHECK-NEXT: or %s0, 0, %s62
; CHECK-NEXT: .LBB14_2:
; CHECK-NEXT: sll %s0, %s0, 6
; CHECK-NEXT: lea %s1, __ve_grow_stack@lo
; CHECK-NEXT: and %s1, %s1, (32)0
; CHECK-NEXT: lea.sl %s12, __ve_grow_stack@hi(, %s1)
; CHECK-NEXT: bsic %s10, (, %s12)
; CHECK-NEXT: lea %s0, 240(, %s11)
; CHECK-NEXT: ld %s1, 56(, %s0)
; CHECK-NEXT: ld %s1, 48(, %s0)
; CHECK-NEXT: ld %s1, 40(, %s0)
; CHECK-NEXT: ld %s1, 32(, %s0)
; CHECK-NEXT: ld %s1, 24(, %s0)
; CHECK-NEXT: ld %s1, 16(, %s0)
; CHECK-NEXT: ld %s1, 8(, %s0)
; CHECK-NEXT: ld %s0, (, %s0)
; CHECK-NEXT: # implicit-def: $vmp1
; CHECK-NEXT: ld %s16, 320(, %s17)
; CHECK-NEXT: lvm %vm3, 0, %s16
; CHECK-NEXT: ld %s16, 328(, %s17)
; CHECK-NEXT: lvm %vm3, 1, %s16
; CHECK-NEXT: ld %s16, 336(, %s17)
; CHECK-NEXT: lvm %vm3, 2, %s16
; CHECK-NEXT: ld %s16, 344(, %s17)
; CHECK-NEXT: lvm %vm3, 3, %s16
; CHECK-NEXT: ld %s16, 352(, %s17)
; CHECK-NEXT: lvm %vm2, 0, %s16
; CHECK-NEXT: ld %s16, 360(, %s17)
; CHECK-NEXT: lvm %vm2, 1, %s16
; CHECK-NEXT: ld %s16, 368(, %s17)
; CHECK-NEXT: lvm %vm2, 2, %s16
; CHECK-NEXT: ld %s16, 376(, %s17)
; CHECK-NEXT: lvm %vm2, 3, %s16
; CHECK-NEXT: # implicit-def: $vmp2
; CHECK-NEXT: ld %s16, 256(, %s17)
; CHECK-NEXT: lvm %vm5, 0, %s16
; CHECK-NEXT: ld %s16, 264(, %s17)
; CHECK-NEXT: lvm %vm5, 1, %s16
; CHECK-NEXT: ld %s16, 272(, %s17)
; CHECK-NEXT: lvm %vm5, 2, %s16
; CHECK-NEXT: ld %s16, 280(, %s17)
; CHECK-NEXT: lvm %vm5, 3, %s16
; CHECK-NEXT: ld %s16, 288(, %s17)
; CHECK-NEXT: lvm %vm4, 0, %s16
; CHECK-NEXT: ld %s16, 296(, %s17)
; CHECK-NEXT: lvm %vm4, 1, %s16
; CHECK-NEXT: ld %s16, 304(, %s17)
; CHECK-NEXT: lvm %vm4, 2, %s16
; CHECK-NEXT: ld %s16, 312(, %s17)
; CHECK-NEXT: lvm %vm4, 3, %s16
; CHECK-NEXT: or %s11, 0, %s9
; CHECK-NEXT: ld %s17, 40(, %s11)
; CHECK-NEXT: ld %s10, 8(, %s11)
; CHECK-NEXT: ld %s9, (, %s11)
; CHECK-NEXT: b.l.t (, %s10)
  %2 = alloca <512 x i1>, align 32
  %3 = alloca <512 x i1>, align 64
  call void @llvm.lifetime.start.p0(i64 64, ptr nonnull %2)
  %4 = alloca <512 x i1>, i64 %0, align 8
  %5 = load volatile <512 x i1>, ptr %4, align 64
  %6 = load volatile <512 x i1>, ptr %2, align 32
  call void @llvm.lifetime.start.p0(i64 64, ptr nonnull %3)
  %7 = load volatile <512 x i1>, ptr %3, align 64
  call void @llvm.lifetime.end.p0(i64 64, ptr nonnull %3)
  call void @llvm.lifetime.end.p0(i64 64, ptr nonnull %2)
  ret <512 x i1> %6
}

;;; Same IR shape as load__vm512_stk_dyn_align (fixed-size local with
;;; align 32 plus a variable-sized alloca), but @dummy and @pass(%0) are
;;; called between the volatile loads and the return.  The expected assembly
;;; stores the loaded mask to the stack before the calls and reloads it
;;; afterwards (the lines annotated "64-byte Folded Spill" and "64-byte
;;; Folded Reload"); %s18 is likewise saved and restored around its use as
;;; the holder of %0.
; Function Attrs: nounwind
define fastcc <512 x i1> @load__vm512_stk_dyn_align_spill(i64 noundef %0) {
; CHECK-LABEL: load__vm512_stk_dyn_align_spill:
; CHECK: # %bb.0:
; CHECK-NEXT: st %s9, (, %s11)
; CHECK-NEXT: st %s10, 8(, %s11)
; CHECK-NEXT: st %s17, 40(, %s11)
; CHECK-NEXT: or %s9, 0, %s11
; CHECK-NEXT: lea %s11, -384(, %s11)
; CHECK-NEXT: and %s11, %s11, (59)1
; CHECK-NEXT: or %s17, 0, %s11
; CHECK-NEXT: brge.l.t %s11, %s8, .LBB15_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: ld %s61, 24(, %s14)
; CHECK-NEXT: or %s62, 0, %s0
; CHECK-NEXT: lea %s63, 315
; CHECK-NEXT: shm.l %s63, (%s61)
; CHECK-NEXT: shm.l %s8, 8(%s61)
; CHECK-NEXT: shm.l %s11, 16(%s61)
; CHECK-NEXT: monc
; CHECK-NEXT: or %s0, 0, %s62
; CHECK-NEXT: .LBB15_2:
; CHECK-NEXT: st %s18, 48(, %s9) # 8-byte Folded Spill
; CHECK-NEXT: or %s18, 0, %s0
; CHECK-NEXT: sll %s0, %s0, 6
; CHECK-NEXT: lea %s1, __ve_grow_stack@lo
; CHECK-NEXT: and %s1, %s1, (32)0
; CHECK-NEXT: lea.sl %s12, __ve_grow_stack@hi(, %s1)
; CHECK-NEXT: bsic %s10, (, %s12)
; CHECK-NEXT: lea %s0, 240(, %s11)
; CHECK-NEXT: ld %s1, 56(, %s0)
; CHECK-NEXT: ld %s1, 48(, %s0)
; CHECK-NEXT: ld %s1, 40(, %s0)
; CHECK-NEXT: ld %s1, 32(, %s0)
; CHECK-NEXT: ld %s1, 24(, %s0)
; CHECK-NEXT: ld %s1, 16(, %s0)
; CHECK-NEXT: ld %s1, 8(, %s0)
; CHECK-NEXT: ld %s0, (, %s0)
; CHECK-NEXT: # implicit-def: $vmp1
; CHECK-NEXT: ld %s16, 320(, %s17)
; CHECK-NEXT: lvm %vm3, 0, %s16
; CHECK-NEXT: ld %s16, 328(, %s17)
; CHECK-NEXT: lvm %vm3, 1, %s16
; CHECK-NEXT: ld %s16, 336(, %s17)
; CHECK-NEXT: lvm %vm3, 2, %s16
; CHECK-NEXT: ld %s16, 344(, %s17)
; CHECK-NEXT: lvm %vm3, 3, %s16
; CHECK-NEXT: ld %s16, 352(, %s17)
; CHECK-NEXT: lvm %vm2, 0, %s16
; CHECK-NEXT: ld %s16, 360(, %s17)
; CHECK-NEXT: lvm %vm2, 1, %s16
; CHECK-NEXT: ld %s16, 368(, %s17)
; CHECK-NEXT: lvm %vm2, 2, %s16
; CHECK-NEXT: ld %s16, 376(, %s17)
; CHECK-NEXT: lvm %vm2, 3, %s16
; CHECK-NEXT: svm %s16, %vm3, 0
; CHECK-NEXT: st %s16, 256(, %s17)
; CHECK-NEXT: svm %s16, %vm3, 1
; CHECK-NEXT: st %s16, 264(, %s17)
; CHECK-NEXT: svm %s16, %vm3, 2
; CHECK-NEXT: st %s16, 272(, %s17)
; CHECK-NEXT: svm %s16, %vm3, 3
; CHECK-NEXT: st %s16, 280(, %s17)
; CHECK-NEXT: svm %s16, %vm2, 0
; CHECK-NEXT: st %s16, 288(, %s17)
; CHECK-NEXT: svm %s16, %vm2, 1
; CHECK-NEXT: st %s16, 296(, %s17)
; CHECK-NEXT: svm %s16, %vm2, 2
; CHECK-NEXT: st %s16, 304(, %s17)
; CHECK-NEXT: svm %s16, %vm2, 3
; CHECK-NEXT: st %s16, 312(, %s17) # 64-byte Folded Spill
; CHECK-NEXT: lea %s0, dummy@lo
; CHECK-NEXT: and %s0, %s0, (32)0
; CHECK-NEXT: lea.sl %s12, dummy@hi(, %s0)
; CHECK-NEXT: bsic %s10, (, %s12)
; CHECK-NEXT: lea %s0, pass@lo
; CHECK-NEXT: and %s0, %s0, (32)0
; CHECK-NEXT: lea.sl %s12, pass@hi(, %s0)
; CHECK-NEXT: or %s0, 0, %s18
; CHECK-NEXT: bsic %s10, (, %s12)
; CHECK-NEXT: # implicit-def: $vmp1
; CHECK-NEXT: ld %s16, 256(, %s17)
; CHECK-NEXT: lvm %vm3, 0, %s16
; CHECK-NEXT: ld %s16, 264(, %s17)
; CHECK-NEXT: lvm %vm3, 1, %s16
; CHECK-NEXT: ld %s16, 272(, %s17)
; CHECK-NEXT: lvm %vm3, 2, %s16
; CHECK-NEXT: ld %s16, 280(, %s17)
; CHECK-NEXT: lvm %vm3, 3, %s16
; CHECK-NEXT: ld %s16, 288(, %s17)
; CHECK-NEXT: lvm %vm2, 0, %s16
; CHECK-NEXT: ld %s16, 296(, %s17)
; CHECK-NEXT: lvm %vm2, 1, %s16
; CHECK-NEXT: ld %s16, 304(, %s17)
; CHECK-NEXT: lvm %vm2, 2, %s16
; CHECK-NEXT: ld %s16, 312(, %s17) # 64-byte Folded Reload
; CHECK-NEXT: lvm %vm2, 3, %s16
; CHECK-NEXT: ld %s18, 48(, %s9) # 8-byte Folded Reload
; CHECK-NEXT: or %s11, 0, %s9
; CHECK-NEXT: ld %s17, 40(, %s11)
; CHECK-NEXT: ld %s10, 8(, %s11)
; CHECK-NEXT: ld %s9, (, %s11)
; CHECK-NEXT: b.l.t (, %s10)
  %2 = alloca <512 x i1>, align 32
  call void @llvm.lifetime.start.p0(i64 64, ptr nonnull %2)
  %3 = alloca <512 x i1>, i64 %0, align 8
  %4 = load volatile <512 x i1>, ptr %3, align 64
  %5 = load volatile <512 x i1>, ptr %2, align 32
  tail call fastcc void @dummy()
  tail call fastcc void @pass(i64 noundef %0)
  call void @llvm.lifetime.end.p0(i64 64, ptr nonnull %2)
  ret <512 x i1> %5
}

;;; Metadata nodes.  !2 holds the producing clang version string, !3-!6 are
;;; the TBAA nodes referenced via `!tbaa !3`, and !7-!13 are loop metadata
;;; nodes (each distinct loop node points at !8, "llvm.loop.mustprogress").
!2 = !{!"clang version 15.0.0 (git@kaz7.github.com:sx-aurora-dev/llvm-project.git 50263c9e9cc3714bcd816eaea8822d3e010a0f19)"}
!3 = !{!4, !4, i64 0}
!4 = !{!"long", !5, i64 0}
!5 = !{!"omnipotent char", !6, i64 0}
!6 = !{!"Simple C/C++ TBAA"}
!7 = distinct !{!7, !8}
!8 = !{!"llvm.loop.mustprogress"}
!9 = distinct !{!9, !8}
!10 = distinct !{!10, !8}
!11 = distinct !{!11, !8}
!12 = distinct !{!12, !8}
!13 = distinct !{!13, !8}