; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
; RUN: llc < %s -mtriple=aarch64 -mattr=+mte -aarch64-order-frame-objects=0 | FileCheck %s

; Tests that adjacent llvm.aarch64.settag stores to contiguous stack slots are
; merged by the AArch64 backend into paired st2g instructions, or into an
; st2g post-increment loop for large tagged regions.

declare void @use(ptr %p)
declare void @llvm.aarch64.settag(ptr %p, i64 %a)
declare void @llvm.aarch64.settag.zero(ptr %p, i64 %a)

define void @stg16_16() {
; CHECK-LABEL: stg16_16:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    sub sp, sp, #32
; CHECK-NEXT:    .cfi_def_cfa_offset 32
; CHECK-NEXT:    st2g sp, [sp], #32
; CHECK-NEXT:    ret
entry:
  %a = alloca i8, i32 16, align 16
  %b = alloca i8, i32 16, align 16
  call void @llvm.aarch64.settag(ptr %a, i64 16)
  call void @llvm.aarch64.settag(ptr %b, i64 16)
  ret void
}

define i32 @stg16_16_16_16_ret() {
; CHECK-LABEL: stg16_16_16_16_ret:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    sub sp, sp, #64
; CHECK-NEXT:    .cfi_def_cfa_offset 64
; CHECK-NEXT:    mov w0, wzr
; CHECK-NEXT:    st2g sp, [sp, #32]
; CHECK-NEXT:    st2g sp, [sp], #64
; CHECK-NEXT:    ret
entry:
  %a = alloca i8, i32 16, align 16
  %b = alloca i8, i32 16, align 16
  %c = alloca i8, i32 16, align 16
  %d = alloca i8, i32 16, align 16
  call void @llvm.aarch64.settag(ptr %a, i64 16)
  call void @llvm.aarch64.settag(ptr %b, i64 16)
  call void @llvm.aarch64.settag(ptr %c, i64 16)
  call void @llvm.aarch64.settag(ptr %d, i64 16)
  ret i32 0
}

define void @stg16_16_16_16() {
; CHECK-LABEL: stg16_16_16_16:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    sub sp, sp, #64
; CHECK-NEXT:    .cfi_def_cfa_offset 64
; CHECK-NEXT:    st2g sp, [sp, #32]
; CHECK-NEXT:    st2g sp, [sp], #64
; CHECK-NEXT:    ret
entry:
  %a = alloca i8, i32 16, align 16
  %b = alloca i8, i32 16, align 16
  %c = alloca i8, i32 16, align 16
  %d = alloca i8, i32 16, align 16
  call void @llvm.aarch64.settag(ptr %a, i64 16)
  call void @llvm.aarch64.settag(ptr %b, i64 16)
  call void @llvm.aarch64.settag(ptr %c, i64 16)
  call void @llvm.aarch64.settag(ptr %d, i64 16)
  ret void
}

define void @stg128_128_128_128() {
; CHECK-LABEL: stg128_128_128_128:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT:    sub sp, sp, #512
; CHECK-NEXT:    .cfi_def_cfa_offset 528
; CHECK-NEXT:    .cfi_offset w29, -16
; CHECK-NEXT:    mov x8, #512 // =0x200
; CHECK-NEXT:  .LBB3_1: // %entry
; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    st2g sp, [sp], #32
; CHECK-NEXT:    subs x8, x8, #32
; CHECK-NEXT:    b.ne .LBB3_1
; CHECK-NEXT:  // %bb.2: // %entry
; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT:    ret
entry:
  %a = alloca i8, i32 128, align 16
  %b = alloca i8, i32 128, align 16
  %c = alloca i8, i32 128, align 16
  %d = alloca i8, i32 128, align 16
  call void @llvm.aarch64.settag(ptr %a, i64 128)
  call void @llvm.aarch64.settag(ptr %b, i64 128)
  call void @llvm.aarch64.settag(ptr %c, i64 128)
  call void @llvm.aarch64.settag(ptr %d, i64 128)
  ret void
}

define void @stg16_512_16() {
; CHECK-LABEL: stg16_512_16:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT:    sub sp, sp, #544
; CHECK-NEXT:    .cfi_def_cfa_offset 560
; CHECK-NEXT:    .cfi_offset w29, -16
; CHECK-NEXT:    mov x8, #544 // =0x220
; CHECK-NEXT:  .LBB4_1: // %entry
; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    st2g sp, [sp], #32
; CHECK-NEXT:    subs x8, x8, #32
; CHECK-NEXT:    b.ne .LBB4_1
; CHECK-NEXT:  // %bb.2: // %entry
; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT:    ret
entry:
  %a = alloca i8, i32 16, align 16
  %b = alloca i8, i32 512, align 16
  %c = alloca i8, i32 16, align 16
  call void @llvm.aarch64.settag(ptr %a, i64 16)
  call void @llvm.aarch64.settag(ptr %b, i64 512)
  call void @llvm.aarch64.settag(ptr %c, i64 16)
  ret void
}

define void @stg512_512_512() {
; CHECK-LABEL: stg512_512_512:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT:    sub sp, sp, #1536
; CHECK-NEXT:    .cfi_def_cfa_offset 1552
; CHECK-NEXT:    .cfi_offset w29, -16
; CHECK-NEXT:    mov x8, #1536 // =0x600
; CHECK-NEXT:  .LBB5_1: // %entry
; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    st2g sp, [sp], #32
; CHECK-NEXT:    subs x8, x8, #32
; CHECK-NEXT:    b.ne .LBB5_1
; CHECK-NEXT:  // %bb.2: // %entry
; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT:    ret
entry:
  %a = alloca i8, i32 512, align 16
  %b = alloca i8, i32 512, align 16
  %c = alloca i8, i32 512, align 16
  call void @llvm.aarch64.settag(ptr %a, i64 512)
  call void @llvm.aarch64.settag(ptr %b, i64 512)
  call void @llvm.aarch64.settag(ptr %c, i64 512)
  ret void
}

define void @early(i1 %flag) {
; CHECK-LABEL: early:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    sub sp, sp, #144
; CHECK-NEXT:    .cfi_def_cfa_offset 144
; CHECK-NEXT:    tbz w0, #0, .LBB6_2
; CHECK-NEXT:  // %bb.1: // %if.then
; CHECK-NEXT:    st2g sp, [sp, #48]
; CHECK-NEXT:    st2g sp, [sp, #80]
; CHECK-NEXT:    st2g sp, [sp, #112]
; CHECK-NEXT:  .LBB6_2: // %if.end
; CHECK-NEXT:    stg sp, [sp, #32]
; CHECK-NEXT:    st2g sp, [sp], #144
; CHECK-NEXT:    ret
entry:
  %a = alloca i8, i32 48, align 16
  %b = alloca i8, i32 48, align 16
  %c = alloca i8, i32 48, align 16
  br i1 %flag, label %if.then, label %if.end

if.then:
  call void @llvm.aarch64.settag(ptr %a, i64 48)
  call void @llvm.aarch64.settag(ptr %b, i64 48)
  br label %if.end

if.end:
  call void @llvm.aarch64.settag(ptr %c, i64 48)
  ret void
}

define void @early_128_128(i1 %flag) {
; CHECK-LABEL: early_128_128:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    sub sp, sp, #320
; CHECK-NEXT:    str x29, [sp, #304] // 8-byte Folded Spill
; CHECK-NEXT:    .cfi_def_cfa_offset 320
; CHECK-NEXT:    .cfi_offset w29, -16
; CHECK-NEXT:    tbz w0, #0, .LBB7_4
; CHECK-NEXT:  // %bb.1: // %if.then
; CHECK-NEXT:    add x9, sp, #48
; CHECK-NEXT:    mov x8, #256 // =0x100
; CHECK-NEXT:  .LBB7_2: // %if.then
; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    st2g x9, [x9], #32
; CHECK-NEXT:    subs x8, x8, #32
; CHECK-NEXT:    b.ne .LBB7_2
; CHECK-NEXT:  // %bb.3: // %if.then
; CHECK-NEXT:  .LBB7_4: // %if.end
; CHECK-NEXT:    stg sp, [sp, #32]
; CHECK-NEXT:    st2g sp, [sp], #304
; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT:    ret
entry:
  %a = alloca i8, i32 128, align 16
  %b = alloca i8, i32 128, align 16
  %c = alloca i8, i32 48, align 16
  br i1 %flag, label %if.then, label %if.end

if.then:
  call void @llvm.aarch64.settag(ptr %a, i64 128)
  call void @llvm.aarch64.settag(ptr %b, i64 128)
  br label %if.end

if.end:
  call void @llvm.aarch64.settag(ptr %c, i64 48)
  ret void
}

define void @early_512_512(i1 %flag) {
; CHECK-LABEL: early_512_512:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT:    sub sp, sp, #1072
; CHECK-NEXT:    .cfi_def_cfa_offset 1088
; CHECK-NEXT:    .cfi_offset w29, -16
; CHECK-NEXT:    tbz w0, #0, .LBB8_4
; CHECK-NEXT:  // %bb.1: // %if.then
; CHECK-NEXT:    add x9, sp, #48
; CHECK-NEXT:    mov x8, #1024 // =0x400
; CHECK-NEXT:  .LBB8_2: // %if.then
; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    st2g x9, [x9], #32
; CHECK-NEXT:    subs x8, x8, #32
; CHECK-NEXT:    b.ne .LBB8_2
; CHECK-NEXT:  // %bb.3: // %if.then
; CHECK-NEXT:  .LBB8_4: // %if.end
; CHECK-NEXT:    stg sp, [sp, #32]
; CHECK-NEXT:    st2g sp, [sp], #1072
; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT:    ret
entry:
  %a = alloca i8, i32 512, align 16
  %b = alloca i8, i32 512, align 16
  %c = alloca i8, i32 48, align 16
  br i1 %flag, label %if.then, label %if.end

if.then:
  call void @llvm.aarch64.settag(ptr %a, i64 512)
  call void @llvm.aarch64.settag(ptr %b, i64 512)
  br label %if.end

if.end:
  call void @llvm.aarch64.settag(ptr %c, i64 48)
  ret void
}

; Two loops of size 256; the second loop updates SP.
define void @stg128_128_gap_128_128() {
; CHECK-LABEL: stg128_128_gap_128_128:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
; CHECK-NEXT:    sub sp, sp, #544
; CHECK-NEXT:    .cfi_def_cfa_offset 560
; CHECK-NEXT:    .cfi_offset w30, -8
; CHECK-NEXT:    .cfi_offset w29, -16
; CHECK-NEXT:    add x0, sp, #256
; CHECK-NEXT:    bl use
; CHECK-NEXT:    mov x9, sp
; CHECK-NEXT:    mov x8, #256 // =0x100
; CHECK-NEXT:  .LBB9_1: // %entry
; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    st2g x9, [x9], #32
; CHECK-NEXT:    subs x8, x8, #32
; CHECK-NEXT:    b.ne .LBB9_1
; CHECK-NEXT:  // %bb.2: // %entry
; CHECK-NEXT:    add sp, sp, #288
; CHECK-NEXT:    mov x8, #256 // =0x100
; CHECK-NEXT:  .LBB9_3: // %entry
; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    st2g sp, [sp], #32
; CHECK-NEXT:    subs x8, x8, #32
; CHECK-NEXT:    b.ne .LBB9_3
; CHECK-NEXT:  // %bb.4: // %entry
; CHECK-NEXT:    ldp x29, x30, [sp], #16 // 16-byte Folded Reload
; CHECK-NEXT:    ret
entry:
  %a = alloca i8, i32 128, align 16
  %a2 = alloca i8, i32 128, align 16
  %b = alloca i8, i32 32, align 16
  %c = alloca i8, i32 128, align 16
  %c2 = alloca i8, i32 128, align 16
  call void @use(ptr %b)
  call void @llvm.aarch64.settag(ptr %a, i64 128)
  call void @llvm.aarch64.settag(ptr %a2, i64 128)
  call void @llvm.aarch64.settag(ptr %c, i64 128)
  call void @llvm.aarch64.settag(ptr %c2, i64 128)
  ret void
}

; Function Attrs: nounwind
declare i32 @printf(ptr, ...) #0

@.str = private unnamed_addr constant [4 x i8] c"%d\0A\00", align 1

; Case 1
; Insert point of stg merge is followed by nzcv read
; Don't merge in this case

define i32 @nzcv_clobber(i32 %in) {
entry:
; CHECK-LABEL: nzcv_clobber:
; CHECK: stg sp, [sp, #528]
; CHECK-NEXT: .LBB10_1:
; CHECK: st2g x9, [x9], #32
; CHECK-NEXT: subs x8, x8, #32
; CHECK-NEXT: b.ne .LBB10_1
; CHECK-NEXT: // %bb.2:
; CHECK-NEXT: cmp w0, #10
; CHECK-NEXT: stg sp, [sp]
; CHECK-NEXT: b.ge .LBB10_4

  %a = alloca i8, i32 16, align 16
  %b = alloca i8, i32 512, align 16
  %c = alloca i8, i32 16, align 16
  call void @llvm.aarch64.settag(ptr %a, i64 16)
  call void @llvm.aarch64.settag(ptr %b, i64 512)
  %cmp = icmp slt i32 %in, 10
  call void @llvm.aarch64.settag(ptr %c, i64 16)
  br i1 %cmp, label %return0, label %return1

return0:                                          ; preds = %entry
  %call = call i32 (ptr, ...) @printf(ptr @.str, i32 10) #1
  ret i32 0

return1:
  ret i32 1
}

; Case 2
; Insert point of stg merge is not followed by nzcv read
; Merge in this case

define i32 @nzcv_no_clobber(i32 %in) {
entry:
; CHECK-LABEL: nzcv_no_clobber:
; CHECK: mov x8, #544
; CHECK-NEXT: .LBB11_1:
; CHECK: st2g sp, [sp], #32
; CHECK-NEXT: subs x8, x8, #32
; CHECK-NEXT: b.ne .LBB11_1

  %a = alloca i8, i32 16, align 16
  %b = alloca i8, i32 512, align 16
  %c = alloca i8, i32 16, align 16
  call void @llvm.aarch64.settag(ptr %a, i64 16)
  call void @llvm.aarch64.settag(ptr %b, i64 512)
  call void @llvm.aarch64.settag(ptr %c, i64 16)
  br label %return1

return0:                                          ; preds = %entry
  %call = call i32 (ptr, ...) @printf(ptr @.str, i32 10) #1
  ret i32 0

return1:
  ret i32 1
}