; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=aarch64-linux-gnu -aarch64-streaming-hazard-size=0 -verify-machineinstrs -mattr=+sve -mattr=+sme < %s | FileCheck %s

; This file tests the following combinations related to streaming-enabled functions:
; [ ] N -> SC (Normal -> Streaming-compatible)
; [ ] SC -> N (Streaming-compatible -> Normal)
; [ ] SC -> S (Streaming-compatible -> Streaming)
; [ ] SC -> SC (Streaming-compatible -> Streaming-compatible)
;
; The following combination is tested in sme-streaming-interface.ll
; [ ] S -> SC (Streaming -> Streaming-compatible)
;
; In the per-test copies of this list below, "[x]" marks the combination that
; the test following it exercises.

; Callees used by the tests. The string attribute on each declaration selects
; its streaming interface; @normal_callee carries no streaming attribute.
declare void @normal_callee();
declare void @streaming_callee() "aarch64_pstate_sm_enabled";
declare void @streaming_compatible_callee() "aarch64_pstate_sm_compatible";

; [x] N -> SC (Normal -> Streaming-compatible)
; [ ] SC -> N (Streaming-compatible -> Normal)
; [ ] SC -> S (Streaming-compatible -> Streaming)
; [ ] SC -> SC (Streaming-compatible -> Streaming-compatible)
;
; The expected assembly is a plain call: only x30 is saved and restored, and
; there is no smstart/smstop around the bl.
define void @normal_caller_streaming_compatible_callee() nounwind {
; CHECK-LABEL: normal_caller_streaming_compatible_callee:
; CHECK: // %bb.0:
; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: bl streaming_compatible_callee
; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret
  call void @streaming_compatible_callee();
  ret void;
}

; [ ] N -> SC (Normal -> Streaming-compatible)
; [x] SC -> N (Streaming-compatible -> Normal)
; [ ] SC -> S (Streaming-compatible -> Streaming)
; [ ] SC -> SC (Streaming-compatible -> Streaming-compatible)
;
; The expected assembly calls __arm_sme_state and keeps bit 0 of the x0 result
; in x19. "smstop sm" before the call and "smstart sm" after it are each
; skipped by a tbz when that bit is clear. d8-d15, x30, x19 and the cntd value
; (x9) are stored in the prologue.
define void @streaming_compatible_caller_normal_callee() "aarch64_pstate_sm_compatible" nounwind {
; CHECK-LABEL: streaming_compatible_caller_normal_callee:
; CHECK: // %bb.0:
; CHECK-NEXT: stp d15, d14, [sp, #-96]! // 16-byte Folded Spill
; CHECK-NEXT: cntd x9
; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill
; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
; CHECK-NEXT: stp x30, x9, [sp, #64] // 16-byte Folded Spill
; CHECK-NEXT: str x19, [sp, #80] // 8-byte Folded Spill
; CHECK-NEXT: bl __arm_sme_state
; CHECK-NEXT: and x19, x0, #0x1
; CHECK-NEXT: tbz w19, #0, .LBB1_2
; CHECK-NEXT: // %bb.1:
; CHECK-NEXT: smstop sm
; CHECK-NEXT: .LBB1_2:
; CHECK-NEXT: bl normal_callee
; CHECK-NEXT: tbz w19, #0, .LBB1_4
; CHECK-NEXT: // %bb.3:
; CHECK-NEXT: smstart sm
; CHECK-NEXT: .LBB1_4:
; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload
; CHECK-NEXT: ldr x19, [sp, #80] // 8-byte Folded Reload
; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload
; CHECK-NEXT: ldr x30, [sp, #64] // 8-byte Folded Reload
; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload
; CHECK-NEXT: ldp d15, d14, [sp], #96 // 16-byte Folded Reload
; CHECK-NEXT: ret

  call void @normal_callee();
  ret void;
}

; Streaming Compatible Caller, Streaming Callee

; [ ] N -> SC (Normal -> Streaming-compatible)
; [ ] SC -> N (Streaming-compatible -> Normal)
; [x] SC -> S (Streaming-compatible -> Streaming)
; [ ] SC -> SC (Streaming-compatible -> Streaming-compatible)
;
; Mirror image of the SC -> N test: the branches are tbnz, so "smstart sm"
; before the call and "smstop sm" after it are skipped when bit 0 of the
; __arm_sme_state result is already set.
define void @streaming_compatible_caller_streaming_callee() "aarch64_pstate_sm_compatible" nounwind {
; CHECK-LABEL: streaming_compatible_caller_streaming_callee:
; CHECK: // %bb.0:
; CHECK-NEXT: stp d15, d14, [sp, #-96]! // 16-byte Folded Spill
; CHECK-NEXT: cntd x9
; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill
; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
; CHECK-NEXT: stp x30, x9, [sp, #64] // 16-byte Folded Spill
; CHECK-NEXT: str x19, [sp, #80] // 8-byte Folded Spill
; CHECK-NEXT: bl __arm_sme_state
; CHECK-NEXT: and x19, x0, #0x1
; CHECK-NEXT: tbnz w19, #0, .LBB2_2
; CHECK-NEXT: // %bb.1:
; CHECK-NEXT: smstart sm
; CHECK-NEXT: .LBB2_2:
; CHECK-NEXT: bl streaming_callee
; CHECK-NEXT: tbnz w19, #0, .LBB2_4
; CHECK-NEXT: // %bb.3:
; CHECK-NEXT: smstop sm
; CHECK-NEXT: .LBB2_4:
; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload
; CHECK-NEXT: ldr x19, [sp, #80] // 8-byte Folded Reload
; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload
; CHECK-NEXT: ldr x30, [sp, #64] // 8-byte Folded Reload
; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload
; CHECK-NEXT: ldp d15, d14, [sp], #96 // 16-byte Folded Reload
; CHECK-NEXT: ret

  call void @streaming_callee();
  ret void;
}

; [ ] N -> SC (Normal -> Streaming-compatible)
; [ ] SC -> N (Streaming-compatible -> Normal)
; [ ] SC -> S (Streaming-compatible -> Streaming)
; [x] SC -> SC (Streaming-compatible -> Streaming-compatible)
;
; Caller and callee are both streaming-compatible: the expected assembly is a
; plain call with no __arm_sme_state query and no smstart/smstop.
define void @streaming_compatible_caller_and_callee() "aarch64_pstate_sm_compatible" nounwind {
; CHECK-LABEL: streaming_compatible_caller_and_callee:
; CHECK: // %bb.0:
; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: bl streaming_compatible_callee
; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret

  call void @streaming_compatible_callee();
  ret void;
}


;
; Handle special cases here.
;

; A <2 x double> argument is passed to a normal callee and used again after the
; call (%arg feeds the fadd). The expected assembly spills the value as z0
; before the __arm_sme_state call, passes and returns it in q0 around the
; conditional smstop/smstart, and performs the final add as a predicated SVE
; fadd under "ptrue p0.d, vl2".
define <2 x double> @streaming_compatible_with_neon_vectors(<2 x double> %arg) "aarch64_pstate_sm_compatible" nounwind {
; CHECK-LABEL: streaming_compatible_with_neon_vectors:
; CHECK: // %bb.0:
; CHECK-NEXT: stp d15, d14, [sp, #-96]! // 16-byte Folded Spill
; CHECK-NEXT: cntd x9
; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill
; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
; CHECK-NEXT: stp x29, x30, [sp, #64] // 16-byte Folded Spill
; CHECK-NEXT: stp x9, x19, [sp, #80] // 16-byte Folded Spill
; CHECK-NEXT: sub sp, sp, #16
; CHECK-NEXT: addvl sp, sp, #-1
; CHECK-NEXT: add x8, sp, #16
; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
; CHECK-NEXT: str z0, [x8] // 16-byte Folded Spill
; CHECK-NEXT: bl __arm_sme_state
; CHECK-NEXT: add x8, sp, #16
; CHECK-NEXT: ldr z0, [x8] // 16-byte Folded Reload
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill
; CHECK-NEXT: and x19, x0, #0x1
; CHECK-NEXT: tbz w19, #0, .LBB4_2
; CHECK-NEXT: // %bb.1:
; CHECK-NEXT: smstop sm
; CHECK-NEXT: .LBB4_2:
; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
; CHECK-NEXT: bl normal_callee_vec_arg
; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill
; CHECK-NEXT: tbz w19, #0, .LBB4_4
; CHECK-NEXT: // %bb.3:
; CHECK-NEXT: smstart sm
; CHECK-NEXT: .LBB4_4:
; CHECK-NEXT: add x8, sp, #16
; CHECK-NEXT: ptrue p0.d, vl2
; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
; CHECK-NEXT: ldr z1, [x8] // 16-byte Folded Reload
; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
; CHECK-NEXT: fadd z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: addvl sp, sp, #1
; CHECK-NEXT: add sp, sp, #16
; CHECK-NEXT: ldp x29, x30, [sp, #64] // 16-byte Folded Reload
; CHECK-NEXT: ldr x19, [sp, #88] // 8-byte Folded Reload
; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload
; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload
; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload
; CHECK-NEXT: ldp d15, d14, [sp], #96 // 16-byte Folded Reload
; CHECK-NEXT: ret
  %res = call <2 x double> @normal_callee_vec_arg(<2 x double> %arg)
  %fadd = fadd <2 x double> %res, %arg
  ret <2 x double> %fadd
}
declare <2 x double> @normal_callee_vec_arg(<2 x double>)

; Same shape as the NEON test but with a scalable <vscale x 2 x double>
; argument and result. The expected prologue stores z8-z23 and p4-p15; %arg is
; kept at [sp, #1, mul vl] and the call result at [sp] across the conditional
; mode switches, and the final add is an unpredicated SVE fadd.
define <vscale x 2 x double> @streaming_compatible_with_scalable_vectors(<vscale x 2 x double> %arg) "aarch64_pstate_sm_compatible" nounwind {
; CHECK-LABEL: streaming_compatible_with_scalable_vectors:
; CHECK: // %bb.0:
; CHECK-NEXT: stp x29, x30, [sp, #-32]! // 16-byte Folded Spill
; CHECK-NEXT: cntd x9
; CHECK-NEXT: stp x9, x19, [sp, #16] // 16-byte Folded Spill
; CHECK-NEXT: addvl sp, sp, #-18
; CHECK-NEXT: str p15, [sp, #4, mul vl] // 2-byte Folded Spill
; CHECK-NEXT: str p14, [sp, #5, mul vl] // 2-byte Folded Spill
; CHECK-NEXT: str p13, [sp, #6, mul vl] // 2-byte Folded Spill
; CHECK-NEXT: str p12, [sp, #7, mul vl] // 2-byte Folded Spill
; CHECK-NEXT: str p11, [sp, #8, mul vl] // 2-byte Folded Spill
; CHECK-NEXT: str p10, [sp, #9, mul vl] // 2-byte Folded Spill
; CHECK-NEXT: str p9, [sp, #10, mul vl] // 2-byte Folded Spill
; CHECK-NEXT: str p8, [sp, #11, mul vl] // 2-byte Folded Spill
; CHECK-NEXT: str p7, [sp, #12, mul vl] // 2-byte Folded Spill
; CHECK-NEXT: str p6, [sp, #13, mul vl] // 2-byte Folded Spill
; CHECK-NEXT: str p5, [sp, #14, mul vl] // 2-byte Folded Spill
; CHECK-NEXT: str p4, [sp, #15, mul vl] // 2-byte Folded Spill
; CHECK-NEXT: str z23, [sp, #2, mul vl] // 16-byte Folded Spill
; CHECK-NEXT: str z22, [sp, #3, mul vl] // 16-byte Folded Spill
; CHECK-NEXT: str z21, [sp, #4, mul vl] // 16-byte Folded Spill
; CHECK-NEXT: str z20, [sp, #5, mul vl] // 16-byte Folded Spill
; CHECK-NEXT: str z19, [sp, #6, mul vl] // 16-byte Folded Spill
; CHECK-NEXT: str z18, [sp, #7, mul vl] // 16-byte Folded Spill
; CHECK-NEXT: str z17, [sp, #8, mul vl] // 16-byte Folded Spill
; CHECK-NEXT: str z16, [sp, #9, mul vl] // 16-byte Folded Spill
; CHECK-NEXT: str z15, [sp, #10, mul vl] // 16-byte Folded Spill
; CHECK-NEXT: str z14, [sp, #11, mul vl] // 16-byte Folded Spill
; CHECK-NEXT: str z13, [sp, #12, mul vl] // 16-byte Folded Spill
; CHECK-NEXT: str z12, [sp, #13, mul vl] // 16-byte Folded Spill
; CHECK-NEXT: str z11, [sp, #14, mul vl] // 16-byte Folded Spill
; CHECK-NEXT: str z10, [sp, #15, mul vl] // 16-byte Folded Spill
; CHECK-NEXT: str z9, [sp, #16, mul vl] // 16-byte Folded Spill
; CHECK-NEXT: str z8, [sp, #17, mul vl] // 16-byte Folded Spill
; CHECK-NEXT: addvl sp, sp, #-2
; CHECK-NEXT: str z0, [sp, #1, mul vl] // 16-byte Folded Spill
; CHECK-NEXT: bl __arm_sme_state
; CHECK-NEXT: and x19, x0, #0x1
; CHECK-NEXT: tbz w19, #0, .LBB5_2
; CHECK-NEXT: // %bb.1:
; CHECK-NEXT: smstop sm
; CHECK-NEXT: .LBB5_2:
; CHECK-NEXT: ldr z0, [sp, #1, mul vl] // 16-byte Folded Reload
; CHECK-NEXT: bl normal_callee_scalable_vec_arg
; CHECK-NEXT: str z0, [sp] // 16-byte Folded Spill
; CHECK-NEXT: tbz w19, #0, .LBB5_4
; CHECK-NEXT: // %bb.3:
; CHECK-NEXT: smstart sm
; CHECK-NEXT: .LBB5_4:
; CHECK-NEXT: ldr z0, [sp, #1, mul vl] // 16-byte Folded Reload
; CHECK-NEXT: ldr z1, [sp] // 16-byte Folded Reload
; CHECK-NEXT: fadd z0.d, z1.d, z0.d
; CHECK-NEXT: addvl sp, sp, #2
; CHECK-NEXT: ldr z23, [sp, #2, mul vl] // 16-byte Folded Reload
; CHECK-NEXT: ldr z22, [sp, #3, mul vl] // 16-byte Folded Reload
; CHECK-NEXT: ldr z21, [sp, #4, mul vl] // 16-byte Folded Reload
; CHECK-NEXT: ldr z20, [sp, #5, mul vl] // 16-byte Folded Reload
; CHECK-NEXT: ldr z19, [sp, #6, mul vl] // 16-byte Folded Reload
; CHECK-NEXT: ldr z18, [sp, #7, mul vl] // 16-byte Folded Reload
; CHECK-NEXT: ldr z17, [sp, #8, mul vl] // 16-byte Folded Reload
; CHECK-NEXT: ldr z16, [sp, #9, mul vl] // 16-byte Folded Reload
; CHECK-NEXT: ldr z15, [sp, #10, mul vl] // 16-byte Folded Reload
; CHECK-NEXT: ldr z14, [sp, #11, mul vl] // 16-byte Folded Reload
; CHECK-NEXT: ldr z13, [sp, #12, mul vl] // 16-byte Folded Reload
; CHECK-NEXT: ldr z12, [sp, #13, mul vl] // 16-byte Folded Reload
; CHECK-NEXT: ldr z11, [sp, #14, mul vl] // 16-byte Folded Reload
; CHECK-NEXT: ldr z10, [sp, #15, mul vl] // 16-byte Folded Reload
; CHECK-NEXT: ldr z9, [sp, #16, mul vl] // 16-byte Folded Reload
; CHECK-NEXT: ldr z8, [sp, #17, mul vl] // 16-byte Folded Reload
; CHECK-NEXT: ldr p15, [sp, #4, mul vl] // 2-byte Folded Reload
; CHECK-NEXT: ldr p14, [sp, #5, mul vl] // 2-byte Folded Reload
; CHECK-NEXT: ldr p13, [sp, #6, mul vl] // 2-byte Folded Reload
; CHECK-NEXT: ldr p12, [sp, #7, mul vl] // 2-byte Folded Reload
; CHECK-NEXT: ldr p11, [sp, #8, mul vl] // 2-byte Folded Reload
; CHECK-NEXT: ldr p10, [sp, #9, mul vl] // 2-byte Folded Reload
; CHECK-NEXT: ldr p9, [sp, #10, mul vl] // 2-byte Folded Reload
; CHECK-NEXT: ldr p8, [sp, #11, mul vl] // 2-byte Folded Reload
; CHECK-NEXT: ldr p7, [sp, #12, mul vl] // 2-byte Folded Reload
; CHECK-NEXT: ldr p6, [sp, #13, mul vl] // 2-byte Folded Reload
; CHECK-NEXT: ldr p5, [sp, #14, mul vl] // 2-byte Folded Reload
; CHECK-NEXT: ldr p4, [sp, #15, mul vl] // 2-byte Folded Reload
; CHECK-NEXT: addvl sp, sp, #18
; CHECK-NEXT: ldr x19, [sp, #24] // 8-byte Folded Reload
; CHECK-NEXT: ldp x29, x30, [sp], #32 // 16-byte Folded Reload
; CHECK-NEXT: ret
  %res = call <vscale x 2 x double> @normal_callee_scalable_vec_arg(<vscale x 2 x double> %arg)
  %fadd = fadd <vscale x 2 x double> %res, %arg
  ret <vscale x 2 x double> %fadd
}

declare <vscale x 2 x double> @normal_callee_scalable_vec_arg(<vscale x 2 x double>)

; Predicate variant of the scalable-vector test: a <vscale x 2 x i1> argument
; is live across the call. The expected assembly keeps %arg as p0 at
; [sp, #7, mul vl] and the call result at [sp, #6, mul vl], then combines them
; with a predicate "and".
define <vscale x 2 x i1> @streaming_compatible_with_predicate_vectors(<vscale x 2 x i1> %arg) "aarch64_pstate_sm_compatible" nounwind {
; CHECK-LABEL: streaming_compatible_with_predicate_vectors:
; CHECK: // %bb.0:
; CHECK-NEXT: stp x29, x30, [sp, #-32]! // 16-byte Folded Spill
; CHECK-NEXT: cntd x9
; CHECK-NEXT: stp x9, x19, [sp, #16] // 16-byte Folded Spill
; CHECK-NEXT: addvl sp, sp, #-18
; CHECK-NEXT: str p15, [sp, #4, mul vl] // 2-byte Folded Spill
; CHECK-NEXT: str p14, [sp, #5, mul vl] // 2-byte Folded Spill
; CHECK-NEXT: str p13, [sp, #6, mul vl] // 2-byte Folded Spill
; CHECK-NEXT: str p12, [sp, #7, mul vl] // 2-byte Folded Spill
; CHECK-NEXT: str p11, [sp, #8, mul vl] // 2-byte Folded Spill
; CHECK-NEXT: str p10, [sp, #9, mul vl] // 2-byte Folded Spill
; CHECK-NEXT: str p9, [sp, #10, mul vl] // 2-byte Folded Spill
; CHECK-NEXT: str p8, [sp, #11, mul vl] // 2-byte Folded Spill
; CHECK-NEXT: str p7, [sp, #12, mul vl] // 2-byte Folded Spill
; CHECK-NEXT: str p6, [sp, #13, mul vl] // 2-byte Folded Spill
; CHECK-NEXT: str p5, [sp, #14, mul vl] // 2-byte Folded Spill
; CHECK-NEXT: str p4, [sp, #15, mul vl] // 2-byte Folded Spill
; CHECK-NEXT: str z23, [sp, #2, mul vl] // 16-byte Folded Spill
; CHECK-NEXT: str z22, [sp, #3, mul vl] // 16-byte Folded Spill
; CHECK-NEXT: str z21, [sp, #4, mul vl] // 16-byte Folded Spill
; CHECK-NEXT: str z20, [sp, #5, mul vl] // 16-byte Folded Spill
; CHECK-NEXT: str z19, [sp, #6, mul vl] // 16-byte Folded Spill
; CHECK-NEXT: str z18, [sp, #7, mul vl] // 16-byte Folded Spill
; CHECK-NEXT: str z17, [sp, #8, mul vl] // 16-byte Folded Spill
; CHECK-NEXT: str z16, [sp, #9, mul vl] // 16-byte Folded Spill
; CHECK-NEXT: str z15, [sp, #10, mul vl] // 16-byte Folded Spill
; CHECK-NEXT: str z14, [sp, #11, mul vl] // 16-byte Folded Spill
; CHECK-NEXT: str z13, [sp, #12, mul vl] // 16-byte Folded Spill
; CHECK-NEXT: str z12, [sp, #13, mul vl] // 16-byte Folded Spill
; CHECK-NEXT: str z11, [sp, #14, mul vl] // 16-byte Folded Spill
; CHECK-NEXT: str z10, [sp, #15, mul vl] // 16-byte Folded Spill
; CHECK-NEXT: str z9, [sp, #16, mul vl] // 16-byte Folded Spill
; CHECK-NEXT: str z8, [sp, #17, mul vl] // 16-byte Folded Spill
; CHECK-NEXT: addvl sp, sp, #-1
; CHECK-NEXT: str p0, [sp, #7, mul vl] // 2-byte Folded Spill
; CHECK-NEXT: bl __arm_sme_state
; CHECK-NEXT: and x19, x0, #0x1
; CHECK-NEXT: tbz w19, #0, .LBB6_2
; CHECK-NEXT: // %bb.1:
; CHECK-NEXT: smstop sm
; CHECK-NEXT: .LBB6_2:
; CHECK-NEXT: ldr p0, [sp, #7, mul vl] // 2-byte Folded Reload
; CHECK-NEXT: bl normal_callee_predicate_vec_arg
; CHECK-NEXT: str p0, [sp, #6, mul vl] // 2-byte Folded Spill
; CHECK-NEXT: tbz w19, #0, .LBB6_4
; CHECK-NEXT: // %bb.3:
; CHECK-NEXT: smstart sm
; CHECK-NEXT: .LBB6_4:
; CHECK-NEXT: ldr p0, [sp, #7, mul vl] // 2-byte Folded Reload
; CHECK-NEXT: ldr p1, [sp, #6, mul vl] // 2-byte Folded Reload
; CHECK-NEXT: and p0.b, p1/z, p1.b, p0.b
; CHECK-NEXT: addvl sp, sp, #1
; CHECK-NEXT: ldr z23, [sp, #2, mul vl] // 16-byte Folded Reload
; CHECK-NEXT: ldr z22, [sp, #3, mul vl] // 16-byte Folded Reload
; CHECK-NEXT: ldr z21, [sp, #4, mul vl] // 16-byte Folded Reload
; CHECK-NEXT: ldr z20, [sp, #5, mul vl] // 16-byte Folded Reload
; CHECK-NEXT: ldr z19, [sp, #6, mul vl] // 16-byte Folded Reload
; CHECK-NEXT: ldr z18, [sp, #7, mul vl] // 16-byte Folded Reload
; CHECK-NEXT: ldr z17, [sp, #8, mul vl] // 16-byte Folded Reload
; CHECK-NEXT: ldr z16, [sp, #9, mul vl] // 16-byte Folded Reload
; CHECK-NEXT: ldr z15, [sp, #10, mul vl] // 16-byte Folded Reload
; CHECK-NEXT: ldr z14, [sp, #11, mul vl] // 16-byte Folded Reload
; CHECK-NEXT: ldr z13, [sp, #12, mul vl] // 16-byte Folded Reload
; CHECK-NEXT: ldr z12, [sp, #13, mul vl] // 16-byte Folded Reload
; CHECK-NEXT: ldr z11, [sp, #14, mul vl] // 16-byte Folded Reload
; CHECK-NEXT: ldr z10, [sp, #15, mul vl] // 16-byte Folded Reload
; CHECK-NEXT: ldr z9, [sp, #16, mul vl] // 16-byte Folded Reload
; CHECK-NEXT: ldr z8, [sp, #17, mul vl] // 16-byte Folded Reload
; CHECK-NEXT: ldr p15, [sp, #4, mul vl] // 2-byte Folded Reload
; CHECK-NEXT: ldr p14, [sp, #5, mul vl] // 2-byte Folded Reload
; CHECK-NEXT: ldr p13, [sp, #6, mul vl] // 2-byte Folded Reload
; CHECK-NEXT: ldr p12, [sp, #7, mul vl] // 2-byte Folded Reload
; CHECK-NEXT: ldr p11, [sp, #8, mul vl] // 2-byte Folded Reload
; CHECK-NEXT: ldr p10, [sp, #9, mul vl] // 2-byte Folded Reload
; CHECK-NEXT: ldr p9, [sp, #10, mul vl] // 2-byte Folded Reload
; CHECK-NEXT: ldr p8, [sp, #11, mul vl] // 2-byte Folded Reload
; CHECK-NEXT: ldr p7, [sp, #12, mul vl] // 2-byte Folded Reload
; CHECK-NEXT: ldr p6, [sp, #13, mul vl] // 2-byte Folded Reload
; CHECK-NEXT: ldr p5, [sp, #14, mul vl] // 2-byte Folded Reload
; CHECK-NEXT: ldr p4, [sp, #15, mul vl] // 2-byte Folded Reload
; CHECK-NEXT: addvl sp, sp, #18
; CHECK-NEXT: ldr x19, [sp, #24] // 8-byte Folded Reload
; CHECK-NEXT: ldp x29, x30, [sp], #32 // 16-byte Folded Reload
; CHECK-NEXT: ret
  %res = call <vscale x 2 x i1> @normal_callee_predicate_vec_arg(<vscale x 2 x i1> %arg)
  %and = and <vscale x 2 x i1> %res, %arg
  ret <vscale x 2 x i1> %and
}

declare <vscale x 2 x i1> @normal_callee_predicate_vec_arg(<vscale x 2 x i1>)

; The streaming call is followed by "unreachable" instead of a return. The
; expected assembly still contains both conditional mode switches around the
; call; nothing is asserted after the final label (no epilogue or ret).
define i32 @conditional_smstart_unreachable_block() "aarch64_pstate_sm_compatible" nounwind {
; CHECK-LABEL: conditional_smstart_unreachable_block:
; CHECK: // %bb.0:
; CHECK-NEXT: stp d15, d14, [sp, #-96]! // 16-byte Folded Spill
; CHECK-NEXT: cntd x9
; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill
; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
; CHECK-NEXT: stp x30, x9, [sp, #64] // 16-byte Folded Spill
; CHECK-NEXT: str x19, [sp, #80] // 8-byte Folded Spill
; CHECK-NEXT: bl __arm_sme_state
; CHECK-NEXT: and x19, x0, #0x1
; CHECK-NEXT: tbnz w19, #0, .LBB7_2
; CHECK-NEXT: // %bb.1:
; CHECK-NEXT: smstart sm
; CHECK-NEXT: .LBB7_2:
; CHECK-NEXT: bl streaming_callee
; CHECK-NEXT: tbnz w19, #0, .LBB7_4
; CHECK-NEXT: // %bb.3:
; CHECK-NEXT: smstop sm
; CHECK-NEXT: .LBB7_4:
  call void @streaming_callee()
  unreachable
}

; The streaming call sits only in %if.then. In the expected assembly the entry
; block just tests bit 0 of w0 and branches to %exit; the register saves, the
; __arm_sme_state query, the conditional mode switches and the restores are all
; placed inside %if.then.
define void @conditional_smstart_no_successor_block(i1 %p) "aarch64_pstate_sm_compatible" nounwind {
; CHECK-LABEL: conditional_smstart_no_successor_block:
; CHECK: // %bb.0:
; CHECK-NEXT: tbz w0, #0, .LBB8_6
; CHECK-NEXT: // %bb.1: // %if.then
; CHECK-NEXT: stp d15, d14, [sp, #-96]! // 16-byte Folded Spill
; CHECK-NEXT: cntd x9
; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill
; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
; CHECK-NEXT: stp x30, x9, [sp, #64] // 16-byte Folded Spill
; CHECK-NEXT: str x19, [sp, #80] // 8-byte Folded Spill
; CHECK-NEXT: bl __arm_sme_state
; CHECK-NEXT: and x19, x0, #0x1
; CHECK-NEXT: tbnz w19, #0, .LBB8_3
; CHECK-NEXT: // %bb.2: // %if.then
; CHECK-NEXT: smstart sm
; CHECK-NEXT: .LBB8_3: // %if.then
; CHECK-NEXT: bl streaming_callee
; CHECK-NEXT: tbnz w19, #0, .LBB8_5
; CHECK-NEXT: // %bb.4: // %if.then
; CHECK-NEXT: smstop sm
; CHECK-NEXT: .LBB8_5: // %if.then
; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload
; CHECK-NEXT: ldr x19, [sp, #80] // 8-byte Folded Reload
; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload
; CHECK-NEXT: ldr x30, [sp, #64] // 8-byte Folded Reload
; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload
; CHECK-NEXT: ldp d15, d14, [sp], #96 // 16-byte Folded Reload
; CHECK-NEXT: .LBB8_6: // %exit
; CHECK-NEXT: ret
  br i1 %p, label %if.then, label %exit

if.then:
  call void @streaming_callee()
  br label %exit

exit:
  ret void
}

; The IR marks the call as "tail call", but the expected assembly uses an
; ordinary "bl normal_callee" followed by the conditional smstart and the full
; epilogue, i.e. the call is not emitted as a tail call.
define void @disable_tailcallopt() "aarch64_pstate_sm_compatible" nounwind {
; CHECK-LABEL: disable_tailcallopt:
; CHECK: // %bb.0:
; CHECK-NEXT: stp d15, d14, [sp, #-96]! // 16-byte Folded Spill
; CHECK-NEXT: cntd x9
; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill
; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
; CHECK-NEXT: stp x30, x9, [sp, #64] // 16-byte Folded Spill
; CHECK-NEXT: str x19, [sp, #80] // 8-byte Folded Spill
; CHECK-NEXT: bl __arm_sme_state
; CHECK-NEXT: and x19, x0, #0x1
; CHECK-NEXT: tbz w19, #0, .LBB9_2
; CHECK-NEXT: // %bb.1:
; CHECK-NEXT: smstop sm
; CHECK-NEXT: .LBB9_2:
; CHECK-NEXT: bl normal_callee
; CHECK-NEXT: tbz w19, #0, .LBB9_4
; CHECK-NEXT: // %bb.3:
; CHECK-NEXT: smstart sm
; CHECK-NEXT: .LBB9_4:
; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload
; CHECK-NEXT: ldr x19, [sp, #80] // 8-byte Folded Reload
; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload
; CHECK-NEXT: ldr x30, [sp, #64] // 8-byte Folded Reload
; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload
; CHECK-NEXT: ldp d15, d14, [sp], #96 // 16-byte Folded Reload
; CHECK-NEXT: ret

  tail call void @normal_callee();
  ret void;
}

; Forwards integer, float and double arguments to a normal callee. Unlike the
; tests above, this function has no nounwind attribute, and the expected
; assembly contains .cfi directives, including ".cfi_offset vg, -24" before the
; conditional smstop and ".cfi_restore vg" after the conditional smstart.
; Arguments are kept across the __arm_sme_state call: x0/x1 are copied to
; x9/x8 and back, and s0/s1 and d2/d3 are stored to the stack and reloaded.
define void @call_to_non_streaming_pass_args(ptr nocapture noundef readnone %ptr, i64 %long1, i64 %long2, i32 %int1, i32 %int2, float %float1, float %float2, double %double1, double %double2) "aarch64_pstate_sm_compatible" {
; CHECK-LABEL: call_to_non_streaming_pass_args:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: sub sp, sp, #128
; CHECK-NEXT: .cfi_def_cfa_offset 128
; CHECK-NEXT: cntd x9
; CHECK-NEXT: stp d15, d14, [sp, #32] // 16-byte Folded Spill
; CHECK-NEXT: stp d13, d12, [sp, #48] // 16-byte Folded Spill
; CHECK-NEXT: stp d11, d10, [sp, #64] // 16-byte Folded Spill
; CHECK-NEXT: stp d9, d8, [sp, #80] // 16-byte Folded Spill
; CHECK-NEXT: stp x30, x9, [sp, #96] // 16-byte Folded Spill
; CHECK-NEXT: str x19, [sp, #112] // 8-byte Folded Spill
; CHECK-NEXT: .cfi_offset w19, -16
; CHECK-NEXT: .cfi_offset w30, -32
; CHECK-NEXT: .cfi_offset b8, -40
; CHECK-NEXT: .cfi_offset b9, -48
; CHECK-NEXT: .cfi_offset b10, -56
; CHECK-NEXT: .cfi_offset b11, -64
; CHECK-NEXT: .cfi_offset b12, -72
; CHECK-NEXT: .cfi_offset b13, -80
; CHECK-NEXT: .cfi_offset b14, -88
; CHECK-NEXT: .cfi_offset b15, -96
; CHECK-NEXT: stp d2, d3, [sp, #16] // 16-byte Folded Spill
; CHECK-NEXT: mov x8, x1
; CHECK-NEXT: mov x9, x0
; CHECK-NEXT: stp s0, s1, [sp, #8] // 8-byte Folded Spill
; CHECK-NEXT: bl __arm_sme_state
; CHECK-NEXT: and x19, x0, #0x1
; CHECK-NEXT: .cfi_offset vg, -24
; CHECK-NEXT: tbz w19, #0, .LBB10_2
; CHECK-NEXT: // %bb.1: // %entry
; CHECK-NEXT: smstop sm
; CHECK-NEXT: .LBB10_2: // %entry
; CHECK-NEXT: ldp s0, s1, [sp, #8] // 8-byte Folded Reload
; CHECK-NEXT: mov x0, x9
; CHECK-NEXT: ldp d2, d3, [sp, #16] // 16-byte Folded Reload
; CHECK-NEXT: mov x1, x8
; CHECK-NEXT: bl bar
; CHECK-NEXT: tbz w19, #0, .LBB10_4
; CHECK-NEXT: // %bb.3: // %entry
; CHECK-NEXT: smstart sm
; CHECK-NEXT: .LBB10_4: // %entry
; CHECK-NEXT: .cfi_restore vg
; CHECK-NEXT: ldp d9, d8, [sp, #80] // 16-byte Folded Reload
; CHECK-NEXT: ldr x19, [sp, #112] // 8-byte Folded Reload
; CHECK-NEXT: ldp d11, d10, [sp, #64] // 16-byte Folded Reload
; CHECK-NEXT: ldr x30, [sp, #96] // 8-byte Folded Reload
; CHECK-NEXT: ldp d13, d12, [sp, #48] // 16-byte Folded Reload
; CHECK-NEXT: ldp d15, d14, [sp, #32] // 16-byte Folded Reload
; CHECK-NEXT: add sp, sp, #128
; CHECK-NEXT: .cfi_def_cfa_offset 0
; CHECK-NEXT: .cfi_restore w19
; CHECK-NEXT: .cfi_restore w30
; CHECK-NEXT: .cfi_restore b8
; CHECK-NEXT: .cfi_restore b9
; CHECK-NEXT: .cfi_restore b10
; CHECK-NEXT: .cfi_restore b11
; CHECK-NEXT: .cfi_restore b12
; CHECK-NEXT: .cfi_restore b13
; CHECK-NEXT: .cfi_restore b14
; CHECK-NEXT: .cfi_restore b15
; CHECK-NEXT: ret
entry:
  call void @bar(ptr noundef nonnull %ptr, i64 %long1, i64 %long2, i32 %int1, i32 %int2, float %float1, float %float2, double %double1, double %double2)
  ret void
}

declare void @bar(ptr noundef, i64 noundef, i64 noundef, i32 noundef, i32 noundef, float noundef, float noundef, double noundef, double noundef)