; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=aarch64-linux-gnu -aarch64-streaming-hazard-size=0 -mattr=+sve -mattr=+sme -verify-machineinstrs < %s | FileCheck %s

; This file tests the following combinations related to streaming-enabled functions:
; [ ] N -> S (Normal -> Streaming)
; [ ] S -> N (Streaming -> Normal)
; [ ] S -> S (Streaming -> Streaming)
; [ ] S -> SC (Streaming -> Streaming-compatible)
;
; The following combination is tested in sme-streaming-compatible-interface.ll
; [ ] SC -> S (Streaming-compatible -> Streaming)

declare void @normal_callee()
declare void @streaming_callee() "aarch64_pstate_sm_enabled"
declare void @streaming_compatible_callee() "aarch64_pstate_sm_compatible"

; [x] N -> S
; [ ] S -> N
; [ ] S -> S
; [ ] S -> SC
define void @normal_caller_streaming_callee() nounwind {
; CHECK-LABEL: normal_caller_streaming_callee:
; CHECK:       // %bb.0:
; CHECK-NEXT:    stp d15, d14, [sp, #-80]! // 16-byte Folded Spill
; CHECK-NEXT:    cntd x9
; CHECK-NEXT:    stp d13, d12, [sp, #16] // 16-byte Folded Spill
; CHECK-NEXT:    stp d11, d10, [sp, #32] // 16-byte Folded Spill
; CHECK-NEXT:    stp d9, d8, [sp, #48] // 16-byte Folded Spill
; CHECK-NEXT:    stp x30, x9, [sp, #64] // 16-byte Folded Spill
; CHECK-NEXT:    smstart sm
; CHECK-NEXT:    bl streaming_callee
; CHECK-NEXT:    smstop sm
; CHECK-NEXT:    ldp d9, d8, [sp, #48] // 16-byte Folded Reload
; CHECK-NEXT:    ldr x30, [sp, #64] // 8-byte Folded Reload
; CHECK-NEXT:    ldp d11, d10, [sp, #32] // 16-byte Folded Reload
; CHECK-NEXT:    ldp d13, d12, [sp, #16] // 16-byte Folded Reload
; CHECK-NEXT:    ldp d15, d14, [sp], #80 // 16-byte Folded Reload
; CHECK-NEXT:    ret
  call void @streaming_callee()
  ret void;
}

; [ ] N -> S
; [x] S -> N
; [ ] S -> S
; [ ] S -> SC
define void @streaming_caller_normal_callee() nounwind "aarch64_pstate_sm_enabled" {
; CHECK-LABEL: streaming_caller_normal_callee:
; CHECK:       // %bb.0:
; CHECK-NEXT:    stp d15, d14, [sp, #-80]! // 16-byte Folded Spill
; CHECK-NEXT:    cntd x9
; CHECK-NEXT:    stp d13, d12, [sp, #16] // 16-byte Folded Spill
; CHECK-NEXT:    stp d11, d10, [sp, #32] // 16-byte Folded Spill
; CHECK-NEXT:    stp d9, d8, [sp, #48] // 16-byte Folded Spill
; CHECK-NEXT:    stp x30, x9, [sp, #64] // 16-byte Folded Spill
; CHECK-NEXT:    smstop sm
; CHECK-NEXT:    bl normal_callee
; CHECK-NEXT:    smstart sm
; CHECK-NEXT:    ldp d9, d8, [sp, #48] // 16-byte Folded Reload
; CHECK-NEXT:    ldr x30, [sp, #64] // 8-byte Folded Reload
; CHECK-NEXT:    ldp d11, d10, [sp, #32] // 16-byte Folded Reload
; CHECK-NEXT:    ldp d13, d12, [sp, #16] // 16-byte Folded Reload
; CHECK-NEXT:    ldp d15, d14, [sp], #80 // 16-byte Folded Reload
; CHECK-NEXT:    ret
  call void @normal_callee()
  ret void;
}

; [ ] N -> S
; [ ] S -> N
; [x] S -> S
; [ ] S -> SC
define void @streaming_caller_streaming_callee() nounwind "aarch64_pstate_sm_enabled" {
; CHECK-LABEL: streaming_caller_streaming_callee:
; CHECK:       // %bb.0:
; CHECK-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT:    bl streaming_callee
; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT:    ret
  call void @streaming_callee()
  ret void;
}

; [ ] N -> S
; [ ] S -> N
; [ ] S -> S
; [x] S -> SC
define void @streaming_caller_streaming_compatible_callee() nounwind "aarch64_pstate_sm_enabled" {
; CHECK-LABEL: streaming_caller_streaming_compatible_callee:
; CHECK:       // %bb.0:
; CHECK-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT:    bl streaming_compatible_callee
; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT:    ret
  call void @streaming_compatible_callee()
  ret void;
}

;
; Handle special cases here.
;

; Call to function-pointer (with attribute)
define void @call_to_function_pointer_streaming_enabled(ptr %p) nounwind {
; CHECK-LABEL: call_to_function_pointer_streaming_enabled:
; CHECK:       // %bb.0:
; CHECK-NEXT:    stp d15, d14, [sp, #-80]! // 16-byte Folded Spill
; CHECK-NEXT:    cntd x9
; CHECK-NEXT:    stp d13, d12, [sp, #16] // 16-byte Folded Spill
; CHECK-NEXT:    stp d11, d10, [sp, #32] // 16-byte Folded Spill
; CHECK-NEXT:    stp d9, d8, [sp, #48] // 16-byte Folded Spill
; CHECK-NEXT:    stp x30, x9, [sp, #64] // 16-byte Folded Spill
; CHECK-NEXT:    smstart sm
; CHECK-NEXT:    blr x0
; CHECK-NEXT:    smstop sm
; CHECK-NEXT:    ldp d9, d8, [sp, #48] // 16-byte Folded Reload
; CHECK-NEXT:    ldr x30, [sp, #64] // 8-byte Folded Reload
; CHECK-NEXT:    ldp d11, d10, [sp, #32] // 16-byte Folded Reload
; CHECK-NEXT:    ldp d13, d12, [sp, #16] // 16-byte Folded Reload
; CHECK-NEXT:    ldp d15, d14, [sp], #80 // 16-byte Folded Reload
; CHECK-NEXT:    ret
  call void %p() "aarch64_pstate_sm_enabled"
  ret void
}

; Ensure NEON registers are preserved correctly.
define <4 x i32> @smstart_clobber_simdfp(<4 x i32> %x) nounwind {
; CHECK-LABEL: smstart_clobber_simdfp:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sub sp, sp, #96
; CHECK-NEXT:    cntd x9
; CHECK-NEXT:    stp d15, d14, [sp, #16] // 16-byte Folded Spill
; CHECK-NEXT:    stp d13, d12, [sp, #32] // 16-byte Folded Spill
; CHECK-NEXT:    stp d11, d10, [sp, #48] // 16-byte Folded Spill
; CHECK-NEXT:    stp d9, d8, [sp, #64] // 16-byte Folded Spill
; CHECK-NEXT:    stp x30, x9, [sp, #80] // 16-byte Folded Spill
; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
; CHECK-NEXT:    smstart sm
; CHECK-NEXT:    bl streaming_callee
; CHECK-NEXT:    smstop sm
; CHECK-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
; CHECK-NEXT:    ldp d9, d8, [sp, #64] // 16-byte Folded Reload
; CHECK-NEXT:    ldr x30, [sp, #80] // 8-byte Folded Reload
; CHECK-NEXT:    ldp d11, d10, [sp, #48] // 16-byte Folded Reload
; CHECK-NEXT:    ldp d13, d12, [sp, #32] // 16-byte Folded Reload
; CHECK-NEXT:    ldp d15, d14, [sp, #16] // 16-byte Folded Reload
; CHECK-NEXT:    add sp, sp, #96
; CHECK-NEXT:    ret
  call void @streaming_callee()
  ret <4 x i32> %x;
}

; Ensure SVE registers are preserved correctly.
define <vscale x 4 x i32> @smstart_clobber_sve(<vscale x 4 x i32> %x) nounwind {
; CHECK-LABEL: smstart_clobber_sve:
; CHECK:       // %bb.0:
; CHECK-NEXT:    stp x29, x30, [sp, #-32]! // 16-byte Folded Spill
; CHECK-NEXT:    cntd x9
; CHECK-NEXT:    str x9, [sp, #16] // 8-byte Folded Spill
; CHECK-NEXT:    addvl sp, sp, #-18
; CHECK-NEXT:    str p15, [sp, #4, mul vl] // 2-byte Folded Spill
; CHECK-NEXT:    str p14, [sp, #5, mul vl] // 2-byte Folded Spill
; CHECK-NEXT:    str p13, [sp, #6, mul vl] // 2-byte Folded Spill
; CHECK-NEXT:    str p12, [sp, #7, mul vl] // 2-byte Folded Spill
; CHECK-NEXT:    str p11, [sp, #8, mul vl] // 2-byte Folded Spill
; CHECK-NEXT:    str p10, [sp, #9, mul vl] // 2-byte Folded Spill
; CHECK-NEXT:    str p9, [sp, #10, mul vl] // 2-byte Folded Spill
; CHECK-NEXT:    str p8, [sp, #11, mul vl] // 2-byte Folded Spill
; CHECK-NEXT:    str p7, [sp, #12, mul vl] // 2-byte Folded Spill
; CHECK-NEXT:    str p6, [sp, #13, mul vl] // 2-byte Folded Spill
; CHECK-NEXT:    str p5, [sp, #14, mul vl] // 2-byte Folded Spill
; CHECK-NEXT:    str p4, [sp, #15, mul vl] // 2-byte Folded Spill
; CHECK-NEXT:    str z23, [sp, #2, mul vl] // 16-byte Folded Spill
; CHECK-NEXT:    str z22, [sp, #3, mul vl] // 16-byte Folded Spill
; CHECK-NEXT:    str z21, [sp, #4, mul vl] // 16-byte Folded Spill
; CHECK-NEXT:    str z20, [sp, #5, mul vl] // 16-byte Folded Spill
; CHECK-NEXT:    str z19, [sp, #6, mul vl] // 16-byte Folded Spill
; CHECK-NEXT:    str z18, [sp, #7, mul vl] // 16-byte Folded Spill
; CHECK-NEXT:    str z17, [sp, #8, mul vl] // 16-byte Folded Spill
; CHECK-NEXT:    str z16, [sp, #9, mul vl] // 16-byte Folded Spill
; CHECK-NEXT:    str z15, [sp, #10, mul vl] // 16-byte Folded Spill
; CHECK-NEXT:    str z14, [sp, #11, mul vl] // 16-byte Folded Spill
; CHECK-NEXT:    str z13, [sp, #12, mul vl] // 16-byte Folded Spill
; CHECK-NEXT:    str z12, [sp, #13, mul vl] // 16-byte Folded Spill
; CHECK-NEXT:    str z11, [sp, #14, mul vl] // 16-byte Folded Spill
; CHECK-NEXT:    str z10, [sp, #15, mul vl] // 16-byte Folded Spill
; CHECK-NEXT:    str z9, [sp, #16, mul vl] // 16-byte Folded Spill
; CHECK-NEXT:    str z8, [sp, #17, mul vl] // 16-byte Folded Spill
; CHECK-NEXT:    addvl sp, sp, #-1
; CHECK-NEXT:    str z0, [sp] // 16-byte Folded Spill
; CHECK-NEXT:    smstart sm
; CHECK-NEXT:    bl streaming_callee
; CHECK-NEXT:    smstop sm
; CHECK-NEXT:    ldr z0, [sp] // 16-byte Folded Reload
; CHECK-NEXT:    addvl sp, sp, #1
; CHECK-NEXT:    ldr z23, [sp, #2, mul vl] // 16-byte Folded Reload
; CHECK-NEXT:    ldr z22, [sp, #3, mul vl] // 16-byte Folded Reload
; CHECK-NEXT:    ldr z21, [sp, #4, mul vl] // 16-byte Folded Reload
; CHECK-NEXT:    ldr z20, [sp, #5, mul vl] // 16-byte Folded Reload
; CHECK-NEXT:    ldr z19, [sp, #6, mul vl] // 16-byte Folded Reload
; CHECK-NEXT:    ldr z18, [sp, #7, mul vl] // 16-byte Folded Reload
; CHECK-NEXT:    ldr z17, [sp, #8, mul vl] // 16-byte Folded Reload
; CHECK-NEXT:    ldr z16, [sp, #9, mul vl] // 16-byte Folded Reload
; CHECK-NEXT:    ldr z15, [sp, #10, mul vl] // 16-byte Folded Reload
; CHECK-NEXT:    ldr z14, [sp, #11, mul vl] // 16-byte Folded Reload
; CHECK-NEXT:    ldr z13, [sp, #12, mul vl] // 16-byte Folded Reload
; CHECK-NEXT:    ldr z12, [sp, #13, mul vl] // 16-byte Folded Reload
; CHECK-NEXT:    ldr z11, [sp, #14, mul vl] // 16-byte Folded Reload
; CHECK-NEXT:    ldr z10, [sp, #15, mul vl] // 16-byte Folded Reload
; CHECK-NEXT:    ldr z9, [sp, #16, mul vl] // 16-byte Folded Reload
; CHECK-NEXT:    ldr z8, [sp, #17, mul vl] // 16-byte Folded Reload
; CHECK-NEXT:    ldr p15, [sp, #4, mul vl] // 2-byte Folded Reload
; CHECK-NEXT:    ldr p14, [sp, #5, mul vl] // 2-byte Folded Reload
; CHECK-NEXT:    ldr p13, [sp, #6, mul vl] // 2-byte Folded Reload
; CHECK-NEXT:    ldr p12, [sp, #7, mul vl] // 2-byte Folded Reload
; CHECK-NEXT:    ldr p11, [sp, #8, mul vl] // 2-byte Folded Reload
; CHECK-NEXT:    ldr p10, [sp, #9, mul vl] // 2-byte Folded Reload
; CHECK-NEXT:    ldr p9, [sp, #10, mul vl] // 2-byte Folded Reload
; CHECK-NEXT:    ldr p8, [sp, #11, mul vl] // 2-byte Folded Reload
; CHECK-NEXT:    ldr p7, [sp, #12, mul vl] // 2-byte Folded Reload
; CHECK-NEXT:    ldr p6, [sp, #13, mul vl] // 2-byte Folded Reload
; CHECK-NEXT:    ldr p5, [sp, #14, mul vl] // 2-byte Folded Reload
; CHECK-NEXT:    ldr p4, [sp, #15, mul vl] // 2-byte Folded Reload
; CHECK-NEXT:    addvl sp, sp, #18
; CHECK-NEXT:    ldp x29, x30, [sp], #32 // 16-byte Folded Reload
; CHECK-NEXT:    ret
  call void @streaming_callee()
  ret <vscale x 4 x i32> %x;
}

; Call streaming callee twice; there should be no spills/fills between the two
; calls since the registers should have already been clobbered.
define <vscale x 4 x i32> @smstart_clobber_sve_duplicate(<vscale x 4 x i32> %x) nounwind {
; CHECK-LABEL: smstart_clobber_sve_duplicate:
; CHECK:       // %bb.0:
; CHECK-NEXT:    stp x29, x30, [sp, #-32]! // 16-byte Folded Spill
; CHECK-NEXT:    cntd x9
; CHECK-NEXT:    str x9, [sp, #16] // 8-byte Folded Spill
; CHECK-NEXT:    addvl sp, sp, #-18
; CHECK-NEXT:    str p15, [sp, #4, mul vl] // 2-byte Folded Spill
; CHECK-NEXT:    str p14, [sp, #5, mul vl] // 2-byte Folded Spill
; CHECK-NEXT:    str p13, [sp, #6, mul vl] // 2-byte Folded Spill
; CHECK-NEXT:    str p12, [sp, #7, mul vl] // 2-byte Folded Spill
; CHECK-NEXT:    str p11, [sp, #8, mul vl] // 2-byte Folded Spill
; CHECK-NEXT:    str p10, [sp, #9, mul vl] // 2-byte Folded Spill
; CHECK-NEXT:    str p9, [sp, #10, mul vl] // 2-byte Folded Spill
; CHECK-NEXT:    str p8, [sp, #11, mul vl] // 2-byte Folded Spill
; CHECK-NEXT:    str p7, [sp, #12, mul vl] // 2-byte Folded Spill
; CHECK-NEXT:    str p6, [sp, #13, mul vl] // 2-byte Folded Spill
; CHECK-NEXT:    str p5, [sp, #14, mul vl] // 2-byte Folded Spill
; CHECK-NEXT:    str p4, [sp, #15, mul vl] // 2-byte Folded Spill
; CHECK-NEXT:    str z23, [sp, #2, mul vl] // 16-byte Folded Spill
; CHECK-NEXT:    str z22, [sp, #3, mul vl] // 16-byte Folded Spill
; CHECK-NEXT:    str z21, [sp, #4, mul vl] // 16-byte Folded Spill
; CHECK-NEXT:    str z20, [sp, #5, mul vl] // 16-byte Folded Spill
; CHECK-NEXT:    str z19, [sp, #6, mul vl] // 16-byte Folded Spill
; CHECK-NEXT:    str z18, [sp, #7, mul vl] // 16-byte Folded Spill
; CHECK-NEXT:    str z17, [sp, #8, mul vl] // 16-byte Folded Spill
; CHECK-NEXT:    str z16, [sp, #9, mul vl] // 16-byte Folded Spill
; CHECK-NEXT:    str z15, [sp, #10, mul vl] // 16-byte Folded Spill
; CHECK-NEXT:    str z14, [sp, #11, mul vl] // 16-byte Folded Spill
; CHECK-NEXT:    str z13, [sp, #12, mul vl] // 16-byte Folded Spill
; CHECK-NEXT:    str z12, [sp, #13, mul vl] // 16-byte Folded Spill
; CHECK-NEXT:    str z11, [sp, #14, mul vl] // 16-byte Folded Spill
; CHECK-NEXT:    str z10, [sp, #15, mul vl] // 16-byte Folded Spill
; CHECK-NEXT:    str z9, [sp, #16, mul vl] // 16-byte Folded Spill
; CHECK-NEXT:    str z8, [sp, #17, mul vl] // 16-byte Folded Spill
; CHECK-NEXT:    addvl sp, sp, #-1
; CHECK-NEXT:    str z0, [sp] // 16-byte Folded Spill
; CHECK-NEXT:    smstart sm
; CHECK-NEXT:    bl streaming_callee
; CHECK-NEXT:    bl streaming_callee
; CHECK-NEXT:    smstop sm
; CHECK-NEXT:    ldr z0, [sp] // 16-byte Folded Reload
; CHECK-NEXT:    addvl sp, sp, #1
; CHECK-NEXT:    ldr z23, [sp, #2, mul vl] // 16-byte Folded Reload
; CHECK-NEXT:    ldr z22, [sp, #3, mul vl] // 16-byte Folded Reload
; CHECK-NEXT:    ldr z21, [sp, #4, mul vl] // 16-byte Folded Reload
; CHECK-NEXT:    ldr z20, [sp, #5, mul vl] // 16-byte Folded Reload
; CHECK-NEXT:    ldr z19, [sp, #6, mul vl] // 16-byte Folded Reload
; CHECK-NEXT:    ldr z18, [sp, #7, mul vl] // 16-byte Folded Reload
; CHECK-NEXT:    ldr z17, [sp, #8, mul vl] // 16-byte Folded Reload
; CHECK-NEXT:    ldr z16, [sp, #9, mul vl] // 16-byte Folded Reload
; CHECK-NEXT:    ldr z15, [sp, #10, mul vl] // 16-byte Folded Reload
; CHECK-NEXT:    ldr z14, [sp, #11, mul vl] // 16-byte Folded Reload
; CHECK-NEXT:    ldr z13, [sp, #12, mul vl] // 16-byte Folded Reload
; CHECK-NEXT:    ldr z12, [sp, #13, mul vl] // 16-byte Folded Reload
; CHECK-NEXT:    ldr z11, [sp, #14, mul vl] // 16-byte Folded Reload
; CHECK-NEXT:    ldr z10, [sp, #15, mul vl] // 16-byte Folded Reload
; CHECK-NEXT:    ldr z9, [sp, #16, mul vl] // 16-byte Folded Reload
; CHECK-NEXT:    ldr z8, [sp, #17, mul vl] // 16-byte Folded Reload
; CHECK-NEXT:    ldr p15, [sp, #4, mul vl] // 2-byte Folded Reload
; CHECK-NEXT:    ldr p14, [sp, #5, mul vl] // 2-byte Folded Reload
; CHECK-NEXT:    ldr p13, [sp, #6, mul vl] // 2-byte Folded Reload
; CHECK-NEXT:    ldr p12, [sp, #7, mul vl] // 2-byte Folded Reload
; CHECK-NEXT:    ldr p11, [sp, #8, mul vl] // 2-byte Folded Reload
; CHECK-NEXT:    ldr p10, [sp, #9, mul vl] // 2-byte Folded Reload
; CHECK-NEXT:    ldr p9, [sp, #10, mul vl] // 2-byte Folded Reload
; CHECK-NEXT:    ldr p8, [sp, #11, mul vl] // 2-byte Folded Reload
; CHECK-NEXT:    ldr p7, [sp, #12, mul vl] // 2-byte Folded Reload
; CHECK-NEXT:    ldr p6, [sp, #13, mul vl] // 2-byte Folded Reload
; CHECK-NEXT:    ldr p5, [sp, #14, mul vl] // 2-byte Folded Reload
; CHECK-NEXT:    ldr p4, [sp, #15, mul vl] // 2-byte Folded Reload
; CHECK-NEXT:    addvl sp, sp, #18
; CHECK-NEXT:    ldp x29, x30, [sp], #32 // 16-byte Folded Reload
; CHECK-NEXT:    ret
  call void @streaming_callee()
  call void @streaming_callee()
  ret <vscale x 4 x i32> %x;
}

; Ensure the smstop/smstart pair around the call is not removed, because the
; call to llvm.cos is not part of a chain.
define double @call_to_intrinsic_without_chain(double %x) nounwind "aarch64_pstate_sm_enabled" {
; CHECK-LABEL: call_to_intrinsic_without_chain:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    sub sp, sp, #96
; CHECK-NEXT:    cntd x9
; CHECK-NEXT:    stp d15, d14, [sp, #16] // 16-byte Folded Spill
; CHECK-NEXT:    stp d13, d12, [sp, #32] // 16-byte Folded Spill
; CHECK-NEXT:    stp d11, d10, [sp, #48] // 16-byte Folded Spill
; CHECK-NEXT:    stp d9, d8, [sp, #64] // 16-byte Folded Spill
; CHECK-NEXT:    stp x30, x9, [sp, #80] // 16-byte Folded Spill
; CHECK-NEXT:    stp d0, d0, [sp] // 16-byte Folded Spill
; CHECK-NEXT:    smstop sm
; CHECK-NEXT:    ldr d0, [sp] // 8-byte Folded Reload
; CHECK-NEXT:    bl cos
; CHECK-NEXT:    str d0, [sp] // 8-byte Folded Spill
; CHECK-NEXT:    smstart sm
; CHECK-NEXT:    ldp d1, d0, [sp] // 16-byte Folded Reload
; CHECK-NEXT:    fadd d0, d1, d0
; CHECK-NEXT:    ldp d9, d8, [sp, #64] // 16-byte Folded Reload
; CHECK-NEXT:    ldr x30, [sp, #80] // 8-byte Folded Reload
; CHECK-NEXT:    ldp d11, d10, [sp, #48] // 16-byte Folded Reload
; CHECK-NEXT:    ldp d13, d12, [sp, #32] // 16-byte Folded Reload
; CHECK-NEXT:    ldp d15, d14, [sp, #16] // 16-byte Folded Reload
; CHECK-NEXT:    add sp, sp, #96
; CHECK-NEXT:    ret
entry:
  %res = call fast double @llvm.cos.f64(double %x)
  %res.fadd = fadd fast double %res, %x
  ret double %res.fadd
}

declare double @llvm.cos.f64(double)

; Ensure that tail call optimization is disabled when the streaming mode
; doesn't match.
define void @disable_tailcallopt() nounwind {
; CHECK-LABEL: disable_tailcallopt:
; CHECK:       // %bb.0:
; CHECK-NEXT:    stp d15, d14, [sp, #-80]! // 16-byte Folded Spill
; CHECK-NEXT:    cntd x9
; CHECK-NEXT:    stp d13, d12, [sp, #16] // 16-byte Folded Spill
; CHECK-NEXT:    stp d11, d10, [sp, #32] // 16-byte Folded Spill
; CHECK-NEXT:    stp d9, d8, [sp, #48] // 16-byte Folded Spill
; CHECK-NEXT:    stp x30, x9, [sp, #64] // 16-byte Folded Spill
; CHECK-NEXT:    smstart sm
; CHECK-NEXT:    bl streaming_callee
; CHECK-NEXT:    smstop sm
; CHECK-NEXT:    ldp d9, d8, [sp, #48] // 16-byte Folded Reload
; CHECK-NEXT:    ldr x30, [sp, #64] // 8-byte Folded Reload
; CHECK-NEXT:    ldp d11, d10, [sp, #32] // 16-byte Folded Reload
; CHECK-NEXT:    ldp d13, d12, [sp, #16] // 16-byte Folded Reload
; CHECK-NEXT:    ldp d15, d14, [sp], #80 // 16-byte Folded Reload
; CHECK-NEXT:    ret
  tail call void @streaming_callee()
  ret void;
}

; Streaming caller (attributes #0) passes pointers to three scalable-vector
; stack objects, plus the result of @llvm.aarch64.sme.cntsb, to @foo, which is
; declared without any streaming attribute. The first object is loaded again
; after the call.
define i8 @call_to_non_streaming_pass_sve_objects(ptr nocapture noundef readnone %ptr) #0 {
; CHECK-LABEL: call_to_non_streaming_pass_sve_objects:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    stp d15, d14, [sp, #-96]! // 16-byte Folded Spill
; CHECK-NEXT:    cntd x9
; CHECK-NEXT:    stp d13, d12, [sp, #16] // 16-byte Folded Spill
; CHECK-NEXT:    stp d11, d10, [sp, #32] // 16-byte Folded Spill
; CHECK-NEXT:    stp d9, d8, [sp, #48] // 16-byte Folded Spill
; CHECK-NEXT:    stp x29, x30, [sp, #64] // 16-byte Folded Spill
; CHECK-NEXT:    str x9, [sp, #80] // 8-byte Folded Spill
; CHECK-NEXT:    addvl sp, sp, #-3
; CHECK-NEXT:    rdsvl x3, #1
; CHECK-NEXT:    addvl x0, sp, #2
; CHECK-NEXT:    addvl x1, sp, #1
; CHECK-NEXT:    mov x2, sp
; CHECK-NEXT:    smstop sm
; CHECK-NEXT:    bl foo
; CHECK-NEXT:    smstart sm
; CHECK-NEXT:    ptrue p0.b
; CHECK-NEXT:    ld1b { z0.b }, p0/z, [sp, #2, mul vl]
; CHECK-NEXT:    fmov w0, s0
; CHECK-NEXT:    addvl sp, sp, #3
; CHECK-NEXT:    ldp x29, x30, [sp, #64] // 16-byte Folded Reload
; CHECK-NEXT:    ldp d9, d8, [sp, #48] // 16-byte Folded Reload
; CHECK-NEXT:    ldp d11, d10, [sp, #32] // 16-byte Folded Reload
; CHECK-NEXT:    ldp d13, d12, [sp, #16] // 16-byte Folded Reload
; CHECK-NEXT:    ldp d15, d14, [sp], #96 // 16-byte Folded Reload
; CHECK-NEXT:    ret
entry:
  %Data1 = alloca <vscale x 16 x i8>, align 16
  %Data2 = alloca <vscale x 16 x i8>, align 16
  %Data3 = alloca <vscale x 16 x i8>, align 16
  %0 = tail call i64 @llvm.aarch64.sme.cntsb()
  call void @foo(ptr noundef nonnull %Data1, ptr noundef nonnull %Data2, ptr noundef nonnull %Data3, i64 noundef %0)
  %1 = load <vscale x 16 x i8>, ptr %Data1, align 16
  %vecext = extractelement <vscale x 16 x i8> %1, i64 0
  ret i8 %vecext
}

; Streaming caller (attributes #0) forwards pointer, integer, float and double
; arguments unchanged to @bar, which is declared without any streaming
; attribute. The expected output spills the floating-point arguments before
; smstop and reloads them after it.
define void @call_to_non_streaming_pass_args(ptr nocapture noundef readnone %ptr, i64 %long1, i64 %long2, i32 %int1, i32 %int2, float %float1, float %float2, double %double1, double %double2) #0 {
; CHECK-LABEL: call_to_non_streaming_pass_args:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    sub sp, sp, #112
; CHECK-NEXT:    cntd x9
; CHECK-NEXT:    stp d15, d14, [sp, #32] // 16-byte Folded Spill
; CHECK-NEXT:    stp d13, d12, [sp, #48] // 16-byte Folded Spill
; CHECK-NEXT:    stp d11, d10, [sp, #64] // 16-byte Folded Spill
; CHECK-NEXT:    stp d9, d8, [sp, #80] // 16-byte Folded Spill
; CHECK-NEXT:    stp x30, x9, [sp, #96] // 16-byte Folded Spill
; CHECK-NEXT:    stp d2, d3, [sp, #16] // 16-byte Folded Spill
; CHECK-NEXT:    stp s0, s1, [sp, #8] // 8-byte Folded Spill
; CHECK-NEXT:    smstop sm
; CHECK-NEXT:    ldp s0, s1, [sp, #8] // 8-byte Folded Reload
; CHECK-NEXT:    ldp d2, d3, [sp, #16] // 16-byte Folded Reload
; CHECK-NEXT:    bl bar
; CHECK-NEXT:    smstart sm
; CHECK-NEXT:    ldp d9, d8, [sp, #80] // 16-byte Folded Reload
; CHECK-NEXT:    ldr x30, [sp, #96] // 8-byte Folded Reload
; CHECK-NEXT:    ldp d11, d10, [sp, #64] // 16-byte Folded Reload
; CHECK-NEXT:    ldp d13, d12, [sp, #48] // 16-byte Folded Reload
; CHECK-NEXT:    ldp d15, d14, [sp, #32] // 16-byte Folded Reload
; CHECK-NEXT:    add sp, sp, #112
; CHECK-NEXT:    ret
entry:
  call void @bar(ptr noundef nonnull %ptr, i64 %long1, i64 %long2, i32 %int1, i32 %int2, float %float1, float %float2, double %double1, double %double2)
  ret void
}

declare i64 @llvm.aarch64.sme.cntsb()

declare void @foo(ptr noundef, ptr noundef, ptr noundef, i64 noundef)
declare void @bar(ptr noundef, i64 noundef, i64 noundef, i32 noundef, i32 noundef, float noundef, float noundef, double noundef, double noundef)

attributes #0 = { nounwind vscale_range(1,16) "aarch64_pstate_sm_enabled" }