; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -fast-isel=true -aarch64-streaming-hazard-size=0 -global-isel=false -fast-isel-abort=0 -mtriple=aarch64-linux-gnu -mattr=+sve -mattr=+sme2 < %s \
; RUN: | FileCheck %s --check-prefixes=CHECK-COMMON,CHECK-FISEL
; RUN: llc -fast-isel=false -aarch64-streaming-hazard-size=0 -global-isel=true -global-isel-abort=0 -mtriple=aarch64-linux-gnu -mattr=+sve -mattr=+sme2 < %s \
; RUN: | FileCheck %s --check-prefixes=CHECK-COMMON,CHECK-GISEL

; Overview: this test compiles functions that carry SME attributes under two
; instruction-selector configurations: FastISel requested (first run line) and
; GlobalISel requested (second run line). Both run lines set the selector's
; "abort" option to 0. Expectations that are identical for both configurations
; use the COMMON check prefix; the FISEL/GISEL prefixes are only used where the
; two outputs differ.

declare double @streaming_callee(double) "aarch64_pstate_sm_enabled"
declare double @normal_callee(double)

; Non-streaming caller -> streaming callee. The expected code saves d8-d15,
; spills the argument, and brackets the call with "smstart sm" / "smstop sm".
; The two configurations differ only in how the constant 42.0 is materialized
; (constant-pool load for FastISel, mov + fmov for GlobalISel).
define double @nonstreaming_caller_streaming_callee(double %x) nounwind noinline optnone {
; CHECK-FISEL-LABEL: nonstreaming_caller_streaming_callee:
; CHECK-FISEL: // %bb.0: // %entry
; CHECK-FISEL-NEXT: sub sp, sp, #96
; CHECK-FISEL-NEXT: stp d15, d14, [sp, #16] // 16-byte Folded Spill
; CHECK-FISEL-NEXT: stp d13, d12, [sp, #32] // 16-byte Folded Spill
; CHECK-FISEL-NEXT: stp d11, d10, [sp, #48] // 16-byte Folded Spill
; CHECK-FISEL-NEXT: stp d9, d8, [sp, #64] // 16-byte Folded Spill
; CHECK-FISEL-NEXT: str x30, [sp, #80] // 8-byte Folded Spill
; CHECK-FISEL-NEXT: cntd x9
; CHECK-FISEL-NEXT: str x9, [sp, #88] // 8-byte Folded Spill
; CHECK-FISEL-NEXT: str d0, [sp] // 8-byte Folded Spill
; CHECK-FISEL-NEXT: smstart sm
; CHECK-FISEL-NEXT: ldr d0, [sp] // 8-byte Folded Reload
; CHECK-FISEL-NEXT: bl streaming_callee
; CHECK-FISEL-NEXT: str d0, [sp, #8] // 8-byte Folded Spill
; CHECK-FISEL-NEXT: smstop sm
; CHECK-FISEL-NEXT: ldr d1, [sp, #8] // 8-byte Folded Reload
; CHECK-FISEL-NEXT: adrp x8, .LCPI0_0
; CHECK-FISEL-NEXT: ldr d0, [x8, :lo12:.LCPI0_0]
; CHECK-FISEL-NEXT: fadd d0, d1, d0
; CHECK-FISEL-NEXT: ldr x30, [sp, #80] // 8-byte Folded Reload
; CHECK-FISEL-NEXT: ldp d9, d8, [sp, #64] // 16-byte Folded Reload
; CHECK-FISEL-NEXT: ldp d11, d10, [sp, #48] // 16-byte Folded Reload
; CHECK-FISEL-NEXT: ldp d13, d12, [sp, #32] // 16-byte Folded Reload
; CHECK-FISEL-NEXT: ldp d15, d14, [sp, #16] // 16-byte Folded Reload
; CHECK-FISEL-NEXT: add sp, sp, #96
; CHECK-FISEL-NEXT: ret
;
; CHECK-GISEL-LABEL: nonstreaming_caller_streaming_callee:
; CHECK-GISEL: // %bb.0: // %entry
; CHECK-GISEL-NEXT: sub sp, sp, #96
; CHECK-GISEL-NEXT: stp d15, d14, [sp, #16] // 16-byte Folded Spill
; CHECK-GISEL-NEXT: stp d13, d12, [sp, #32] // 16-byte Folded Spill
; CHECK-GISEL-NEXT: stp d11, d10, [sp, #48] // 16-byte Folded Spill
; CHECK-GISEL-NEXT: stp d9, d8, [sp, #64] // 16-byte Folded Spill
; CHECK-GISEL-NEXT: str x30, [sp, #80] // 8-byte Folded Spill
; CHECK-GISEL-NEXT: cntd x9
; CHECK-GISEL-NEXT: str x9, [sp, #88] // 8-byte Folded Spill
; CHECK-GISEL-NEXT: str d0, [sp] // 8-byte Folded Spill
; CHECK-GISEL-NEXT: smstart sm
; CHECK-GISEL-NEXT: ldr d0, [sp] // 8-byte Folded Reload
; CHECK-GISEL-NEXT: bl streaming_callee
; CHECK-GISEL-NEXT: str d0, [sp, #8] // 8-byte Folded Spill
; CHECK-GISEL-NEXT: smstop sm
; CHECK-GISEL-NEXT: ldr d1, [sp, #8] // 8-byte Folded Reload
; CHECK-GISEL-NEXT: mov x8, #4631107791820423168 // =0x4045000000000000
; CHECK-GISEL-NEXT: fmov d0, x8
; CHECK-GISEL-NEXT: fadd d0, d1, d0
; CHECK-GISEL-NEXT: ldr x30, [sp, #80] // 8-byte Folded Reload
; CHECK-GISEL-NEXT: ldp d9, d8, [sp, #64] // 16-byte Folded Reload
; CHECK-GISEL-NEXT: ldp d11, d10, [sp, #48] // 16-byte Folded Reload
; CHECK-GISEL-NEXT: ldp d13, d12, [sp, #32] // 16-byte Folded Reload
; CHECK-GISEL-NEXT: ldp d15, d14, [sp, #16] // 16-byte Folded Reload
; CHECK-GISEL-NEXT: add sp, sp, #96
; CHECK-GISEL-NEXT: ret
entry:
  %call = call double @streaming_callee(double %x) "aarch64_pstate_sm_enabled"
  %add = fadd double %call, 4.200000e+01
  ret double %add
}


; Streaming caller -> non-streaming callee. The mirror image of the test above:
; the call is bracketed by "smstop sm" before and "smstart sm" after, and the
; expected output is the same for both configurations.
define double @streaming_caller_nonstreaming_callee(double %x) nounwind noinline optnone "aarch64_pstate_sm_enabled" {
; CHECK-COMMON-LABEL: streaming_caller_nonstreaming_callee:
; CHECK-COMMON: // %bb.0: // %entry
; CHECK-COMMON-NEXT: sub sp, sp, #96
; CHECK-COMMON-NEXT: stp d15, d14, [sp, #16] // 16-byte Folded Spill
; CHECK-COMMON-NEXT: stp d13, d12, [sp, #32] // 16-byte Folded Spill
; CHECK-COMMON-NEXT: stp d11, d10, [sp, #48] // 16-byte Folded Spill
; CHECK-COMMON-NEXT: stp d9, d8, [sp, #64] // 16-byte Folded Spill
; CHECK-COMMON-NEXT: str x30, [sp, #80] // 8-byte Folded Spill
; CHECK-COMMON-NEXT: cntd x9
; CHECK-COMMON-NEXT: str x9, [sp, #88] // 8-byte Folded Spill
; CHECK-COMMON-NEXT: str d0, [sp] // 8-byte Folded Spill
; CHECK-COMMON-NEXT: smstop sm
; CHECK-COMMON-NEXT: ldr d0, [sp] // 8-byte Folded Reload
; CHECK-COMMON-NEXT: bl normal_callee
; CHECK-COMMON-NEXT: str d0, [sp, #8] // 8-byte Folded Spill
; CHECK-COMMON-NEXT: smstart sm
; CHECK-COMMON-NEXT: ldr d1, [sp, #8] // 8-byte Folded Reload
; CHECK-COMMON-NEXT: mov x8, #4631107791820423168 // =0x4045000000000000
; CHECK-COMMON-NEXT: fmov d0, x8
; CHECK-COMMON-NEXT: fadd d0, d1, d0
; CHECK-COMMON-NEXT: ldr x30, [sp, #80] // 8-byte Folded Reload
; CHECK-COMMON-NEXT: ldp d9, d8, [sp, #64] // 16-byte Folded Reload
; CHECK-COMMON-NEXT: ldp d11, d10, [sp, #48] // 16-byte Folded Reload
; CHECK-COMMON-NEXT: ldp d13, d12, [sp, #32] // 16-byte Folded Reload
; CHECK-COMMON-NEXT: ldp d15, d14, [sp, #16] // 16-byte Folded Reload
; CHECK-COMMON-NEXT: add sp, sp, #96
; CHECK-COMMON-NEXT: ret
entry:
  %call = call double @normal_callee(double %x)
  %add = fadd double %call, 4.200000e+01
  ret double %add
}

; Locally-streaming caller ("aarch64_pstate_sm_body") -> normal callee. The
; expected output enters streaming mode at the top of the body, leaves it
; around the call to normal_callee, re-enters it for the fadd, and leaves it
; again before returning; the live double is spilled across every mode switch.
define double @locally_streaming_caller_normal_callee(double %x) nounwind noinline optnone "aarch64_pstate_sm_body" {
; CHECK-COMMON-LABEL: locally_streaming_caller_normal_callee:
; CHECK-COMMON: // %bb.0:
; CHECK-COMMON-NEXT: sub sp, sp, #128
; CHECK-COMMON-NEXT: stp d15, d14, [sp, #32] // 16-byte Folded Spill
; CHECK-COMMON-NEXT: stp d13, d12, [sp, #48] // 16-byte Folded Spill
; CHECK-COMMON-NEXT: stp d11, d10, [sp, #64] // 16-byte Folded Spill
; CHECK-COMMON-NEXT: stp d9, d8, [sp, #80] // 16-byte Folded Spill
; CHECK-COMMON-NEXT: str x30, [sp, #96] // 8-byte Folded Spill
; CHECK-COMMON-NEXT: rdsvl x9, #1
; CHECK-COMMON-NEXT: lsr x9, x9, #3
; CHECK-COMMON-NEXT: str x9, [sp, #104] // 8-byte Folded Spill
; CHECK-COMMON-NEXT: cntd x9
; CHECK-COMMON-NEXT: str x9, [sp, #112] // 8-byte Folded Spill
; CHECK-COMMON-NEXT: str d0, [sp, #24] // 8-byte Folded Spill
; CHECK-COMMON-NEXT: smstart sm
; CHECK-COMMON-NEXT: ldr d0, [sp, #24] // 8-byte Folded Reload
; CHECK-COMMON-NEXT: str d0, [sp, #24] // 8-byte Folded Spill
; CHECK-COMMON-NEXT: smstop sm
; CHECK-COMMON-NEXT: ldr d0, [sp, #24] // 8-byte Folded Reload
; CHECK-COMMON-NEXT: bl normal_callee
; CHECK-COMMON-NEXT: str d0, [sp, #16] // 8-byte Folded Spill
; CHECK-COMMON-NEXT: smstart sm
; CHECK-COMMON-NEXT: ldr d1, [sp, #16] // 8-byte Folded Reload
; CHECK-COMMON-NEXT: mov x8, #4631107791820423168 // =0x4045000000000000
; CHECK-COMMON-NEXT: fmov d0, x8
; CHECK-COMMON-NEXT: fadd d0, d1, d0
; CHECK-COMMON-NEXT: str d0, [sp, #8] // 8-byte Folded Spill
; CHECK-COMMON-NEXT: smstop sm
; CHECK-COMMON-NEXT: ldr d0, [sp, #8] // 8-byte Folded Reload
; CHECK-COMMON-NEXT: ldr x30, [sp, #96] // 8-byte Folded Reload
; CHECK-COMMON-NEXT: ldp d9, d8, [sp, #80] // 16-byte Folded Reload
; CHECK-COMMON-NEXT: ldp d11, d10, [sp, #64] // 16-byte Folded Reload
; CHECK-COMMON-NEXT: ldp d13, d12, [sp, #48] // 16-byte Folded Reload
; CHECK-COMMON-NEXT: ldp d15, d14, [sp, #32] // 16-byte Folded Reload
; CHECK-COMMON-NEXT: add sp, sp, #128
; CHECK-COMMON-NEXT: ret
  %call = call double @normal_callee(double %x);
  %add = fadd double %call, 4.200000e+01
  ret double %add;
}

; Normal caller -> the locally-streaming function defined above. The call site
; carries "aarch64_pstate_sm_body", and the expected output contains a plain
; "bl" with no smstart/smstop in the caller. Only the materialization of 42.0
; differs between the two configurations.
define double @normal_caller_to_locally_streaming_callee(double %x) nounwind noinline optnone {
; CHECK-FISEL-LABEL: normal_caller_to_locally_streaming_callee:
; CHECK-FISEL: // %bb.0:
; CHECK-FISEL-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
; CHECK-FISEL-NEXT: bl locally_streaming_caller_normal_callee
; CHECK-FISEL-NEXT: adrp x8, .LCPI3_0
; CHECK-FISEL-NEXT: ldr d1, [x8, :lo12:.LCPI3_0]
; CHECK-FISEL-NEXT: fadd d0, d0, d1
; CHECK-FISEL-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
; CHECK-FISEL-NEXT: ret
;
; CHECK-GISEL-LABEL: normal_caller_to_locally_streaming_callee:
; CHECK-GISEL: // %bb.0:
; CHECK-GISEL-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
; CHECK-GISEL-NEXT: bl locally_streaming_caller_normal_callee
; CHECK-GISEL-NEXT: mov x8, #4631107791820423168 // =0x4045000000000000
; CHECK-GISEL-NEXT: fmov d1, x8
; CHECK-GISEL-NEXT: fadd d0, d0, d1
; CHECK-GISEL-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
; CHECK-GISEL-NEXT: ret
  %call = call double @locally_streaming_caller_normal_callee(double %x) "aarch64_pstate_sm_body";
  %add = fadd double %call, 4.200000e+01
  ret double %add;
}

; Check attribute in the call itself

; Locally-streaming caller making an indirect call whose call site is marked
; "aarch64_pstate_sm_enabled": the expected output has one "smstart sm" before
; the "blr" and one "smstop sm" after it.
define void @locally_streaming_caller_streaming_callee_ptr(ptr %p) nounwind noinline optnone "aarch64_pstate_sm_body" {
; CHECK-COMMON-LABEL: locally_streaming_caller_streaming_callee_ptr:
; CHECK-COMMON: // %bb.0:
; CHECK-COMMON-NEXT: stp d15, d14, [sp, #-96]! // 16-byte Folded Spill
; CHECK-COMMON-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill
; CHECK-COMMON-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
; CHECK-COMMON-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
; CHECK-COMMON-NEXT: str x30, [sp, #64] // 8-byte Folded Spill
; CHECK-COMMON-NEXT: rdsvl x9, #1
; CHECK-COMMON-NEXT: lsr x9, x9, #3
; CHECK-COMMON-NEXT: str x9, [sp, #72] // 8-byte Folded Spill
; CHECK-COMMON-NEXT: cntd x9
; CHECK-COMMON-NEXT: str x9, [sp, #80] // 8-byte Folded Spill
; CHECK-COMMON-NEXT: smstart sm
; CHECK-COMMON-NEXT: blr x0
; CHECK-COMMON-NEXT: smstop sm
; CHECK-COMMON-NEXT: ldr x30, [sp, #64] // 8-byte Folded Reload
; CHECK-COMMON-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload
; CHECK-COMMON-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload
; CHECK-COMMON-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload
; CHECK-COMMON-NEXT: ldp d15, d14, [sp], #96 // 16-byte Folded Reload
; CHECK-COMMON-NEXT: ret
  call void %p() "aarch64_pstate_sm_enabled"
  ret void
}

; Normal caller making an indirect call whose call site is marked
; "aarch64_pstate_sm_enabled": the "blr" is bracketed by "smstart sm" and
; "smstop sm".
define void @normal_call_to_streaming_callee_ptr(ptr %p) nounwind noinline optnone {
; CHECK-COMMON-LABEL: normal_call_to_streaming_callee_ptr:
; CHECK-COMMON: // %bb.0:
; CHECK-COMMON-NEXT: stp d15, d14, [sp, #-80]! // 16-byte Folded Spill
; CHECK-COMMON-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill
; CHECK-COMMON-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
; CHECK-COMMON-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
; CHECK-COMMON-NEXT: str x30, [sp, #64] // 8-byte Folded Spill
; CHECK-COMMON-NEXT: cntd x9
; CHECK-COMMON-NEXT: str x9, [sp, #72] // 8-byte Folded Spill
; CHECK-COMMON-NEXT: smstart sm
; CHECK-COMMON-NEXT: blr x0
; CHECK-COMMON-NEXT: smstop sm
; CHECK-COMMON-NEXT: ldr x30, [sp, #64] // 8-byte Folded Reload
; CHECK-COMMON-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload
; CHECK-COMMON-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload
; CHECK-COMMON-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload
; CHECK-COMMON-NEXT: ldp d15, d14, [sp], #80 // 16-byte Folded Reload
; CHECK-COMMON-NEXT: ret
  call void %p() "aarch64_pstate_sm_enabled"
  ret void
}

;
; Check ZA state
;

declare double @za_shared_callee(double) "aarch64_inout_za"

; "aarch64_new_za" caller -> ZA-sharing callee. The expected prelude reads
; TPIDR2_EL0 and, when it is non-zero, calls __arm_tpidr2_save and clears the
; register; the body then runs between "smstart za" + "zero {za}" and
; "smstop za".
define double @za_new_caller_to_za_shared_callee(double %x) nounwind noinline optnone "aarch64_new_za"{
; CHECK-COMMON-LABEL: za_new_caller_to_za_shared_callee:
; CHECK-COMMON: // %bb.0: // %prelude
; CHECK-COMMON-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
; CHECK-COMMON-NEXT: rdsvl x8, #1
; CHECK-COMMON-NEXT: mrs x8, TPIDR2_EL0
; CHECK-COMMON-NEXT: cbz x8, .LBB6_2
; CHECK-COMMON-NEXT: b .LBB6_1
; CHECK-COMMON-NEXT: .LBB6_1: // %save.za
; CHECK-COMMON-NEXT: bl __arm_tpidr2_save
; CHECK-COMMON-NEXT: msr TPIDR2_EL0, xzr
; CHECK-COMMON-NEXT: b .LBB6_2
; CHECK-COMMON-NEXT: .LBB6_2: // %entry
; CHECK-COMMON-NEXT: smstart za
; CHECK-COMMON-NEXT: zero {za}
; CHECK-COMMON-NEXT: bl za_shared_callee
; CHECK-COMMON-NEXT: mov x8, #4631107791820423168 // =0x4045000000000000
; CHECK-COMMON-NEXT: fmov d1, x8
; CHECK-COMMON-NEXT: fadd d0, d0, d1
; CHECK-COMMON-NEXT: smstop za
; CHECK-COMMON-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
; CHECK-COMMON-NEXT: ret
entry:
  %call = call double @za_shared_callee(double %x)
  %add = fadd double %call, 4.200000e+01
  ret double %add;
}

; "aarch64_inout_za" caller -> normal callee. The expected output allocates a
; buffer sized from "rdsvl", fills in a 16-byte block at x29-16, points
; TPIDR2_EL0 at it before the call, and afterwards executes "smstart za" and
; calls __arm_tpidr2_restore only when TPIDR2_EL0 reads back as zero, then
; clears TPIDR2_EL0.
define double @za_shared_caller_to_za_none_callee(double %x) nounwind noinline optnone "aarch64_inout_za"{
; CHECK-COMMON-LABEL: za_shared_caller_to_za_none_callee:
; CHECK-COMMON: // %bb.0: // %entry
; CHECK-COMMON-NEXT: stp x29, x30, [sp, #-32]! // 16-byte Folded Spill
; CHECK-COMMON-NEXT: str x19, [sp, #16] // 8-byte Folded Spill
; CHECK-COMMON-NEXT: mov x29, sp
; CHECK-COMMON-NEXT: sub sp, sp, #16
; CHECK-COMMON-NEXT: rdsvl x8, #1
; CHECK-COMMON-NEXT: mov x9, sp
; CHECK-COMMON-NEXT: msub x9, x8, x8, x9
; CHECK-COMMON-NEXT: mov sp, x9
; CHECK-COMMON-NEXT: stur x9, [x29, #-16]
; CHECK-COMMON-NEXT: sturh wzr, [x29, #-6]
; CHECK-COMMON-NEXT: stur wzr, [x29, #-4]
; CHECK-COMMON-NEXT: sturh w8, [x29, #-8]
; CHECK-COMMON-NEXT: sub x8, x29, #16
; CHECK-COMMON-NEXT: msr TPIDR2_EL0, x8
; CHECK-COMMON-NEXT: bl normal_callee
; CHECK-COMMON-NEXT: smstart za
; CHECK-COMMON-NEXT: mrs x8, TPIDR2_EL0
; CHECK-COMMON-NEXT: sub x0, x29, #16
; CHECK-COMMON-NEXT: cbz x8, .LBB7_1
; CHECK-COMMON-NEXT: b .LBB7_2
; CHECK-COMMON-NEXT: .LBB7_1: // %entry
; CHECK-COMMON-NEXT: bl __arm_tpidr2_restore
; CHECK-COMMON-NEXT: b .LBB7_2
; CHECK-COMMON-NEXT: .LBB7_2: // %entry
; CHECK-COMMON-NEXT: msr TPIDR2_EL0, xzr
; CHECK-COMMON-NEXT: mov x8, #4631107791820423168 // =0x4045000000000000
; CHECK-COMMON-NEXT: fmov d1, x8
; CHECK-COMMON-NEXT: fadd d0, d0, d1
; CHECK-COMMON-NEXT: mov sp, x29
; CHECK-COMMON-NEXT: ldr x19, [sp, #16] // 8-byte Folded Reload
; CHECK-COMMON-NEXT: ldp x29, x30, [sp], #32 // 16-byte Folded Reload
; CHECK-COMMON-NEXT: ret
entry:
  %call = call double @normal_callee(double %x)
  %add = fadd double %call, 4.200000e+01
  ret double %add;
}

; Ensure we set up and restore the lazy save correctly for instructions which are lowered to lib calls.
; Here the fp128 fadd becomes a call to __addtf3, wrapped in the same
; TPIDR2_EL0 set-up / conditional __arm_tpidr2_restore sequence as an explicit
; call.
define fp128 @f128_call_za(fp128 %a, fp128 %b) "aarch64_inout_za" nounwind {
; CHECK-COMMON-LABEL: f128_call_za:
; CHECK-COMMON: // %bb.0:
; CHECK-COMMON-NEXT: stp x29, x30, [sp, #-32]! // 16-byte Folded Spill
; CHECK-COMMON-NEXT: str x19, [sp, #16] // 8-byte Folded Spill
; CHECK-COMMON-NEXT: mov x29, sp
; CHECK-COMMON-NEXT: sub sp, sp, #16
; CHECK-COMMON-NEXT: rdsvl x8, #1
; CHECK-COMMON-NEXT: mov x9, sp
; CHECK-COMMON-NEXT: msub x9, x8, x8, x9
; CHECK-COMMON-NEXT: mov sp, x9
; CHECK-COMMON-NEXT: stur x9, [x29, #-16]
; CHECK-COMMON-NEXT: sub x9, x29, #16
; CHECK-COMMON-NEXT: sturh wzr, [x29, #-6]
; CHECK-COMMON-NEXT: stur wzr, [x29, #-4]
; CHECK-COMMON-NEXT: sturh w8, [x29, #-8]
; CHECK-COMMON-NEXT: msr TPIDR2_EL0, x9
; CHECK-COMMON-NEXT: bl __addtf3
; CHECK-COMMON-NEXT: smstart za
; CHECK-COMMON-NEXT: mrs x8, TPIDR2_EL0
; CHECK-COMMON-NEXT: sub x0, x29, #16
; CHECK-COMMON-NEXT: cbnz x8, .LBB8_2
; CHECK-COMMON-NEXT: // %bb.1:
; CHECK-COMMON-NEXT: bl __arm_tpidr2_restore
; CHECK-COMMON-NEXT: .LBB8_2:
; CHECK-COMMON-NEXT: msr TPIDR2_EL0, xzr
; CHECK-COMMON-NEXT: mov sp, x29
; CHECK-COMMON-NEXT: ldr x19, [sp, #16] // 8-byte Folded Reload
; CHECK-COMMON-NEXT: ldp x29, x30, [sp], #32 // 16-byte Folded Reload
; CHECK-COMMON-NEXT: ret
  %res = fadd fp128 %a, %b
  ret fp128 %res
}


; Ensure we fall back to SelectionDAG isel here so that we temporarily disable streaming mode to lower the fadd (with function calls).
; fp128 fadd inside a streaming function: the expected output calls __addtf3
; with "smstop sm" before the call and "smstart sm" after it, spilling the
; q-register operands and the result across each mode switch.
define fp128 @f128_call_sm(fp128 %a, fp128 %b) "aarch64_pstate_sm_enabled" nounwind {
; CHECK-COMMON-LABEL: f128_call_sm:
; CHECK-COMMON: // %bb.0:
; CHECK-COMMON-NEXT: sub sp, sp, #112
; CHECK-COMMON-NEXT: cntd x9
; CHECK-COMMON-NEXT: stp d15, d14, [sp, #32] // 16-byte Folded Spill
; CHECK-COMMON-NEXT: stp d13, d12, [sp, #48] // 16-byte Folded Spill
; CHECK-COMMON-NEXT: stp d11, d10, [sp, #64] // 16-byte Folded Spill
; CHECK-COMMON-NEXT: stp d9, d8, [sp, #80] // 16-byte Folded Spill
; CHECK-COMMON-NEXT: stp x30, x9, [sp, #96] // 16-byte Folded Spill
; CHECK-COMMON-NEXT: stp q0, q1, [sp] // 32-byte Folded Spill
; CHECK-COMMON-NEXT: smstop sm
; CHECK-COMMON-NEXT: ldp q0, q1, [sp] // 32-byte Folded Reload
; CHECK-COMMON-NEXT: bl __addtf3
; CHECK-COMMON-NEXT: str q0, [sp, #16] // 16-byte Folded Spill
; CHECK-COMMON-NEXT: smstart sm
; CHECK-COMMON-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload
; CHECK-COMMON-NEXT: ldp d9, d8, [sp, #80] // 16-byte Folded Reload
; CHECK-COMMON-NEXT: ldr x30, [sp, #96] // 8-byte Folded Reload
; CHECK-COMMON-NEXT: ldp d11, d10, [sp, #64] // 16-byte Folded Reload
; CHECK-COMMON-NEXT: ldp d13, d12, [sp, #48] // 16-byte Folded Reload
; CHECK-COMMON-NEXT: ldp d15, d14, [sp, #32] // 16-byte Folded Reload
; CHECK-COMMON-NEXT: add sp, sp, #112
; CHECK-COMMON-NEXT: ret
  %res = fadd fp128 %a, %b
  ret fp128 %res
}

; As above this should use Selection DAG to make sure the libcall is lowered correctly.
; The double frem becomes a call to fmod, wrapped in the TPIDR2_EL0 set-up and
; conditional __arm_tpidr2_restore sequence of an "aarch64_inout_za" function.
define double @frem_call_za(double %a, double %b) "aarch64_inout_za" nounwind {
; CHECK-COMMON-LABEL: frem_call_za:
; CHECK-COMMON: // %bb.0:
; CHECK-COMMON-NEXT: stp x29, x30, [sp, #-32]! // 16-byte Folded Spill
; CHECK-COMMON-NEXT: str x19, [sp, #16] // 8-byte Folded Spill
; CHECK-COMMON-NEXT: mov x29, sp
; CHECK-COMMON-NEXT: sub sp, sp, #16
; CHECK-COMMON-NEXT: rdsvl x8, #1
; CHECK-COMMON-NEXT: mov x9, sp
; CHECK-COMMON-NEXT: msub x9, x8, x8, x9
; CHECK-COMMON-NEXT: mov sp, x9
; CHECK-COMMON-NEXT: stur x9, [x29, #-16]
; CHECK-COMMON-NEXT: sub x9, x29, #16
; CHECK-COMMON-NEXT: sturh wzr, [x29, #-6]
; CHECK-COMMON-NEXT: stur wzr, [x29, #-4]
; CHECK-COMMON-NEXT: sturh w8, [x29, #-8]
; CHECK-COMMON-NEXT: msr TPIDR2_EL0, x9
; CHECK-COMMON-NEXT: bl fmod
; CHECK-COMMON-NEXT: smstart za
; CHECK-COMMON-NEXT: mrs x8, TPIDR2_EL0
; CHECK-COMMON-NEXT: sub x0, x29, #16
; CHECK-COMMON-NEXT: cbnz x8, .LBB10_2
; CHECK-COMMON-NEXT: // %bb.1:
; CHECK-COMMON-NEXT: bl __arm_tpidr2_restore
; CHECK-COMMON-NEXT: .LBB10_2:
; CHECK-COMMON-NEXT: msr TPIDR2_EL0, xzr
; CHECK-COMMON-NEXT: mov sp, x29
; CHECK-COMMON-NEXT: ldr x19, [sp, #16] // 8-byte Folded Reload
; CHECK-COMMON-NEXT: ldp x29, x30, [sp], #32 // 16-byte Folded Reload
; CHECK-COMMON-NEXT: ret
  %res = frem double %a, %b
  ret double %res
}

; As above this should use Selection DAG to make sure the libcall is lowered correctly.
; float frem inside a streaming function: the expected output calls fmodf with
; "smstop sm" before the call and "smstart sm" after it, spilling s0/s1 and
; the result across each mode switch.
define float @frem_call_sm(float %a, float %b) "aarch64_pstate_sm_enabled" nounwind {
; CHECK-COMMON-LABEL: frem_call_sm:
; CHECK-COMMON: // %bb.0:
; CHECK-COMMON-NEXT: sub sp, sp, #96
; CHECK-COMMON-NEXT: cntd x9
; CHECK-COMMON-NEXT: stp d15, d14, [sp, #16] // 16-byte Folded Spill
; CHECK-COMMON-NEXT: stp d13, d12, [sp, #32] // 16-byte Folded Spill
; CHECK-COMMON-NEXT: stp d11, d10, [sp, #48] // 16-byte Folded Spill
; CHECK-COMMON-NEXT: stp d9, d8, [sp, #64] // 16-byte Folded Spill
; CHECK-COMMON-NEXT: stp x30, x9, [sp, #80] // 16-byte Folded Spill
; CHECK-COMMON-NEXT: stp s0, s1, [sp, #8] // 8-byte Folded Spill
; CHECK-COMMON-NEXT: smstop sm
; CHECK-COMMON-NEXT: ldp s0, s1, [sp, #8] // 8-byte Folded Reload
; CHECK-COMMON-NEXT: bl fmodf
; CHECK-COMMON-NEXT: str s0, [sp, #12] // 4-byte Folded Spill
; CHECK-COMMON-NEXT: smstart sm
; CHECK-COMMON-NEXT: ldr s0, [sp, #12] // 4-byte Folded Reload
; CHECK-COMMON-NEXT: ldp d9, d8, [sp, #64] // 16-byte Folded Reload
; CHECK-COMMON-NEXT: ldr x30, [sp, #80] // 8-byte Folded Reload
; CHECK-COMMON-NEXT: ldp d11, d10, [sp, #48] // 16-byte Folded Reload
; CHECK-COMMON-NEXT: ldp d13, d12, [sp, #32] // 16-byte Folded Reload
; CHECK-COMMON-NEXT: ldp d15, d14, [sp, #16] // 16-byte Folded Reload
; CHECK-COMMON-NEXT: add sp, sp, #96
; CHECK-COMMON-NEXT: ret
  %res = frem float %a, %b
  ret float %res
}

; As above this should use Selection DAG to make sure the libcall is lowered correctly.
; float frem inside a streaming-compatible function: the expected output calls
; __arm_sme_state, keeps bit 0 of the result in x19, and uses it to execute
; "smstop sm" before and "smstart sm" after the fmodf call only when that bit
; is set.
define float @frem_call_sm_compat(float %a, float %b) "aarch64_pstate_sm_compatible" nounwind {
; CHECK-COMMON-LABEL: frem_call_sm_compat:
; CHECK-COMMON: // %bb.0:
; CHECK-COMMON-NEXT: sub sp, sp, #112
; CHECK-COMMON-NEXT: cntd x9
; CHECK-COMMON-NEXT: stp d15, d14, [sp, #16] // 16-byte Folded Spill
; CHECK-COMMON-NEXT: stp d13, d12, [sp, #32] // 16-byte Folded Spill
; CHECK-COMMON-NEXT: stp d11, d10, [sp, #48] // 16-byte Folded Spill
; CHECK-COMMON-NEXT: stp d9, d8, [sp, #64] // 16-byte Folded Spill
; CHECK-COMMON-NEXT: stp x30, x9, [sp, #80] // 16-byte Folded Spill
; CHECK-COMMON-NEXT: str x19, [sp, #96] // 8-byte Folded Spill
; CHECK-COMMON-NEXT: stp s0, s1, [sp, #8] // 8-byte Folded Spill
; CHECK-COMMON-NEXT: bl __arm_sme_state
; CHECK-COMMON-NEXT: and x19, x0, #0x1
; CHECK-COMMON-NEXT: tbz w19, #0, .LBB12_2
; CHECK-COMMON-NEXT: // %bb.1:
; CHECK-COMMON-NEXT: smstop sm
; CHECK-COMMON-NEXT: .LBB12_2:
; CHECK-COMMON-NEXT: ldp s0, s1, [sp, #8] // 8-byte Folded Reload
; CHECK-COMMON-NEXT: bl fmodf
; CHECK-COMMON-NEXT: str s0, [sp, #12] // 4-byte Folded Spill
; CHECK-COMMON-NEXT: tbz w19, #0, .LBB12_4
; CHECK-COMMON-NEXT: // %bb.3:
; CHECK-COMMON-NEXT: smstart sm
; CHECK-COMMON-NEXT: .LBB12_4:
; CHECK-COMMON-NEXT: ldr s0, [sp, #12] // 4-byte Folded Reload
; CHECK-COMMON-NEXT: ldp d9, d8, [sp, #64] // 16-byte Folded Reload
; CHECK-COMMON-NEXT: ldr x19, [sp, #96] // 8-byte Folded Reload
; CHECK-COMMON-NEXT: ldp d11, d10, [sp, #48] // 16-byte Folded Reload
; CHECK-COMMON-NEXT: ldr x30, [sp, #80] // 8-byte Folded Reload
; CHECK-COMMON-NEXT: ldp d13, d12, [sp, #32] // 16-byte Folded Reload
; CHECK-COMMON-NEXT: ldp d15, d14, [sp, #16] // 16-byte Folded Reload
; CHECK-COMMON-NEXT: add sp, sp, #112
; CHECK-COMMON-NEXT: ret
  %res = frem float %a, %b
  ret float %res
}

;
; Check ZT0 State
;

declare double @zt0_shared_callee(double) "aarch64_inout_zt0"

; "aarch64_new_zt0" caller -> ZT0-sharing callee. The expected prelude reads
; TPIDR2_EL0 and, when it is non-zero, stores zt0, calls __arm_tpidr2_save,
; reloads zt0 and clears the register; the body then runs between
; "smstart za" + "zero { zt0 }" and "smstop za".
define double @zt0_new_caller_to_zt0_shared_callee(double %x) nounwind noinline optnone "aarch64_new_zt0" {
; CHECK-COMMON-LABEL: zt0_new_caller_to_zt0_shared_callee:
; CHECK-COMMON: // %bb.0: // %prelude
; CHECK-COMMON-NEXT: sub sp, sp, #80
; CHECK-COMMON-NEXT: str x30, [sp, #64] // 8-byte Folded Spill
; CHECK-COMMON-NEXT: mrs x8, TPIDR2_EL0
; CHECK-COMMON-NEXT: cbz x8, .LBB13_2
; CHECK-COMMON-NEXT: b .LBB13_1
; CHECK-COMMON-NEXT: .LBB13_1: // %save.za
; CHECK-COMMON-NEXT: mov x8, sp
; CHECK-COMMON-NEXT: str zt0, [x8]
; CHECK-COMMON-NEXT: bl __arm_tpidr2_save
; CHECK-COMMON-NEXT: ldr zt0, [x8]
; CHECK-COMMON-NEXT: msr TPIDR2_EL0, xzr
; CHECK-COMMON-NEXT: b .LBB13_2
; CHECK-COMMON-NEXT: .LBB13_2: // %entry
; CHECK-COMMON-NEXT: smstart za
; CHECK-COMMON-NEXT: zero { zt0 }
; CHECK-COMMON-NEXT: bl zt0_shared_callee
; CHECK-COMMON-NEXT: mov x8, #4631107791820423168 // =0x4045000000000000
; CHECK-COMMON-NEXT: fmov d1, x8
; CHECK-COMMON-NEXT: fadd d0, d0, d1
; CHECK-COMMON-NEXT: smstop za
; CHECK-COMMON-NEXT: ldr x30, [sp, #64] // 8-byte Folded Reload
; CHECK-COMMON-NEXT: add sp, sp, #80
; CHECK-COMMON-NEXT: ret
entry:
  %call = call double @zt0_shared_callee(double %x)
  %add = fadd double %call, 4.200000e+01
  ret double %add;
}

; "aarch64_inout_zt0" caller -> normal callee. The expected output stores zt0
; to a stack slot addressed through x19, executes "smstop za" before the call
; and "smstart za" after it, then reloads zt0 from the same slot.
define double @zt0_shared_caller_to_normal_callee(double %x) nounwind noinline optnone "aarch64_inout_zt0" {
; CHECK-COMMON-LABEL: zt0_shared_caller_to_normal_callee:
; CHECK-COMMON: // %bb.0: // %entry
; CHECK-COMMON-NEXT: sub sp, sp, #80
; CHECK-COMMON-NEXT: stp x30, x19, [sp, #64] // 16-byte Folded Spill
; CHECK-COMMON-NEXT: mov x19, sp
; CHECK-COMMON-NEXT: str zt0, [x19]
; CHECK-COMMON-NEXT: smstop za
; CHECK-COMMON-NEXT: bl normal_callee
; CHECK-COMMON-NEXT: smstart za
; CHECK-COMMON-NEXT: ldr zt0, [x19]
; CHECK-COMMON-NEXT: mov x8, #4631107791820423168 // =0x4045000000000000
; CHECK-COMMON-NEXT: fmov d1, x8
; CHECK-COMMON-NEXT: fadd d0, d0, d1
; CHECK-COMMON-NEXT: ldp x30, x19, [sp, #64] // 16-byte Folded Reload
; CHECK-COMMON-NEXT: add sp, sp, #80
; CHECK-COMMON-NEXT: ret
entry:
  %call = call double @normal_callee(double %x)
  %add = fadd double %call, 4.200000e+01
  ret double %add;
}

; "aarch64_za_state_agnostic" function making an indirect call. The expected
; output obtains a size from __arm_sme_state_size, carves that many bytes off
; the stack, and wraps the "blr" in __arm_sme_save / __arm_sme_restore calls
; that are both passed the address of that stack area in x0.
define void @agnostic_za_function(ptr %ptr) nounwind "aarch64_za_state_agnostic" {
; CHECK-COMMON-LABEL: agnostic_za_function:
; CHECK-COMMON: // %bb.0:
; CHECK-COMMON-NEXT: stp x29, x30, [sp, #-32]! // 16-byte Folded Spill
; CHECK-COMMON-NEXT: stp x20, x19, [sp, #16] // 16-byte Folded Spill
; CHECK-COMMON-NEXT: mov x29, sp
; CHECK-COMMON-NEXT: mov x8, x0
; CHECK-COMMON-NEXT: bl __arm_sme_state_size
; CHECK-COMMON-NEXT: sub sp, sp, x0
; CHECK-COMMON-NEXT: mov x20, sp
; CHECK-COMMON-NEXT: mov x0, x20
; CHECK-COMMON-NEXT: bl __arm_sme_save
; CHECK-COMMON-NEXT: blr x8
; CHECK-COMMON-NEXT: mov x0, x20
; CHECK-COMMON-NEXT: bl __arm_sme_restore
; CHECK-COMMON-NEXT: mov sp, x29
; CHECK-COMMON-NEXT: ldp x20, x19, [sp, #16] // 16-byte Folded Reload
; CHECK-COMMON-NEXT: ldp x29, x30, [sp], #32 // 16-byte Folded Reload
; CHECK-COMMON-NEXT: ret
  call void %ptr()
  ret void
}
