; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
; RUN: llc -mtriple=aarch64 -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SD
; RUN: llc -mtriple=aarch64 -global-isel -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-GI

; Codegen expectations for the llvm.sin intrinsic on AArch64, for scalar and
; vector floating-point types. The first llc invocation above is checked with
; the shared prefix plus the "SD" prefix, the -global-isel invocation with the
; shared prefix plus the "GI" prefix. Do not edit the assertions by hand;
; regenerate them with the script named in the NOTE line.

; Scalar double: the expected output is a tail branch to sin.
define double @sin_f64(double %a) {
; CHECK-LABEL: sin_f64:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    b sin
entry:
  %c = call double @llvm.sin.f64(double %a)
  ret double %c
}

; Scalar float: the expected output is a tail branch to sinf.
define float @sin_f32(float %a) {
; CHECK-LABEL: sin_f32:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    b sinf
entry:
  %c = call float @llvm.sin.f32(float %a)
  ret float %c
}

; Scalar half: the value is converted to single precision, sinf is called,
; and the result is converted back to half.
define half @sin_f16(half %a) {
; CHECK-LABEL: sin_f16:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    .cfi_offset w30, -16
; CHECK-NEXT:    fcvt s0, h0
; CHECK-NEXT:    bl sinf
; CHECK-NEXT:    fcvt h0, s0
; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT:    ret
entry:
  %c = call half @llvm.sin.f16(half %a)
  ret half %c
}

; Scalar fp128: the expected output is a tail branch to sinl.
define fp128 @sin_fp128(fp128 %a) {
; CHECK-LABEL: sin_fp128:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    b sinl
entry:
  %c = call fp128 @llvm.sin.fp128(fp128 %a)
  ret fp128 %c
}

; <1 x double>: a single bl to sin with the link register saved around it.
; This function has no explicit entry label, so the block comment in the
; expected output has no "%entry" suffix.
define <1 x double> @sin_v1f64(<1 x double> %x) {
; CHECK-LABEL: sin_v1f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    .cfi_offset w30, -16
; CHECK-NEXT:    bl sin
; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT:    ret
  %c = call <1 x double> @llvm.sin.v1f64(<1 x double> %x)
  ret <1 x double> %c
}

; <2 x double>: one sin call per lane. The SD output keeps the input vector in
; a stack slot; the GI output keeps lane 1 in d8 across the first call.
define <2 x double> @sin_v2f64(<2 x double> %a) {
; CHECK-SD-LABEL: sin_v2f64:
; CHECK-SD:       // %bb.0: // %entry
; CHECK-SD-NEXT:    sub sp, sp, #48
; CHECK-SD-NEXT:    str x30, [sp, #32] // 8-byte Folded Spill
; CHECK-SD-NEXT:    .cfi_def_cfa_offset 48
; CHECK-SD-NEXT:    .cfi_offset w30, -16
; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
; CHECK-SD-NEXT:    mov d0, v0.d[1]
; CHECK-SD-NEXT:    bl sin
; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-SD-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
; CHECK-SD-NEXT:    bl sin
; CHECK-SD-NEXT:    ldr q1, [sp, #16] // 16-byte Folded Reload
; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-SD-NEXT:    ldr x30, [sp, #32] // 8-byte Folded Reload
; CHECK-SD-NEXT:    mov v0.d[1], v1.d[0]
; CHECK-SD-NEXT:    add sp, sp, #48
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: sin_v2f64:
; CHECK-GI:       // %bb.0: // %entry
; CHECK-GI-NEXT:    sub sp, sp, #32
; CHECK-GI-NEXT:    str d8, [sp, #16] // 8-byte Folded Spill
; CHECK-GI-NEXT:    str x30, [sp, #24] // 8-byte Folded Spill
; CHECK-GI-NEXT:    .cfi_def_cfa_offset 32
; CHECK-GI-NEXT:    .cfi_offset w30, -8
; CHECK-GI-NEXT:    .cfi_offset b8, -16
; CHECK-GI-NEXT:    mov d8, v0.d[1]
; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
; CHECK-GI-NEXT:    bl sin
; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-GI-NEXT:    str q0, [sp] // 16-byte Folded Spill
; CHECK-GI-NEXT:    fmov d0, d8
; CHECK-GI-NEXT:    bl sin
; CHECK-GI-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-GI-NEXT:    ldr x30, [sp, #24] // 8-byte Folded Reload
; CHECK-GI-NEXT:    ldr d8, [sp, #16] // 8-byte Folded Reload
; CHECK-GI-NEXT:    mov v1.d[1], v0.d[0]
; CHECK-GI-NEXT:    mov v0.16b, v1.16b
; CHECK-GI-NEXT:    add sp, sp, #32
; CHECK-GI-NEXT:    ret
entry:
  %c = call <2 x double> @llvm.sin.v2f64(<2 x double> %a)
  ret <2 x double> %c
}

; <3 x double>: the three elements arrive in d0-d2; three sin calls are made
; with the pending values parked in d8-d10.
define <3 x double> @sin_v3f64(<3 x double> %a) {
; CHECK-SD-LABEL: sin_v3f64:
; CHECK-SD:       // %bb.0: // %entry
; CHECK-SD-NEXT:    str d10, [sp, #-32]! // 8-byte Folded Spill
; CHECK-SD-NEXT:    stp d9, d8, [sp, #8] // 16-byte Folded Spill
; CHECK-SD-NEXT:    str x30, [sp, #24] // 8-byte Folded Spill
; CHECK-SD-NEXT:    .cfi_def_cfa_offset 32
; CHECK-SD-NEXT:    .cfi_offset w30, -8
; CHECK-SD-NEXT:    .cfi_offset b8, -16
; CHECK-SD-NEXT:    .cfi_offset b9, -24
; CHECK-SD-NEXT:    .cfi_offset b10, -32
; CHECK-SD-NEXT:    fmov d8, d2
; CHECK-SD-NEXT:    fmov d9, d1
; CHECK-SD-NEXT:    bl sin
; CHECK-SD-NEXT:    fmov d10, d0
; CHECK-SD-NEXT:    fmov d0, d9
; CHECK-SD-NEXT:    bl sin
; CHECK-SD-NEXT:    fmov d9, d0
; CHECK-SD-NEXT:    fmov d0, d8
; CHECK-SD-NEXT:    bl sin
; CHECK-SD-NEXT:    fmov d1, d9
; CHECK-SD-NEXT:    ldp d9, d8, [sp, #8] // 16-byte Folded Reload
; CHECK-SD-NEXT:    ldr x30, [sp, #24] // 8-byte Folded Reload
; CHECK-SD-NEXT:    fmov d2, d0
; CHECK-SD-NEXT:    fmov d0, d10
; CHECK-SD-NEXT:    ldr d10, [sp], #32 // 8-byte Folded Reload
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: sin_v3f64:
; CHECK-GI:       // %bb.0: // %entry
; CHECK-GI-NEXT:    str d10, [sp, #-32]! // 8-byte Folded Spill
; CHECK-GI-NEXT:    stp d9, d8, [sp, #8] // 16-byte Folded Spill
; CHECK-GI-NEXT:    str x30, [sp, #24] // 8-byte Folded Spill
; CHECK-GI-NEXT:    .cfi_def_cfa_offset 32
; CHECK-GI-NEXT:    .cfi_offset w30, -8
; CHECK-GI-NEXT:    .cfi_offset b8, -16
; CHECK-GI-NEXT:    .cfi_offset b9, -24
; CHECK-GI-NEXT:    .cfi_offset b10, -32
; CHECK-GI-NEXT:    fmov d8, d1
; CHECK-GI-NEXT:    fmov d9, d2
; CHECK-GI-NEXT:    bl sin
; CHECK-GI-NEXT:    fmov d10, d0
; CHECK-GI-NEXT:    fmov d0, d8
; CHECK-GI-NEXT:    bl sin
; CHECK-GI-NEXT:    fmov d8, d0
; CHECK-GI-NEXT:    fmov d0, d9
; CHECK-GI-NEXT:    bl sin
; CHECK-GI-NEXT:    fmov d1, d8
; CHECK-GI-NEXT:    ldp d9, d8, [sp, #8] // 16-byte Folded Reload
; CHECK-GI-NEXT:    ldr x30, [sp, #24] // 8-byte Folded Reload
; CHECK-GI-NEXT:    fmov d2, d0
; CHECK-GI-NEXT:    fmov d0, d10
; CHECK-GI-NEXT:    ldr d10, [sp], #32 // 8-byte Folded Reload
; CHECK-GI-NEXT:    ret
entry:
  %c = call <3 x double> @llvm.sin.v3f64(<3 x double> %a)
  ret <3 x double> %c
}

; <4 x double>: the input arrives in q0/q1; four sin calls are made and the
; result is reassembled into q0/q1.
define <4 x double> @sin_v4f64(<4 x double> %a) {
; CHECK-SD-LABEL: sin_v4f64:
; CHECK-SD:       // %bb.0: // %entry
; CHECK-SD-NEXT:    sub sp, sp, #64
; CHECK-SD-NEXT:    str x30, [sp, #48] // 8-byte Folded Spill
; CHECK-SD-NEXT:    .cfi_def_cfa_offset 64
; CHECK-SD-NEXT:    .cfi_offset w30, -16
; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
; CHECK-SD-NEXT:    mov d0, v0.d[1]
; CHECK-SD-NEXT:    str q1, [sp, #32] // 16-byte Folded Spill
; CHECK-SD-NEXT:    bl sin
; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-SD-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
; CHECK-SD-NEXT:    bl sin
; CHECK-SD-NEXT:    ldr q1, [sp, #16] // 16-byte Folded Reload
; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-SD-NEXT:    mov v0.d[1], v1.d[0]
; CHECK-SD-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
; CHECK-SD-NEXT:    ldr q0, [sp, #32] // 16-byte Folded Reload
; CHECK-SD-NEXT:    mov d0, v0.d[1]
; CHECK-SD-NEXT:    bl sin
; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
; CHECK-SD-NEXT:    ldr q0, [sp, #32] // 16-byte Folded Reload
; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
; CHECK-SD-NEXT:    bl sin
; CHECK-SD-NEXT:    fmov d1, d0
; CHECK-SD-NEXT:    ldp q2, q0, [sp] // 32-byte Folded Reload
; CHECK-SD-NEXT:    ldr x30, [sp, #48] // 8-byte Folded Reload
; CHECK-SD-NEXT:    mov v1.d[1], v2.d[0]
; CHECK-SD-NEXT:    add sp, sp, #64
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: sin_v4f64:
; CHECK-GI:       // %bb.0: // %entry
; CHECK-GI-NEXT:    sub sp, sp, #80
; CHECK-GI-NEXT:    stp d9, d8, [sp, #48] // 16-byte Folded Spill
; CHECK-GI-NEXT:    str x30, [sp, #64] // 8-byte Folded Spill
; CHECK-GI-NEXT:    .cfi_def_cfa_offset 80
; CHECK-GI-NEXT:    .cfi_offset w30, -16
; CHECK-GI-NEXT:    .cfi_offset b8, -24
; CHECK-GI-NEXT:    .cfi_offset b9, -32
; CHECK-GI-NEXT:    str q1, [sp] // 16-byte Folded Spill
; CHECK-GI-NEXT:    mov d8, v0.d[1]
; CHECK-GI-NEXT:    mov d9, v1.d[1]
; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
; CHECK-GI-NEXT:    bl sin
; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-GI-NEXT:    str q0, [sp, #32] // 16-byte Folded Spill
; CHECK-GI-NEXT:    fmov d0, d8
; CHECK-GI-NEXT:    bl sin
; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-GI-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
; CHECK-GI-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
; CHECK-GI-NEXT:    bl sin
; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-GI-NEXT:    str q0, [sp] // 16-byte Folded Spill
; CHECK-GI-NEXT:    fmov d0, d9
; CHECK-GI-NEXT:    bl sin
; CHECK-GI-NEXT:    ldp q1, q2, [sp, #16] // 32-byte Folded Reload
; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-GI-NEXT:    ldr x30, [sp, #64] // 8-byte Folded Reload
; CHECK-GI-NEXT:    ldp d9, d8, [sp, #48] // 16-byte Folded Reload
; CHECK-GI-NEXT:    mov v2.d[1], v1.d[0]
; CHECK-GI-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
; CHECK-GI-NEXT:    mov v1.d[1], v0.d[0]
; CHECK-GI-NEXT:    mov v0.16b, v2.16b
; CHECK-GI-NEXT:    add sp, sp, #80
; CHECK-GI-NEXT:    ret
entry:
  %c = call <4 x double> @llvm.sin.v4f64(<4 x double> %a)
  ret <4 x double> %c
}

; <2 x float>: one sinf call per lane; the result is returned in d0.
define <2 x float> @sin_v2f32(<2 x float> %a) {
; CHECK-SD-LABEL: sin_v2f32:
; CHECK-SD:       // %bb.0: // %entry
; CHECK-SD-NEXT:    sub sp, sp, #48
; CHECK-SD-NEXT:    str x30, [sp, #32] // 8-byte Folded Spill
; CHECK-SD-NEXT:    .cfi_def_cfa_offset 48
; CHECK-SD-NEXT:    .cfi_offset w30, -16
; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
; CHECK-SD-NEXT:    mov s0, v0.s[1]
; CHECK-SD-NEXT:    bl sinf
; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
; CHECK-SD-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 killed $q0
; CHECK-SD-NEXT:    bl sinf
; CHECK-SD-NEXT:    ldr q1, [sp, #16] // 16-byte Folded Reload
; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
; CHECK-SD-NEXT:    ldr x30, [sp, #32] // 8-byte Folded Reload
; CHECK-SD-NEXT:    mov v0.s[1], v1.s[0]
; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
; CHECK-SD-NEXT:    add sp, sp, #48
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: sin_v2f32:
; CHECK-GI:       // %bb.0: // %entry
; CHECK-GI-NEXT:    sub sp, sp, #32
; CHECK-GI-NEXT:    str d8, [sp, #16] // 8-byte Folded Spill
; CHECK-GI-NEXT:    str x30, [sp, #24] // 8-byte Folded Spill
; CHECK-GI-NEXT:    .cfi_def_cfa_offset 32
; CHECK-GI-NEXT:    .cfi_offset w30, -8
; CHECK-GI-NEXT:    .cfi_offset b8, -16
; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-GI-NEXT:    mov s8, v0.s[1]
; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 killed $q0
; CHECK-GI-NEXT:    bl sinf
; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
; CHECK-GI-NEXT:    str q0, [sp] // 16-byte Folded Spill
; CHECK-GI-NEXT:    fmov s0, s8
; CHECK-GI-NEXT:    bl sinf
; CHECK-GI-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
; CHECK-GI-NEXT:    ldr x30, [sp, #24] // 8-byte Folded Reload
; CHECK-GI-NEXT:    ldr d8, [sp, #16] // 8-byte Folded Reload
; CHECK-GI-NEXT:    mov v1.s[1], v0.s[0]
; CHECK-GI-NEXT:    fmov d0, d1
; CHECK-GI-NEXT:    add sp, sp, #32
; CHECK-GI-NEXT:    ret
entry:
  %c = call <2 x float> @llvm.sin.v2f32(<2 x float> %a)
  ret <2 x float> %c
}

; <3 x float>: three sinf calls (lanes 0, 1 and 2) in both outputs.
define <3 x float> @sin_v3f32(<3 x float> %a) {
; CHECK-SD-LABEL: sin_v3f32:
; CHECK-SD:       // %bb.0: // %entry
; CHECK-SD-NEXT:    sub sp, sp, #48
; CHECK-SD-NEXT:    str x30, [sp, #32] // 8-byte Folded Spill
; CHECK-SD-NEXT:    .cfi_def_cfa_offset 48
; CHECK-SD-NEXT:    .cfi_offset w30, -16
; CHECK-SD-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
; CHECK-SD-NEXT:    mov s0, v0.s[1]
; CHECK-SD-NEXT:    bl sinf
; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
; CHECK-SD-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 killed $q0
; CHECK-SD-NEXT:    bl sinf
; CHECK-SD-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
; CHECK-SD-NEXT:    mov v0.s[1], v1.s[0]
; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
; CHECK-SD-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
; CHECK-SD-NEXT:    mov s0, v0.s[2]
; CHECK-SD-NEXT:    bl sinf
; CHECK-SD-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
; CHECK-SD-NEXT:    ldr x30, [sp, #32] // 8-byte Folded Reload
; CHECK-SD-NEXT:    mov v1.s[2], v0.s[0]
; CHECK-SD-NEXT:    mov v0.16b, v1.16b
; CHECK-SD-NEXT:    add sp, sp, #48
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: sin_v3f32:
; CHECK-GI:       // %bb.0: // %entry
; CHECK-GI-NEXT:    sub sp, sp, #64
; CHECK-GI-NEXT:    stp d9, d8, [sp, #32] // 16-byte Folded Spill
; CHECK-GI-NEXT:    str x30, [sp, #48] // 8-byte Folded Spill
; CHECK-GI-NEXT:    .cfi_def_cfa_offset 64
; CHECK-GI-NEXT:    .cfi_offset w30, -16
; CHECK-GI-NEXT:    .cfi_offset b8, -24
; CHECK-GI-NEXT:    .cfi_offset b9, -32
; CHECK-GI-NEXT:    mov s8, v0.s[1]
; CHECK-GI-NEXT:    mov s9, v0.s[2]
; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 killed $q0
; CHECK-GI-NEXT:    bl sinf
; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
; CHECK-GI-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
; CHECK-GI-NEXT:    fmov s0, s8
; CHECK-GI-NEXT:    bl sinf
; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
; CHECK-GI-NEXT:    str q0, [sp] // 16-byte Folded Spill
; CHECK-GI-NEXT:    fmov s0, s9
; CHECK-GI-NEXT:    bl sinf
; CHECK-GI-NEXT:    ldp q2, q1, [sp] // 32-byte Folded Reload
; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
; CHECK-GI-NEXT:    ldr x30, [sp, #48] // 8-byte Folded Reload
; CHECK-GI-NEXT:    ldp d9, d8, [sp, #32] // 16-byte Folded Reload
; CHECK-GI-NEXT:    mov v1.s[1], v2.s[0]
; CHECK-GI-NEXT:    mov v1.s[2], v0.s[0]
; CHECK-GI-NEXT:    mov v0.16b, v1.16b
; CHECK-GI-NEXT:    add sp, sp, #64
; CHECK-GI-NEXT:    ret
entry:
  %c = call <3 x float> @llvm.sin.v3f32(<3 x float> %a)
  ret <3 x float> %c
}

; <4 x float>: four sinf calls, one per lane.
define <4 x float> @sin_v4f32(<4 x float> %a) {
; CHECK-SD-LABEL: sin_v4f32:
; CHECK-SD:       // %bb.0: // %entry
; CHECK-SD-NEXT:    sub sp, sp, #48
; CHECK-SD-NEXT:    str x30, [sp, #32] // 8-byte Folded Spill
; CHECK-SD-NEXT:    .cfi_def_cfa_offset 48
; CHECK-SD-NEXT:    .cfi_offset w30, -16
; CHECK-SD-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
; CHECK-SD-NEXT:    mov s0, v0.s[1]
; CHECK-SD-NEXT:    bl sinf
; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
; CHECK-SD-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 killed $q0
; CHECK-SD-NEXT:    bl sinf
; CHECK-SD-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
; CHECK-SD-NEXT:    mov v0.s[1], v1.s[0]
; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
; CHECK-SD-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
; CHECK-SD-NEXT:    mov s0, v0.s[2]
; CHECK-SD-NEXT:    bl sinf
; CHECK-SD-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
; CHECK-SD-NEXT:    mov v1.s[2], v0.s[0]
; CHECK-SD-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
; CHECK-SD-NEXT:    mov s0, v0.s[3]
; CHECK-SD-NEXT:    str q1, [sp] // 16-byte Folded Spill
; CHECK-SD-NEXT:    bl sinf
; CHECK-SD-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
; CHECK-SD-NEXT:    ldr x30, [sp, #32] // 8-byte Folded Reload
; CHECK-SD-NEXT:    mov v1.s[3], v0.s[0]
; CHECK-SD-NEXT:    mov v0.16b, v1.16b
; CHECK-SD-NEXT:    add sp, sp, #48
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: sin_v4f32:
; CHECK-GI:       // %bb.0: // %entry
; CHECK-GI-NEXT:    sub sp, sp, #80
; CHECK-GI-NEXT:    str d10, [sp, #48] // 8-byte Folded Spill
; CHECK-GI-NEXT:    stp d9, d8, [sp, #56] // 16-byte Folded Spill
; CHECK-GI-NEXT:    str x30, [sp, #72] // 8-byte Folded Spill
; CHECK-GI-NEXT:    .cfi_def_cfa_offset 80
; CHECK-GI-NEXT:    .cfi_offset w30, -8
; CHECK-GI-NEXT:    .cfi_offset b8, -16
; CHECK-GI-NEXT:    .cfi_offset b9, -24
; CHECK-GI-NEXT:    .cfi_offset b10, -32
; CHECK-GI-NEXT:    mov s8, v0.s[1]
; CHECK-GI-NEXT:    mov s9, v0.s[2]
; CHECK-GI-NEXT:    mov s10, v0.s[3]
; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 killed $q0
; CHECK-GI-NEXT:    bl sinf
; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
; CHECK-GI-NEXT:    str q0, [sp, #32] // 16-byte Folded Spill
; CHECK-GI-NEXT:    fmov s0, s8
; CHECK-GI-NEXT:    bl sinf
; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
; CHECK-GI-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
; CHECK-GI-NEXT:    fmov s0, s9
; CHECK-GI-NEXT:    bl sinf
; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
; CHECK-GI-NEXT:    str q0, [sp] // 16-byte Folded Spill
; CHECK-GI-NEXT:    fmov s0, s10
; CHECK-GI-NEXT:    bl sinf
; CHECK-GI-NEXT:    ldp q2, q1, [sp, #16] // 32-byte Folded Reload
; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
; CHECK-GI-NEXT:    ldr x30, [sp, #72] // 8-byte Folded Reload
; CHECK-GI-NEXT:    ldp d9, d8, [sp, #56] // 16-byte Folded Reload
; CHECK-GI-NEXT:    ldr d10, [sp, #48] // 8-byte Folded Reload
; CHECK-GI-NEXT:    mov v1.s[1], v2.s[0]
; CHECK-GI-NEXT:    ldr q2, [sp] // 16-byte Folded Reload
; CHECK-GI-NEXT:    mov v1.s[2], v2.s[0]
; CHECK-GI-NEXT:    mov v1.s[3], v0.s[0]
; CHECK-GI-NEXT:    mov v0.16b, v1.16b
; CHECK-GI-NEXT:    add sp, sp, #80
; CHECK-GI-NEXT:    ret
entry:
  %c = call <4 x float> @llvm.sin.v4f32(<4 x float> %a)
  ret <4 x float> %c
}

; <8 x float>: the input arrives in q0/q1; eight sinf calls are made and the
; result is reassembled into q0/q1.
define <8 x float> @sin_v8f32(<8 x float> %a) {
; CHECK-SD-LABEL: sin_v8f32:
; CHECK-SD:       // %bb.0: // %entry
; CHECK-SD-NEXT:    sub sp, sp, #64
; CHECK-SD-NEXT:    str x30, [sp, #48] // 8-byte Folded Spill
; CHECK-SD-NEXT:    .cfi_def_cfa_offset 64
; CHECK-SD-NEXT:    .cfi_offset w30, -16
; CHECK-SD-NEXT:    stp q0, q1, [sp] // 32-byte Folded Spill
; CHECK-SD-NEXT:    mov s0, v0.s[1]
; CHECK-SD-NEXT:    bl sinf
; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
; CHECK-SD-NEXT:    str q0, [sp, #32] // 16-byte Folded Spill
; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 killed $q0
; CHECK-SD-NEXT:    bl sinf
; CHECK-SD-NEXT:    ldr q1, [sp, #32] // 16-byte Folded Reload
; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
; CHECK-SD-NEXT:    mov v0.s[1], v1.s[0]
; CHECK-SD-NEXT:    str q0, [sp, #32] // 16-byte Folded Spill
; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
; CHECK-SD-NEXT:    mov s0, v0.s[2]
; CHECK-SD-NEXT:    bl sinf
; CHECK-SD-NEXT:    ldr q1, [sp, #32] // 16-byte Folded Reload
; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
; CHECK-SD-NEXT:    mov v1.s[2], v0.s[0]
; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
; CHECK-SD-NEXT:    mov s0, v0.s[3]
; CHECK-SD-NEXT:    str q1, [sp, #32] // 16-byte Folded Spill
; CHECK-SD-NEXT:    bl sinf
; CHECK-SD-NEXT:    ldr q1, [sp, #32] // 16-byte Folded Reload
; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
; CHECK-SD-NEXT:    mov v1.s[3], v0.s[0]
; CHECK-SD-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
; CHECK-SD-NEXT:    mov s0, v0.s[1]
; CHECK-SD-NEXT:    str q1, [sp, #32] // 16-byte Folded Spill
; CHECK-SD-NEXT:    bl sinf
; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
; CHECK-SD-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 killed $q0
; CHECK-SD-NEXT:    bl sinf
; CHECK-SD-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
; CHECK-SD-NEXT:    mov v0.s[1], v1.s[0]
; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
; CHECK-SD-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
; CHECK-SD-NEXT:    mov s0, v0.s[2]
; CHECK-SD-NEXT:    bl sinf
; CHECK-SD-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
; CHECK-SD-NEXT:    // kill: def $s0 killed $s0 def $q0
; CHECK-SD-NEXT:    mov v1.s[2], v0.s[0]
; CHECK-SD-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
; CHECK-SD-NEXT:    mov s0, v0.s[3]
; CHECK-SD-NEXT:    str q1, [sp] // 16-byte Folded Spill
; CHECK-SD-NEXT:    bl sinf
; CHECK-SD-NEXT:    fmov s2, s0
; CHECK-SD-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
; CHECK-SD-NEXT:    ldr q0, [sp, #32] // 16-byte Folded Reload
; CHECK-SD-NEXT:    ldr x30, [sp, #48] // 8-byte Folded Reload
; CHECK-SD-NEXT:    mov v1.s[3], v2.s[0]
; CHECK-SD-NEXT:    add sp, sp, #64
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: sin_v8f32:
; CHECK-GI:       // %bb.0: // %entry
; CHECK-GI-NEXT:    sub sp, sp, #176
; CHECK-GI-NEXT:    stp d13, d12, [sp, #112] // 16-byte Folded Spill
; CHECK-GI-NEXT:    stp d11, d10, [sp, #128] // 16-byte Folded Spill
; CHECK-GI-NEXT:    stp d9, d8, [sp, #144] // 16-byte Folded Spill
; CHECK-GI-NEXT:    str x30, [sp, #160] // 8-byte Folded Spill
; CHECK-GI-NEXT:    .cfi_def_cfa_offset 176
; CHECK-GI-NEXT:    .cfi_offset w30, -16
; CHECK-GI-NEXT:    .cfi_offset b8, -24
; CHECK-GI-NEXT:    .cfi_offset b9, -32
; CHECK-GI-NEXT:    .cfi_offset b10, -40
; CHECK-GI-NEXT:    .cfi_offset b11, -48
; CHECK-GI-NEXT:    .cfi_offset b12, -56
; CHECK-GI-NEXT:    .cfi_offset b13, -64
; CHECK-GI-NEXT:    str q1, [sp, #32] // 16-byte Folded Spill
; CHECK-GI-NEXT:    mov s8, v0.s[1]
; CHECK-GI-NEXT:    mov s9, v0.s[2]
; CHECK-GI-NEXT:    mov s10, v0.s[3]
; CHECK-GI-NEXT:    mov s11, v1.s[1]
; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 killed $q0
; CHECK-GI-NEXT:    mov s12, v1.s[2]
; CHECK-GI-NEXT:    mov s13, v1.s[3]
; CHECK-GI-NEXT:    bl sinf
; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
; CHECK-GI-NEXT:    str q0, [sp, #64] // 16-byte Folded Spill
; CHECK-GI-NEXT:    fmov s0, s8
; CHECK-GI-NEXT:    bl sinf
; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
; CHECK-GI-NEXT:    str q0, [sp, #48] // 16-byte Folded Spill
; CHECK-GI-NEXT:    fmov s0, s9
; CHECK-GI-NEXT:    bl sinf
; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
; CHECK-GI-NEXT:    str q0, [sp, #96] // 16-byte Folded Spill
; CHECK-GI-NEXT:    fmov s0, s10
; CHECK-GI-NEXT:    bl sinf
; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
; CHECK-GI-NEXT:    str q0, [sp, #80] // 16-byte Folded Spill
; CHECK-GI-NEXT:    ldr q0, [sp, #32] // 16-byte Folded Reload
; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 killed $q0
; CHECK-GI-NEXT:    bl sinf
; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
; CHECK-GI-NEXT:    str q0, [sp, #32] // 16-byte Folded Spill
; CHECK-GI-NEXT:    fmov s0, s11
; CHECK-GI-NEXT:    bl sinf
; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
; CHECK-GI-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
; CHECK-GI-NEXT:    fmov s0, s12
; CHECK-GI-NEXT:    bl sinf
; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
; CHECK-GI-NEXT:    str q0, [sp] // 16-byte Folded Spill
; CHECK-GI-NEXT:    fmov s0, s13
; CHECK-GI-NEXT:    bl sinf
; CHECK-GI-NEXT:    ldp q2, q1, [sp, #48] // 32-byte Folded Reload
; CHECK-GI-NEXT:    // kill: def $s0 killed $s0 def $q0
; CHECK-GI-NEXT:    ldr x30, [sp, #160] // 8-byte Folded Reload
; CHECK-GI-NEXT:    ldp d9, d8, [sp, #144] // 16-byte Folded Reload
; CHECK-GI-NEXT:    ldp d11, d10, [sp, #128] // 16-byte Folded Reload
; CHECK-GI-NEXT:    mov v1.s[1], v2.s[0]
; CHECK-GI-NEXT:    ldp q2, q3, [sp, #16] // 32-byte Folded Reload
; CHECK-GI-NEXT:    ldp d13, d12, [sp, #112] // 16-byte Folded Reload
; CHECK-GI-NEXT:    mov v3.s[1], v2.s[0]
; CHECK-GI-NEXT:    ldr q2, [sp, #96] // 16-byte Folded Reload
; CHECK-GI-NEXT:    mov v1.s[2], v2.s[0]
; CHECK-GI-NEXT:    ldr q2, [sp] // 16-byte Folded Reload
; CHECK-GI-NEXT:    mov v3.s[2], v2.s[0]
; CHECK-GI-NEXT:    ldr q2, [sp, #80] // 16-byte Folded Reload
; CHECK-GI-NEXT:    mov v1.s[3], v2.s[0]
; CHECK-GI-NEXT:    mov v3.s[3], v0.s[0]
; CHECK-GI-NEXT:    mov v2.16b, v1.16b
; CHECK-GI-NEXT:    mov v1.16b, v3.16b
; CHECK-GI-NEXT:    mov v0.16b, v2.16b
; CHECK-GI-NEXT:    add sp, sp, #176
; CHECK-GI-NEXT:    ret
entry:
  %c = call <8 x float> @llvm.sin.v8f32(<8 x float> %a)
  ret <8 x float> %c
}

; <7 x half>: each lane is converted to float, passed to sinf, and converted
; back. The SD output makes eight sinf calls (lanes 0-7); the GI output makes
; seven (lanes 0-6).
define <7 x half> @sin_v7f16(<7 x half> %a) {
; CHECK-SD-LABEL: sin_v7f16:
; CHECK-SD:       // %bb.0: // %entry
; CHECK-SD-NEXT:    sub sp, sp, #48
; CHECK-SD-NEXT:    str x30, [sp, #32] // 8-byte Folded Spill
; CHECK-SD-NEXT:    .cfi_def_cfa_offset 48
; CHECK-SD-NEXT:    .cfi_offset w30, -16
; CHECK-SD-NEXT:    mov h1, v0.h[1]
; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
; CHECK-SD-NEXT:    fcvt s0, h1
; CHECK-SD-NEXT:    bl sinf
; CHECK-SD-NEXT:    fcvt h0, s0
; CHECK-SD-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
; CHECK-SD-NEXT:    fcvt s0, h0
; CHECK-SD-NEXT:    bl sinf
; CHECK-SD-NEXT:    fcvt h0, s0
; CHECK-SD-NEXT:    ldr q1, [sp, #16] // 16-byte Folded Reload
; CHECK-SD-NEXT:    mov v0.h[1], v1.h[0]
; CHECK-SD-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
; CHECK-SD-NEXT:    mov h0, v0.h[2]
; CHECK-SD-NEXT:    fcvt s0, h0
; CHECK-SD-NEXT:    bl sinf
; CHECK-SD-NEXT:    fcvt h0, s0
; CHECK-SD-NEXT:    ldr q1, [sp, #16] // 16-byte Folded Reload
; CHECK-SD-NEXT:    mov v1.h[2], v0.h[0]
; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
; CHECK-SD-NEXT:    mov h0, v0.h[3]
; CHECK-SD-NEXT:    str q1, [sp, #16] // 16-byte Folded Spill
; CHECK-SD-NEXT:    fcvt s0, h0
; CHECK-SD-NEXT:    bl sinf
; CHECK-SD-NEXT:    fcvt h0, s0
; CHECK-SD-NEXT:    ldr q1, [sp, #16] // 16-byte Folded Reload
; CHECK-SD-NEXT:    mov v1.h[3], v0.h[0]
; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
; CHECK-SD-NEXT:    mov h0, v0.h[4]
; CHECK-SD-NEXT:    str q1, [sp, #16] // 16-byte Folded Spill
; CHECK-SD-NEXT:    fcvt s0, h0
; CHECK-SD-NEXT:    bl sinf
; CHECK-SD-NEXT:    fcvt h0, s0
; CHECK-SD-NEXT:    ldr q1, [sp, #16] // 16-byte Folded Reload
; CHECK-SD-NEXT:    mov v1.h[4], v0.h[0]
; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
; CHECK-SD-NEXT:    mov h0, v0.h[5]
; CHECK-SD-NEXT:    str q1, [sp, #16] // 16-byte Folded Spill
; CHECK-SD-NEXT:    fcvt s0, h0
; CHECK-SD-NEXT:    bl sinf
; CHECK-SD-NEXT:    fcvt h0, s0
; CHECK-SD-NEXT:    ldr q1, [sp, #16] // 16-byte Folded Reload
; CHECK-SD-NEXT:    mov v1.h[5], v0.h[0]
; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
; CHECK-SD-NEXT:    mov h0, v0.h[6]
; CHECK-SD-NEXT:    str q1, [sp, #16] // 16-byte Folded Spill
; CHECK-SD-NEXT:    fcvt s0, h0
; CHECK-SD-NEXT:    bl sinf
; CHECK-SD-NEXT:    fcvt h0, s0
; CHECK-SD-NEXT:    ldr q1, [sp, #16] // 16-byte Folded Reload
; CHECK-SD-NEXT:    mov v1.h[6], v0.h[0]
; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
; CHECK-SD-NEXT:    mov h0, v0.h[7]
; CHECK-SD-NEXT:    str q1, [sp, #16] // 16-byte Folded Spill
; CHECK-SD-NEXT:    fcvt s0, h0
; CHECK-SD-NEXT:    bl sinf
; CHECK-SD-NEXT:    fcvt h1, s0
; CHECK-SD-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
; CHECK-SD-NEXT:    ldr x30, [sp, #32] // 8-byte Folded Reload
; CHECK-SD-NEXT:    mov v0.h[7], v1.h[0]
; CHECK-SD-NEXT:    add sp, sp, #48
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: sin_v7f16:
; CHECK-GI:       // %bb.0: // %entry
; CHECK-GI-NEXT:    sub sp, sp, #160
; CHECK-GI-NEXT:    stp d13, d12, [sp, #96] // 16-byte Folded Spill
; CHECK-GI-NEXT:    stp d11, d10, [sp, #112] // 16-byte Folded Spill
; CHECK-GI-NEXT:    stp d9, d8, [sp, #128] // 16-byte Folded Spill
; CHECK-GI-NEXT:    str x30, [sp, #144] // 8-byte Folded Spill
; CHECK-GI-NEXT:    .cfi_def_cfa_offset 160
; CHECK-GI-NEXT:    .cfi_offset w30, -16
; CHECK-GI-NEXT:    .cfi_offset b8, -24
; CHECK-GI-NEXT:    .cfi_offset b9, -32
; CHECK-GI-NEXT:    .cfi_offset b10, -40
; CHECK-GI-NEXT:    .cfi_offset b11, -48
; CHECK-GI-NEXT:    .cfi_offset b12, -56
; CHECK-GI-NEXT:    .cfi_offset b13, -64
; CHECK-GI-NEXT:    mov h8, v0.h[1]
; CHECK-GI-NEXT:    mov h9, v0.h[2]
; CHECK-GI-NEXT:    mov h10, v0.h[3]
; CHECK-GI-NEXT:    mov h11, v0.h[4]
; CHECK-GI-NEXT:    mov h12, v0.h[5]
; CHECK-GI-NEXT:    mov h13, v0.h[6]
; CHECK-GI-NEXT:    fcvt s0, h0
; CHECK-GI-NEXT:    bl sinf
; CHECK-GI-NEXT:    fcvt s1, h8
; CHECK-GI-NEXT:    fcvt h0, s0
; CHECK-GI-NEXT:    str q0, [sp, #80] // 16-byte Folded Spill
; CHECK-GI-NEXT:    fmov s0, s1
; CHECK-GI-NEXT:    bl sinf
; CHECK-GI-NEXT:    fcvt s1, h9
; CHECK-GI-NEXT:    fcvt h0, s0
; CHECK-GI-NEXT:    str q0, [sp, #48] // 16-byte Folded Spill
; CHECK-GI-NEXT:    fmov s0, s1
; CHECK-GI-NEXT:    bl sinf
; CHECK-GI-NEXT:    fcvt s1, h10
; CHECK-GI-NEXT:    fcvt h0, s0
; CHECK-GI-NEXT:    str q0, [sp, #64] // 16-byte Folded Spill
; CHECK-GI-NEXT:    fmov s0, s1
; CHECK-GI-NEXT:    bl sinf
; CHECK-GI-NEXT:    fcvt s1, h11
; CHECK-GI-NEXT:    fcvt h0, s0
; CHECK-GI-NEXT:    str q0, [sp, #32] // 16-byte Folded Spill
; CHECK-GI-NEXT:    fmov s0, s1
; CHECK-GI-NEXT:    bl sinf
; CHECK-GI-NEXT:    fcvt s1, h12
; CHECK-GI-NEXT:    fcvt h0, s0
; CHECK-GI-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
; CHECK-GI-NEXT:    fmov s0, s1
; CHECK-GI-NEXT:    bl sinf
; CHECK-GI-NEXT:    fcvt s1, h13
; CHECK-GI-NEXT:    fcvt h0, s0
; CHECK-GI-NEXT:    str q0, [sp] // 16-byte Folded Spill
; CHECK-GI-NEXT:    fmov s0, s1
; CHECK-GI-NEXT:    bl sinf
; CHECK-GI-NEXT:    ldp q3, q2, [sp, #48] // 32-byte Folded Reload
; CHECK-GI-NEXT:    fcvt h0, s0
; CHECK-GI-NEXT:    ldr q1, [sp, #80] // 16-byte Folded Reload
; CHECK-GI-NEXT:    ldp d9, d8, [sp, #128] // 16-byte Folded Reload
; CHECK-GI-NEXT:    ldp d11, d10, [sp, #112] // 16-byte Folded Reload
; CHECK-GI-NEXT:    ldr x30, [sp, #144] // 8-byte Folded Reload
; CHECK-GI-NEXT:    mov v1.h[1], v3.h[0]
; CHECK-GI-NEXT:    ldp d13, d12, [sp, #96] // 16-byte Folded Reload
; CHECK-GI-NEXT:    mov v1.h[2], v2.h[0]
; CHECK-GI-NEXT:    ldp q2, q3, [sp, #16] // 32-byte Folded Reload
; CHECK-GI-NEXT:    mov v1.h[3], v3.h[0]
; CHECK-GI-NEXT:    mov v1.h[4], v2.h[0]
; CHECK-GI-NEXT:    ldr q2, [sp] // 16-byte Folded Reload
; CHECK-GI-NEXT:    mov v1.h[5], v2.h[0]
; CHECK-GI-NEXT:    mov v1.h[6], v0.h[0]
; CHECK-GI-NEXT:    mov v0.16b, v1.16b
; CHECK-GI-NEXT:    add sp, sp, #160
; CHECK-GI-NEXT:    ret
entry:
  %c = call <7 x half> @llvm.sin.v7f16(<7 x half> %a)
  ret <7 x half> %c
}

; <4 x half>: four sinf calls with fcvt conversions around each; the result is
; returned in d0.
define <4 x half> @sin_v4f16(<4 x half> %a) {
; CHECK-SD-LABEL: sin_v4f16:
; CHECK-SD:       // %bb.0: // %entry
; CHECK-SD-NEXT:    sub sp, sp, #48
; CHECK-SD-NEXT:    str x30, [sp, #32] // 8-byte Folded Spill
; CHECK-SD-NEXT:    .cfi_def_cfa_offset 48
; CHECK-SD-NEXT:    .cfi_offset w30, -16
; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-SD-NEXT:    mov h1, v0.h[1]
; CHECK-SD-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
; CHECK-SD-NEXT:    fcvt s0, h1
; CHECK-SD-NEXT:    bl sinf
; CHECK-SD-NEXT:    ldr q1, [sp, #16] // 16-byte Folded Reload
; CHECK-SD-NEXT:    fcvt h0, s0
; CHECK-SD-NEXT:    fcvt s1, h1
; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
; CHECK-SD-NEXT:    fmov s0, s1
; CHECK-SD-NEXT:    bl sinf
; CHECK-SD-NEXT:    ldr q1, [sp, #16] // 16-byte Folded Reload
; CHECK-SD-NEXT:    fcvt h2, s0
; CHECK-SD-NEXT:    mov h1, v1.h[2]
; CHECK-SD-NEXT:    fcvt s0, h1
; CHECK-SD-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
; CHECK-SD-NEXT:    mov v2.h[1], v1.h[0]
; CHECK-SD-NEXT:    str q2, [sp] // 16-byte Folded Spill
; CHECK-SD-NEXT:    bl sinf
; CHECK-SD-NEXT:    ldr q1, [sp, #16] // 16-byte Folded Reload
; CHECK-SD-NEXT:    fcvt h2, s0
; CHECK-SD-NEXT:    mov h1, v1.h[3]
; CHECK-SD-NEXT:    fcvt s0, h1
; CHECK-SD-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
; CHECK-SD-NEXT:    mov v1.h[2], v2.h[0]
; CHECK-SD-NEXT:    str q1, [sp] // 16-byte Folded Spill
; CHECK-SD-NEXT:    bl sinf
; CHECK-SD-NEXT:    fcvt h1, s0
; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
; CHECK-SD-NEXT:    ldr x30, [sp, #32] // 8-byte Folded Reload
; CHECK-SD-NEXT:    mov v0.h[3], v1.h[0]
; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 killed $q0
; CHECK-SD-NEXT:    add sp, sp, #48
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: sin_v4f16:
; CHECK-GI:       // %bb.0: // %entry
; CHECK-GI-NEXT:    sub sp, sp, #80
; CHECK-GI-NEXT:    str d10, [sp, #48] // 8-byte Folded Spill
; CHECK-GI-NEXT:    stp d9, d8, [sp, #56] // 16-byte Folded Spill
; CHECK-GI-NEXT:    str x30, [sp, #72] // 8-byte Folded Spill
; CHECK-GI-NEXT:    .cfi_def_cfa_offset 80
; CHECK-GI-NEXT:    .cfi_offset w30, -8
; CHECK-GI-NEXT:    .cfi_offset b8, -16
; CHECK-GI-NEXT:    .cfi_offset b9, -24
; CHECK-GI-NEXT:    .cfi_offset b10, -32
; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-GI-NEXT:    mov h8, v0.h[1]
; CHECK-GI-NEXT:    mov h9, v0.h[2]
; CHECK-GI-NEXT:    mov h10, v0.h[3]
; CHECK-GI-NEXT:    fcvt s0, h0
; CHECK-GI-NEXT:    bl sinf
; CHECK-GI-NEXT:    fcvt s1, h8
; CHECK-GI-NEXT:    fcvt h0, s0
; CHECK-GI-NEXT:    str q0, [sp, #32] // 16-byte Folded Spill
; CHECK-GI-NEXT:    fmov s0, s1
; CHECK-GI-NEXT:    bl sinf
; CHECK-GI-NEXT:    fcvt s1, h9
; CHECK-GI-NEXT:    fcvt h0, s0
; CHECK-GI-NEXT:    str q0, [sp] // 16-byte Folded Spill
; CHECK-GI-NEXT:    fmov s0, s1
; CHECK-GI-NEXT:    bl sinf
; CHECK-GI-NEXT:    fcvt s1, h10
; CHECK-GI-NEXT:    fcvt h0, s0
; CHECK-GI-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
; CHECK-GI-NEXT:    fmov s0, s1
; CHECK-GI-NEXT:    bl sinf
; CHECK-GI-NEXT:    ldp q3, q2, [sp] // 32-byte Folded Reload
; CHECK-GI-NEXT:    fcvt h0, s0
; CHECK-GI-NEXT:    ldr q1, [sp, #32] // 16-byte Folded Reload
; CHECK-GI-NEXT:    ldp d9, d8, [sp, #56] // 16-byte Folded Reload
; CHECK-GI-NEXT:    ldr x30, [sp, #72] // 8-byte Folded Reload
; CHECK-GI-NEXT:    ldr d10, [sp, #48] // 8-byte Folded Reload
; CHECK-GI-NEXT:    mov v1.h[1], v3.h[0]
; CHECK-GI-NEXT:    mov v1.h[2], v2.h[0]
; CHECK-GI-NEXT:    mov v1.h[3], v0.h[0]
; CHECK-GI-NEXT:    mov v0.16b, v1.16b
; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
; CHECK-GI-NEXT:    add sp, sp, #80
; CHECK-GI-NEXT:    ret
entry:
  %c = call <4 x half> @llvm.sin.v4f16(<4 x half> %a)
  ret <4 x half> %c
}

define <8 x half> @sin_v8f16(<8 x half> %a) {
; CHECK-SD-LABEL: sin_v8f16:
; CHECK-SD:       // %bb.0: // %entry
; CHECK-SD-NEXT:    sub sp, sp, #48
; CHECK-SD-NEXT:    str x30, [sp, #32] // 8-byte Folded Spill
; CHECK-SD-NEXT:    .cfi_def_cfa_offset 48
; CHECK-SD-NEXT:    .cfi_offset w30, -16
; CHECK-SD-NEXT:    mov h1, v0.h[1]
; CHECK-SD-NEXT:    str q0, [sp] // 16-byte Folded Spill
; CHECK-SD-NEXT:    fcvt s0, h1
; CHECK-SD-NEXT:    bl sinf
; CHECK-SD-NEXT:    fcvt h0, s0
; CHECK-SD-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
; CHECK-SD-NEXT:    fcvt s0, h0
; CHECK-SD-NEXT:    bl sinf
; CHECK-SD-NEXT:    fcvt h0, s0
; CHECK-SD-NEXT:    ldr q1, [sp, #16] // 16-byte Folded Reload
; CHECK-SD-NEXT:    mov v0.h[1], v1.h[0]
; CHECK-SD-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
; CHECK-SD-NEXT:    mov h0, v0.h[2]
; CHECK-SD-NEXT:    fcvt s0, h0
; CHECK-SD-NEXT:    bl sinf
; CHECK-SD-NEXT:    fcvt h0, s0
; CHECK-SD-NEXT:    ldr q1, [sp, #16] // 16-byte Folded Reload
; CHECK-SD-NEXT:    mov v1.h[2], v0.h[0]
; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
; CHECK-SD-NEXT:    mov h0, v0.h[3]
; CHECK-SD-NEXT:    str q1, [sp, #16] // 16-byte Folded Spill
; CHECK-SD-NEXT:    fcvt s0, h0
; CHECK-SD-NEXT:    bl sinf
; CHECK-SD-NEXT:    fcvt h0, s0
; CHECK-SD-NEXT:    ldr q1, [sp, #16] // 16-byte Folded Reload
; CHECK-SD-NEXT:    mov v1.h[3], v0.h[0]
; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
; CHECK-SD-NEXT:    mov h0, v0.h[4]
; CHECK-SD-NEXT:    str q1, [sp, #16] // 16-byte Folded Spill
; CHECK-SD-NEXT:    fcvt s0, h0
; CHECK-SD-NEXT:    bl sinf
; CHECK-SD-NEXT:    fcvt h0, s0
; CHECK-SD-NEXT:    ldr q1, [sp, #16] // 16-byte Folded Reload
; CHECK-SD-NEXT:    mov v1.h[4], v0.h[0]
; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
; CHECK-SD-NEXT:    mov h0, v0.h[5]
; CHECK-SD-NEXT:    str q1, [sp, #16] // 16-byte Folded Spill
; CHECK-SD-NEXT:    fcvt s0, h0
; CHECK-SD-NEXT:    bl sinf
; CHECK-SD-NEXT:    fcvt h0, s0
; CHECK-SD-NEXT:    ldr q1, [sp, #16] // 16-byte Folded Reload
; CHECK-SD-NEXT:    mov v1.h[5], v0.h[0]
; CHECK-SD-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
; CHECK-SD-NEXT:    mov h0, v0.h[6]
; CHECK-SD-NEXT:    str q1, [sp, #16] // 16-byte Folded Spill
;
CHECK-SD-NEXT: fcvt s0, h0 883; CHECK-SD-NEXT: bl sinf 884; CHECK-SD-NEXT: fcvt h0, s0 885; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload 886; CHECK-SD-NEXT: mov v1.h[6], v0.h[0] 887; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload 888; CHECK-SD-NEXT: mov h0, v0.h[7] 889; CHECK-SD-NEXT: str q1, [sp, #16] // 16-byte Folded Spill 890; CHECK-SD-NEXT: fcvt s0, h0 891; CHECK-SD-NEXT: bl sinf 892; CHECK-SD-NEXT: fcvt h1, s0 893; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload 894; CHECK-SD-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload 895; CHECK-SD-NEXT: mov v0.h[7], v1.h[0] 896; CHECK-SD-NEXT: add sp, sp, #48 897; CHECK-SD-NEXT: ret 898; 899; CHECK-GI-LABEL: sin_v8f16: 900; CHECK-GI: // %bb.0: // %entry 901; CHECK-GI-NEXT: sub sp, sp, #176 902; CHECK-GI-NEXT: str d14, [sp, #112] // 8-byte Folded Spill 903; CHECK-GI-NEXT: stp d13, d12, [sp, #120] // 16-byte Folded Spill 904; CHECK-GI-NEXT: stp d11, d10, [sp, #136] // 16-byte Folded Spill 905; CHECK-GI-NEXT: stp d9, d8, [sp, #152] // 16-byte Folded Spill 906; CHECK-GI-NEXT: str x30, [sp, #168] // 8-byte Folded Spill 907; CHECK-GI-NEXT: .cfi_def_cfa_offset 176 908; CHECK-GI-NEXT: .cfi_offset w30, -8 909; CHECK-GI-NEXT: .cfi_offset b8, -16 910; CHECK-GI-NEXT: .cfi_offset b9, -24 911; CHECK-GI-NEXT: .cfi_offset b10, -32 912; CHECK-GI-NEXT: .cfi_offset b11, -40 913; CHECK-GI-NEXT: .cfi_offset b12, -48 914; CHECK-GI-NEXT: .cfi_offset b13, -56 915; CHECK-GI-NEXT: .cfi_offset b14, -64 916; CHECK-GI-NEXT: mov h8, v0.h[1] 917; CHECK-GI-NEXT: mov h9, v0.h[2] 918; CHECK-GI-NEXT: mov h10, v0.h[3] 919; CHECK-GI-NEXT: mov h11, v0.h[4] 920; CHECK-GI-NEXT: mov h12, v0.h[5] 921; CHECK-GI-NEXT: mov h13, v0.h[6] 922; CHECK-GI-NEXT: mov h14, v0.h[7] 923; CHECK-GI-NEXT: fcvt s0, h0 924; CHECK-GI-NEXT: bl sinf 925; CHECK-GI-NEXT: fcvt s1, h8 926; CHECK-GI-NEXT: fcvt h0, s0 927; CHECK-GI-NEXT: str q0, [sp, #96] // 16-byte Folded Spill 928; CHECK-GI-NEXT: fmov s0, s1 929; CHECK-GI-NEXT: bl sinf 930; CHECK-GI-NEXT: 
fcvt s1, h9 931; CHECK-GI-NEXT: fcvt h0, s0 932; CHECK-GI-NEXT: str q0, [sp, #64] // 16-byte Folded Spill 933; CHECK-GI-NEXT: fmov s0, s1 934; CHECK-GI-NEXT: bl sinf 935; CHECK-GI-NEXT: fcvt s1, h10 936; CHECK-GI-NEXT: fcvt h0, s0 937; CHECK-GI-NEXT: str q0, [sp, #80] // 16-byte Folded Spill 938; CHECK-GI-NEXT: fmov s0, s1 939; CHECK-GI-NEXT: bl sinf 940; CHECK-GI-NEXT: fcvt s1, h11 941; CHECK-GI-NEXT: fcvt h0, s0 942; CHECK-GI-NEXT: str q0, [sp, #48] // 16-byte Folded Spill 943; CHECK-GI-NEXT: fmov s0, s1 944; CHECK-GI-NEXT: bl sinf 945; CHECK-GI-NEXT: fcvt s1, h12 946; CHECK-GI-NEXT: fcvt h0, s0 947; CHECK-GI-NEXT: str q0, [sp, #32] // 16-byte Folded Spill 948; CHECK-GI-NEXT: fmov s0, s1 949; CHECK-GI-NEXT: bl sinf 950; CHECK-GI-NEXT: fcvt s1, h13 951; CHECK-GI-NEXT: fcvt h0, s0 952; CHECK-GI-NEXT: str q0, [sp, #16] // 16-byte Folded Spill 953; CHECK-GI-NEXT: fmov s0, s1 954; CHECK-GI-NEXT: bl sinf 955; CHECK-GI-NEXT: fcvt s1, h14 956; CHECK-GI-NEXT: fcvt h0, s0 957; CHECK-GI-NEXT: str q0, [sp] // 16-byte Folded Spill 958; CHECK-GI-NEXT: fmov s0, s1 959; CHECK-GI-NEXT: bl sinf 960; CHECK-GI-NEXT: ldp q3, q2, [sp, #64] // 32-byte Folded Reload 961; CHECK-GI-NEXT: fcvt h0, s0 962; CHECK-GI-NEXT: ldr q1, [sp, #96] // 16-byte Folded Reload 963; CHECK-GI-NEXT: ldp d9, d8, [sp, #152] // 16-byte Folded Reload 964; CHECK-GI-NEXT: ldp d11, d10, [sp, #136] // 16-byte Folded Reload 965; CHECK-GI-NEXT: ldr x30, [sp, #168] // 8-byte Folded Reload 966; CHECK-GI-NEXT: mov v1.h[1], v3.h[0] 967; CHECK-GI-NEXT: ldr d14, [sp, #112] // 8-byte Folded Reload 968; CHECK-GI-NEXT: ldp d13, d12, [sp, #120] // 16-byte Folded Reload 969; CHECK-GI-NEXT: mov v1.h[2], v2.h[0] 970; CHECK-GI-NEXT: ldp q2, q3, [sp, #32] // 32-byte Folded Reload 971; CHECK-GI-NEXT: mov v1.h[3], v3.h[0] 972; CHECK-GI-NEXT: mov v1.h[4], v2.h[0] 973; CHECK-GI-NEXT: ldp q2, q3, [sp] // 32-byte Folded Reload 974; CHECK-GI-NEXT: mov v1.h[5], v3.h[0] 975; CHECK-GI-NEXT: mov v1.h[6], v2.h[0] 976; CHECK-GI-NEXT: mov 
v1.h[7], v0.h[0] 977; CHECK-GI-NEXT: mov v0.16b, v1.16b 978; CHECK-GI-NEXT: add sp, sp, #176 979; CHECK-GI-NEXT: ret 980entry: 981 %c = call <8 x half> @llvm.sin.v8f16(<8 x half> %a) 982 ret <8 x half> %c 983} 984 985define <16 x half> @sin_v16f16(<16 x half> %a) { 986; CHECK-SD-LABEL: sin_v16f16: 987; CHECK-SD: // %bb.0: // %entry 988; CHECK-SD-NEXT: sub sp, sp, #64 989; CHECK-SD-NEXT: str x30, [sp, #48] // 8-byte Folded Spill 990; CHECK-SD-NEXT: .cfi_def_cfa_offset 64 991; CHECK-SD-NEXT: .cfi_offset w30, -16 992; CHECK-SD-NEXT: stp q1, q0, [sp] // 32-byte Folded Spill 993; CHECK-SD-NEXT: mov h1, v0.h[1] 994; CHECK-SD-NEXT: fcvt s0, h1 995; CHECK-SD-NEXT: bl sinf 996; CHECK-SD-NEXT: fcvt h0, s0 997; CHECK-SD-NEXT: str q0, [sp, #32] // 16-byte Folded Spill 998; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload 999; CHECK-SD-NEXT: fcvt s0, h0 1000; CHECK-SD-NEXT: bl sinf 1001; CHECK-SD-NEXT: fcvt h0, s0 1002; CHECK-SD-NEXT: ldr q1, [sp, #32] // 16-byte Folded Reload 1003; CHECK-SD-NEXT: mov v0.h[1], v1.h[0] 1004; CHECK-SD-NEXT: str q0, [sp, #32] // 16-byte Folded Spill 1005; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload 1006; CHECK-SD-NEXT: mov h0, v0.h[2] 1007; CHECK-SD-NEXT: fcvt s0, h0 1008; CHECK-SD-NEXT: bl sinf 1009; CHECK-SD-NEXT: fcvt h0, s0 1010; CHECK-SD-NEXT: ldr q1, [sp, #32] // 16-byte Folded Reload 1011; CHECK-SD-NEXT: mov v1.h[2], v0.h[0] 1012; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload 1013; CHECK-SD-NEXT: mov h0, v0.h[3] 1014; CHECK-SD-NEXT: str q1, [sp, #32] // 16-byte Folded Spill 1015; CHECK-SD-NEXT: fcvt s0, h0 1016; CHECK-SD-NEXT: bl sinf 1017; CHECK-SD-NEXT: fcvt h0, s0 1018; CHECK-SD-NEXT: ldr q1, [sp, #32] // 16-byte Folded Reload 1019; CHECK-SD-NEXT: mov v1.h[3], v0.h[0] 1020; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload 1021; CHECK-SD-NEXT: mov h0, v0.h[4] 1022; CHECK-SD-NEXT: str q1, [sp, #32] // 16-byte Folded Spill 1023; CHECK-SD-NEXT: fcvt s0, h0 1024; CHECK-SD-NEXT: bl sinf 1025; 
CHECK-SD-NEXT: fcvt h0, s0 1026; CHECK-SD-NEXT: ldr q1, [sp, #32] // 16-byte Folded Reload 1027; CHECK-SD-NEXT: mov v1.h[4], v0.h[0] 1028; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload 1029; CHECK-SD-NEXT: mov h0, v0.h[5] 1030; CHECK-SD-NEXT: str q1, [sp, #32] // 16-byte Folded Spill 1031; CHECK-SD-NEXT: fcvt s0, h0 1032; CHECK-SD-NEXT: bl sinf 1033; CHECK-SD-NEXT: fcvt h0, s0 1034; CHECK-SD-NEXT: ldr q1, [sp, #32] // 16-byte Folded Reload 1035; CHECK-SD-NEXT: mov v1.h[5], v0.h[0] 1036; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload 1037; CHECK-SD-NEXT: mov h0, v0.h[6] 1038; CHECK-SD-NEXT: str q1, [sp, #32] // 16-byte Folded Spill 1039; CHECK-SD-NEXT: fcvt s0, h0 1040; CHECK-SD-NEXT: bl sinf 1041; CHECK-SD-NEXT: fcvt h0, s0 1042; CHECK-SD-NEXT: ldr q1, [sp, #32] // 16-byte Folded Reload 1043; CHECK-SD-NEXT: mov v1.h[6], v0.h[0] 1044; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload 1045; CHECK-SD-NEXT: mov h0, v0.h[7] 1046; CHECK-SD-NEXT: str q1, [sp, #32] // 16-byte Folded Spill 1047; CHECK-SD-NEXT: fcvt s0, h0 1048; CHECK-SD-NEXT: bl sinf 1049; CHECK-SD-NEXT: fcvt h0, s0 1050; CHECK-SD-NEXT: ldr q1, [sp, #32] // 16-byte Folded Reload 1051; CHECK-SD-NEXT: mov v1.h[7], v0.h[0] 1052; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload 1053; CHECK-SD-NEXT: mov h0, v0.h[1] 1054; CHECK-SD-NEXT: str q1, [sp, #32] // 16-byte Folded Spill 1055; CHECK-SD-NEXT: fcvt s0, h0 1056; CHECK-SD-NEXT: bl sinf 1057; CHECK-SD-NEXT: fcvt h0, s0 1058; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Folded Spill 1059; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload 1060; CHECK-SD-NEXT: fcvt s0, h0 1061; CHECK-SD-NEXT: bl sinf 1062; CHECK-SD-NEXT: fcvt h0, s0 1063; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload 1064; CHECK-SD-NEXT: mov v0.h[1], v1.h[0] 1065; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Folded Spill 1066; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload 1067; CHECK-SD-NEXT: mov h0, v0.h[2] 1068; CHECK-SD-NEXT: fcvt s0, h0 
1069; CHECK-SD-NEXT: bl sinf 1070; CHECK-SD-NEXT: fcvt h0, s0 1071; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload 1072; CHECK-SD-NEXT: mov v1.h[2], v0.h[0] 1073; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload 1074; CHECK-SD-NEXT: mov h0, v0.h[3] 1075; CHECK-SD-NEXT: str q1, [sp, #16] // 16-byte Folded Spill 1076; CHECK-SD-NEXT: fcvt s0, h0 1077; CHECK-SD-NEXT: bl sinf 1078; CHECK-SD-NEXT: fcvt h0, s0 1079; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload 1080; CHECK-SD-NEXT: mov v1.h[3], v0.h[0] 1081; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload 1082; CHECK-SD-NEXT: mov h0, v0.h[4] 1083; CHECK-SD-NEXT: str q1, [sp, #16] // 16-byte Folded Spill 1084; CHECK-SD-NEXT: fcvt s0, h0 1085; CHECK-SD-NEXT: bl sinf 1086; CHECK-SD-NEXT: fcvt h0, s0 1087; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload 1088; CHECK-SD-NEXT: mov v1.h[4], v0.h[0] 1089; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload 1090; CHECK-SD-NEXT: mov h0, v0.h[5] 1091; CHECK-SD-NEXT: str q1, [sp, #16] // 16-byte Folded Spill 1092; CHECK-SD-NEXT: fcvt s0, h0 1093; CHECK-SD-NEXT: bl sinf 1094; CHECK-SD-NEXT: fcvt h0, s0 1095; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload 1096; CHECK-SD-NEXT: mov v1.h[5], v0.h[0] 1097; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload 1098; CHECK-SD-NEXT: mov h0, v0.h[6] 1099; CHECK-SD-NEXT: str q1, [sp, #16] // 16-byte Folded Spill 1100; CHECK-SD-NEXT: fcvt s0, h0 1101; CHECK-SD-NEXT: bl sinf 1102; CHECK-SD-NEXT: fcvt h0, s0 1103; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload 1104; CHECK-SD-NEXT: mov v1.h[6], v0.h[0] 1105; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload 1106; CHECK-SD-NEXT: mov h0, v0.h[7] 1107; CHECK-SD-NEXT: str q1, [sp, #16] // 16-byte Folded Spill 1108; CHECK-SD-NEXT: fcvt s0, h0 1109; CHECK-SD-NEXT: bl sinf 1110; CHECK-SD-NEXT: fmov s1, s0 1111; CHECK-SD-NEXT: ldr x30, [sp, #48] // 8-byte Folded Reload 1112; CHECK-SD-NEXT: fcvt h2, s1 1113; CHECK-SD-NEXT: ldp q1, q0, [sp, 
#16] // 32-byte Folded Reload 1114; CHECK-SD-NEXT: mov v1.h[7], v2.h[0] 1115; CHECK-SD-NEXT: add sp, sp, #64 1116; CHECK-SD-NEXT: ret 1117; 1118; CHECK-GI-LABEL: sin_v16f16: 1119; CHECK-GI: // %bb.0: // %entry 1120; CHECK-GI-NEXT: sub sp, sp, #320 1121; CHECK-GI-NEXT: stp d15, d14, [sp, #240] // 16-byte Folded Spill 1122; CHECK-GI-NEXT: stp d13, d12, [sp, #256] // 16-byte Folded Spill 1123; CHECK-GI-NEXT: stp d11, d10, [sp, #272] // 16-byte Folded Spill 1124; CHECK-GI-NEXT: stp d9, d8, [sp, #288] // 16-byte Folded Spill 1125; CHECK-GI-NEXT: stp x29, x30, [sp, #304] // 16-byte Folded Spill 1126; CHECK-GI-NEXT: .cfi_def_cfa_offset 320 1127; CHECK-GI-NEXT: .cfi_offset w30, -8 1128; CHECK-GI-NEXT: .cfi_offset w29, -16 1129; CHECK-GI-NEXT: .cfi_offset b8, -24 1130; CHECK-GI-NEXT: .cfi_offset b9, -32 1131; CHECK-GI-NEXT: .cfi_offset b10, -40 1132; CHECK-GI-NEXT: .cfi_offset b11, -48 1133; CHECK-GI-NEXT: .cfi_offset b12, -56 1134; CHECK-GI-NEXT: .cfi_offset b13, -64 1135; CHECK-GI-NEXT: .cfi_offset b14, -72 1136; CHECK-GI-NEXT: .cfi_offset b15, -80 1137; CHECK-GI-NEXT: mov v2.16b, v1.16b 1138; CHECK-GI-NEXT: str q1, [sp, #80] // 16-byte Folded Spill 1139; CHECK-GI-NEXT: mov h14, v1.h[1] 1140; CHECK-GI-NEXT: mov h1, v1.h[2] 1141; CHECK-GI-NEXT: mov h15, v0.h[1] 1142; CHECK-GI-NEXT: mov h8, v0.h[2] 1143; CHECK-GI-NEXT: mov h9, v0.h[3] 1144; CHECK-GI-NEXT: mov h10, v0.h[4] 1145; CHECK-GI-NEXT: mov h11, v0.h[5] 1146; CHECK-GI-NEXT: mov h12, v0.h[6] 1147; CHECK-GI-NEXT: mov h13, v0.h[7] 1148; CHECK-GI-NEXT: fcvt s0, h0 1149; CHECK-GI-NEXT: str h1, [sp, #16] // 2-byte Folded Spill 1150; CHECK-GI-NEXT: mov h1, v2.h[3] 1151; CHECK-GI-NEXT: str h1, [sp, #32] // 2-byte Folded Spill 1152; CHECK-GI-NEXT: mov h1, v2.h[4] 1153; CHECK-GI-NEXT: str h1, [sp, #48] // 2-byte Folded Spill 1154; CHECK-GI-NEXT: mov h1, v2.h[5] 1155; CHECK-GI-NEXT: str h1, [sp, #64] // 2-byte Folded Spill 1156; CHECK-GI-NEXT: mov h1, v2.h[6] 1157; CHECK-GI-NEXT: str h1, [sp, #96] // 2-byte Folded Spill 1158; 
CHECK-GI-NEXT: mov h1, v2.h[7] 1159; CHECK-GI-NEXT: str h1, [sp, #160] // 2-byte Folded Spill 1160; CHECK-GI-NEXT: bl sinf 1161; CHECK-GI-NEXT: fcvt s1, h15 1162; CHECK-GI-NEXT: fcvt h0, s0 1163; CHECK-GI-NEXT: str q0, [sp, #192] // 16-byte Folded Spill 1164; CHECK-GI-NEXT: fmov s0, s1 1165; CHECK-GI-NEXT: bl sinf 1166; CHECK-GI-NEXT: fcvt s1, h8 1167; CHECK-GI-NEXT: fcvt h0, s0 1168; CHECK-GI-NEXT: str q0, [sp, #112] // 16-byte Folded Spill 1169; CHECK-GI-NEXT: fmov s0, s1 1170; CHECK-GI-NEXT: bl sinf 1171; CHECK-GI-NEXT: fcvt s1, h9 1172; CHECK-GI-NEXT: fcvt h0, s0 1173; CHECK-GI-NEXT: str q0, [sp, #224] // 16-byte Folded Spill 1174; CHECK-GI-NEXT: fmov s0, s1 1175; CHECK-GI-NEXT: bl sinf 1176; CHECK-GI-NEXT: fcvt s1, h10 1177; CHECK-GI-NEXT: fcvt h0, s0 1178; CHECK-GI-NEXT: str q0, [sp, #208] // 16-byte Folded Spill 1179; CHECK-GI-NEXT: fmov s0, s1 1180; CHECK-GI-NEXT: bl sinf 1181; CHECK-GI-NEXT: fcvt s1, h11 1182; CHECK-GI-NEXT: fcvt h0, s0 1183; CHECK-GI-NEXT: str q0, [sp, #176] // 16-byte Folded Spill 1184; CHECK-GI-NEXT: fmov s0, s1 1185; CHECK-GI-NEXT: bl sinf 1186; CHECK-GI-NEXT: fcvt s1, h12 1187; CHECK-GI-NEXT: fcvt h0, s0 1188; CHECK-GI-NEXT: str q0, [sp, #144] // 16-byte Folded Spill 1189; CHECK-GI-NEXT: fmov s0, s1 1190; CHECK-GI-NEXT: bl sinf 1191; CHECK-GI-NEXT: fcvt s1, h13 1192; CHECK-GI-NEXT: fcvt h0, s0 1193; CHECK-GI-NEXT: str q0, [sp, #128] // 16-byte Folded Spill 1194; CHECK-GI-NEXT: fmov s0, s1 1195; CHECK-GI-NEXT: bl sinf 1196; CHECK-GI-NEXT: ldr q1, [sp, #80] // 16-byte Folded Reload 1197; CHECK-GI-NEXT: fcvt h0, s0 1198; CHECK-GI-NEXT: fcvt s1, h1 1199; CHECK-GI-NEXT: str q0, [sp, #80] // 16-byte Folded Spill 1200; CHECK-GI-NEXT: fmov s0, s1 1201; CHECK-GI-NEXT: bl sinf 1202; CHECK-GI-NEXT: fcvt s1, h14 1203; CHECK-GI-NEXT: fcvt h0, s0 1204; CHECK-GI-NEXT: str q0, [sp] // 16-byte Folded Spill 1205; CHECK-GI-NEXT: fmov s0, s1 1206; CHECK-GI-NEXT: bl sinf 1207; CHECK-GI-NEXT: ldr h1, [sp, #16] // 2-byte Folded Reload 1208; CHECK-GI-NEXT: 
fcvt h0, s0 1209; CHECK-GI-NEXT: fcvt s1, h1 1210; CHECK-GI-NEXT: str q0, [sp, #16] // 16-byte Folded Spill 1211; CHECK-GI-NEXT: fmov s0, s1 1212; CHECK-GI-NEXT: bl sinf 1213; CHECK-GI-NEXT: ldr h1, [sp, #32] // 2-byte Folded Reload 1214; CHECK-GI-NEXT: fcvt h0, s0 1215; CHECK-GI-NEXT: fcvt s1, h1 1216; CHECK-GI-NEXT: str q0, [sp, #32] // 16-byte Folded Spill 1217; CHECK-GI-NEXT: fmov s0, s1 1218; CHECK-GI-NEXT: bl sinf 1219; CHECK-GI-NEXT: ldr h1, [sp, #48] // 2-byte Folded Reload 1220; CHECK-GI-NEXT: fcvt h0, s0 1221; CHECK-GI-NEXT: fcvt s1, h1 1222; CHECK-GI-NEXT: str q0, [sp, #48] // 16-byte Folded Spill 1223; CHECK-GI-NEXT: fmov s0, s1 1224; CHECK-GI-NEXT: bl sinf 1225; CHECK-GI-NEXT: ldr h1, [sp, #64] // 2-byte Folded Reload 1226; CHECK-GI-NEXT: fcvt h0, s0 1227; CHECK-GI-NEXT: fcvt s1, h1 1228; CHECK-GI-NEXT: str q0, [sp, #64] // 16-byte Folded Spill 1229; CHECK-GI-NEXT: fmov s0, s1 1230; CHECK-GI-NEXT: bl sinf 1231; CHECK-GI-NEXT: ldr h1, [sp, #96] // 2-byte Folded Reload 1232; CHECK-GI-NEXT: fcvt h0, s0 1233; CHECK-GI-NEXT: fcvt s1, h1 1234; CHECK-GI-NEXT: str q0, [sp, #96] // 16-byte Folded Spill 1235; CHECK-GI-NEXT: fmov s0, s1 1236; CHECK-GI-NEXT: bl sinf 1237; CHECK-GI-NEXT: ldr h1, [sp, #160] // 2-byte Folded Reload 1238; CHECK-GI-NEXT: fcvt h0, s0 1239; CHECK-GI-NEXT: fcvt s1, h1 1240; CHECK-GI-NEXT: str q0, [sp, #160] // 16-byte Folded Spill 1241; CHECK-GI-NEXT: fmov s0, s1 1242; CHECK-GI-NEXT: bl sinf 1243; CHECK-GI-NEXT: ldr q3, [sp, #192] // 16-byte Folded Reload 1244; CHECK-GI-NEXT: ldr q2, [sp, #112] // 16-byte Folded Reload 1245; CHECK-GI-NEXT: ldp x29, x30, [sp, #304] // 16-byte Folded Reload 1246; CHECK-GI-NEXT: mov v3.h[1], v2.h[0] 1247; CHECK-GI-NEXT: ldp q1, q2, [sp] // 32-byte Folded Reload 1248; CHECK-GI-NEXT: ldp d9, d8, [sp, #288] // 16-byte Folded Reload 1249; CHECK-GI-NEXT: ldp d11, d10, [sp, #272] // 16-byte Folded Reload 1250; CHECK-GI-NEXT: mov v1.h[1], v2.h[0] 1251; CHECK-GI-NEXT: ldr q2, [sp, #224] // 16-byte Folded Reload 
1252; CHECK-GI-NEXT: ldp d13, d12, [sp, #256] // 16-byte Folded Reload 1253; CHECK-GI-NEXT: mov v3.h[2], v2.h[0] 1254; CHECK-GI-NEXT: ldr q2, [sp, #32] // 16-byte Folded Reload 1255; CHECK-GI-NEXT: ldp d15, d14, [sp, #240] // 16-byte Folded Reload 1256; CHECK-GI-NEXT: mov v1.h[2], v2.h[0] 1257; CHECK-GI-NEXT: ldr q2, [sp, #208] // 16-byte Folded Reload 1258; CHECK-GI-NEXT: mov v3.h[3], v2.h[0] 1259; CHECK-GI-NEXT: ldr q2, [sp, #48] // 16-byte Folded Reload 1260; CHECK-GI-NEXT: mov v1.h[3], v2.h[0] 1261; CHECK-GI-NEXT: ldr q2, [sp, #176] // 16-byte Folded Reload 1262; CHECK-GI-NEXT: mov v3.h[4], v2.h[0] 1263; CHECK-GI-NEXT: ldr q2, [sp, #64] // 16-byte Folded Reload 1264; CHECK-GI-NEXT: mov v1.h[4], v2.h[0] 1265; CHECK-GI-NEXT: ldr q2, [sp, #144] // 16-byte Folded Reload 1266; CHECK-GI-NEXT: mov v3.h[5], v2.h[0] 1267; CHECK-GI-NEXT: ldr q2, [sp, #96] // 16-byte Folded Reload 1268; CHECK-GI-NEXT: mov v1.h[5], v2.h[0] 1269; CHECK-GI-NEXT: fcvt h2, s0 1270; CHECK-GI-NEXT: ldr q0, [sp, #128] // 16-byte Folded Reload 1271; CHECK-GI-NEXT: mov v3.h[6], v0.h[0] 1272; CHECK-GI-NEXT: ldr q0, [sp, #160] // 16-byte Folded Reload 1273; CHECK-GI-NEXT: mov v1.h[6], v0.h[0] 1274; CHECK-GI-NEXT: ldr q0, [sp, #80] // 16-byte Folded Reload 1275; CHECK-GI-NEXT: mov v3.h[7], v0.h[0] 1276; CHECK-GI-NEXT: mov v1.h[7], v2.h[0] 1277; CHECK-GI-NEXT: mov v0.16b, v3.16b 1278; CHECK-GI-NEXT: add sp, sp, #320 1279; CHECK-GI-NEXT: ret 1280entry: 1281 %c = call <16 x half> @llvm.sin.v16f16(<16 x half> %a) 1282 ret <16 x half> %c 1283} 1284 1285define <2 x fp128> @sin_v2fp128(<2 x fp128> %a) { 1286; CHECK-LABEL: sin_v2fp128: 1287; CHECK: // %bb.0: // %entry 1288; CHECK-NEXT: sub sp, sp, #48 1289; CHECK-NEXT: str x30, [sp, #32] // 8-byte Folded Spill 1290; CHECK-NEXT: .cfi_def_cfa_offset 48 1291; CHECK-NEXT: .cfi_offset w30, -16 1292; CHECK-NEXT: str q1, [sp, #16] // 16-byte Folded Spill 1293; CHECK-NEXT: bl sinl 1294; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill 1295; CHECK-NEXT: ldr q0, [sp, 
#16] // 16-byte Folded Reload 1296; CHECK-NEXT: bl sinl 1297; CHECK-NEXT: mov v1.16b, v0.16b 1298; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload 1299; CHECK-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload 1300; CHECK-NEXT: add sp, sp, #48 1301; CHECK-NEXT: ret 1302entry: 1303 %c = call <2 x fp128> @llvm.sin.v2fp128(<2 x fp128> %a) 1304 ret <2 x fp128> %c 1305} 1306 1307define double @cos_f64(double %a) { 1308; CHECK-LABEL: cos_f64: 1309; CHECK: // %bb.0: // %entry 1310; CHECK-NEXT: b cos 1311entry: 1312 %c = call double @llvm.cos.f64(double %a) 1313 ret double %c 1314} 1315 1316define float @cos_f32(float %a) { 1317; CHECK-LABEL: cos_f32: 1318; CHECK: // %bb.0: // %entry 1319; CHECK-NEXT: b cosf 1320entry: 1321 %c = call float @llvm.cos.f32(float %a) 1322 ret float %c 1323} 1324 1325define half @cos_f16(half %a) { 1326; CHECK-LABEL: cos_f16: 1327; CHECK: // %bb.0: // %entry 1328; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill 1329; CHECK-NEXT: .cfi_def_cfa_offset 16 1330; CHECK-NEXT: .cfi_offset w30, -16 1331; CHECK-NEXT: fcvt s0, h0 1332; CHECK-NEXT: bl cosf 1333; CHECK-NEXT: fcvt h0, s0 1334; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload 1335; CHECK-NEXT: ret 1336entry: 1337 %c = call half @llvm.cos.f16(half %a) 1338 ret half %c 1339} 1340 1341define fp128 @cos_fp128(fp128 %a) { 1342; CHECK-LABEL: cos_fp128: 1343; CHECK: // %bb.0: // %entry 1344; CHECK-NEXT: b cosl 1345entry: 1346 %c = call fp128 @llvm.cos.fp128(fp128 %a) 1347 ret fp128 %c 1348} 1349 1350define <1 x double> @cos_v1f64(<1 x double> %x) { 1351; CHECK-LABEL: cos_v1f64: 1352; CHECK: // %bb.0: 1353; CHECK-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill 1354; CHECK-NEXT: .cfi_def_cfa_offset 16 1355; CHECK-NEXT: .cfi_offset w30, -16 1356; CHECK-NEXT: bl cos 1357; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload 1358; CHECK-NEXT: ret 1359 %c = call <1 x double> @llvm.cos.v1f64(<1 x double> %x) 1360 ret <1 x double> %c 1361} 1362 1363define <2 x double> @cos_v2f64(<2 x double> %a) { 1364; CHECK-SD-LABEL: cos_v2f64: 1365; CHECK-SD: // %bb.0: // %entry 1366; CHECK-SD-NEXT: sub sp, sp, #48 1367; CHECK-SD-NEXT: str x30, [sp, #32] // 8-byte Folded Spill 1368; CHECK-SD-NEXT: .cfi_def_cfa_offset 48 1369; CHECK-SD-NEXT: .cfi_offset w30, -16 1370; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill 1371; CHECK-SD-NEXT: mov d0, v0.d[1] 1372; CHECK-SD-NEXT: bl cos 1373; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0 1374; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Folded Spill 1375; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload 1376; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0 1377; CHECK-SD-NEXT: bl cos 1378; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload 1379; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0 1380; CHECK-SD-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload 1381; CHECK-SD-NEXT: mov v0.d[1], v1.d[0] 1382; CHECK-SD-NEXT: add sp, sp, #48 1383; CHECK-SD-NEXT: ret 1384; 1385; CHECK-GI-LABEL: cos_v2f64: 1386; CHECK-GI: // %bb.0: // %entry 1387; CHECK-GI-NEXT: sub sp, sp, #32 1388; CHECK-GI-NEXT: str d8, [sp, #16] // 8-byte Folded Spill 1389; CHECK-GI-NEXT: str x30, [sp, #24] // 8-byte Folded Spill 1390; CHECK-GI-NEXT: .cfi_def_cfa_offset 32 1391; CHECK-GI-NEXT: .cfi_offset w30, -8 1392; CHECK-GI-NEXT: .cfi_offset b8, -16 1393; CHECK-GI-NEXT: mov d8, v0.d[1] 1394; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0 1395; CHECK-GI-NEXT: bl cos 1396; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 1397; CHECK-GI-NEXT: str q0, [sp] // 16-byte Folded Spill 1398; CHECK-GI-NEXT: fmov d0, d8 1399; CHECK-GI-NEXT: bl cos 1400; CHECK-GI-NEXT: ldr q1, [sp] // 
16-byte Folded Reload 1401; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 1402; CHECK-GI-NEXT: ldr x30, [sp, #24] // 8-byte Folded Reload 1403; CHECK-GI-NEXT: ldr d8, [sp, #16] // 8-byte Folded Reload 1404; CHECK-GI-NEXT: mov v1.d[1], v0.d[0] 1405; CHECK-GI-NEXT: mov v0.16b, v1.16b 1406; CHECK-GI-NEXT: add sp, sp, #32 1407; CHECK-GI-NEXT: ret 1408entry: 1409 %c = call <2 x double> @llvm.cos.v2f64(<2 x double> %a) 1410 ret <2 x double> %c 1411} 1412 1413define <3 x double> @cos_v3f64(<3 x double> %a) { 1414; CHECK-SD-LABEL: cos_v3f64: 1415; CHECK-SD: // %bb.0: // %entry 1416; CHECK-SD-NEXT: str d10, [sp, #-32]! // 8-byte Folded Spill 1417; CHECK-SD-NEXT: stp d9, d8, [sp, #8] // 16-byte Folded Spill 1418; CHECK-SD-NEXT: str x30, [sp, #24] // 8-byte Folded Spill 1419; CHECK-SD-NEXT: .cfi_def_cfa_offset 32 1420; CHECK-SD-NEXT: .cfi_offset w30, -8 1421; CHECK-SD-NEXT: .cfi_offset b8, -16 1422; CHECK-SD-NEXT: .cfi_offset b9, -24 1423; CHECK-SD-NEXT: .cfi_offset b10, -32 1424; CHECK-SD-NEXT: fmov d8, d2 1425; CHECK-SD-NEXT: fmov d9, d1 1426; CHECK-SD-NEXT: bl cos 1427; CHECK-SD-NEXT: fmov d10, d0 1428; CHECK-SD-NEXT: fmov d0, d9 1429; CHECK-SD-NEXT: bl cos 1430; CHECK-SD-NEXT: fmov d9, d0 1431; CHECK-SD-NEXT: fmov d0, d8 1432; CHECK-SD-NEXT: bl cos 1433; CHECK-SD-NEXT: fmov d1, d9 1434; CHECK-SD-NEXT: ldp d9, d8, [sp, #8] // 16-byte Folded Reload 1435; CHECK-SD-NEXT: ldr x30, [sp, #24] // 8-byte Folded Reload 1436; CHECK-SD-NEXT: fmov d2, d0 1437; CHECK-SD-NEXT: fmov d0, d10 1438; CHECK-SD-NEXT: ldr d10, [sp], #32 // 8-byte Folded Reload 1439; CHECK-SD-NEXT: ret 1440; 1441; CHECK-GI-LABEL: cos_v3f64: 1442; CHECK-GI: // %bb.0: // %entry 1443; CHECK-GI-NEXT: str d10, [sp, #-32]! 
// 8-byte Folded Spill 1444; CHECK-GI-NEXT: stp d9, d8, [sp, #8] // 16-byte Folded Spill 1445; CHECK-GI-NEXT: str x30, [sp, #24] // 8-byte Folded Spill 1446; CHECK-GI-NEXT: .cfi_def_cfa_offset 32 1447; CHECK-GI-NEXT: .cfi_offset w30, -8 1448; CHECK-GI-NEXT: .cfi_offset b8, -16 1449; CHECK-GI-NEXT: .cfi_offset b9, -24 1450; CHECK-GI-NEXT: .cfi_offset b10, -32 1451; CHECK-GI-NEXT: fmov d8, d1 1452; CHECK-GI-NEXT: fmov d9, d2 1453; CHECK-GI-NEXT: bl cos 1454; CHECK-GI-NEXT: fmov d10, d0 1455; CHECK-GI-NEXT: fmov d0, d8 1456; CHECK-GI-NEXT: bl cos 1457; CHECK-GI-NEXT: fmov d8, d0 1458; CHECK-GI-NEXT: fmov d0, d9 1459; CHECK-GI-NEXT: bl cos 1460; CHECK-GI-NEXT: fmov d1, d8 1461; CHECK-GI-NEXT: ldp d9, d8, [sp, #8] // 16-byte Folded Reload 1462; CHECK-GI-NEXT: ldr x30, [sp, #24] // 8-byte Folded Reload 1463; CHECK-GI-NEXT: fmov d2, d0 1464; CHECK-GI-NEXT: fmov d0, d10 1465; CHECK-GI-NEXT: ldr d10, [sp], #32 // 8-byte Folded Reload 1466; CHECK-GI-NEXT: ret 1467entry: 1468 %c = call <3 x double> @llvm.cos.v3f64(<3 x double> %a) 1469 ret <3 x double> %c 1470} 1471 1472define <4 x double> @cos_v4f64(<4 x double> %a) { 1473; CHECK-SD-LABEL: cos_v4f64: 1474; CHECK-SD: // %bb.0: // %entry 1475; CHECK-SD-NEXT: sub sp, sp, #64 1476; CHECK-SD-NEXT: str x30, [sp, #48] // 8-byte Folded Spill 1477; CHECK-SD-NEXT: .cfi_def_cfa_offset 64 1478; CHECK-SD-NEXT: .cfi_offset w30, -16 1479; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill 1480; CHECK-SD-NEXT: mov d0, v0.d[1] 1481; CHECK-SD-NEXT: str q1, [sp, #32] // 16-byte Folded Spill 1482; CHECK-SD-NEXT: bl cos 1483; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0 1484; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Folded Spill 1485; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload 1486; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0 1487; CHECK-SD-NEXT: bl cos 1488; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload 1489; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0 1490; CHECK-SD-NEXT: mov v0.d[1], v1.d[0] 
1491; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Folded Spill 1492; CHECK-SD-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload 1493; CHECK-SD-NEXT: mov d0, v0.d[1] 1494; CHECK-SD-NEXT: bl cos 1495; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0 1496; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill 1497; CHECK-SD-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload 1498; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0 1499; CHECK-SD-NEXT: bl cos 1500; CHECK-SD-NEXT: fmov d1, d0 1501; CHECK-SD-NEXT: ldp q2, q0, [sp] // 32-byte Folded Reload 1502; CHECK-SD-NEXT: ldr x30, [sp, #48] // 8-byte Folded Reload 1503; CHECK-SD-NEXT: mov v1.d[1], v2.d[0] 1504; CHECK-SD-NEXT: add sp, sp, #64 1505; CHECK-SD-NEXT: ret 1506; 1507; CHECK-GI-LABEL: cos_v4f64: 1508; CHECK-GI: // %bb.0: // %entry 1509; CHECK-GI-NEXT: sub sp, sp, #80 1510; CHECK-GI-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill 1511; CHECK-GI-NEXT: str x30, [sp, #64] // 8-byte Folded Spill 1512; CHECK-GI-NEXT: .cfi_def_cfa_offset 80 1513; CHECK-GI-NEXT: .cfi_offset w30, -16 1514; CHECK-GI-NEXT: .cfi_offset b8, -24 1515; CHECK-GI-NEXT: .cfi_offset b9, -32 1516; CHECK-GI-NEXT: str q1, [sp] // 16-byte Folded Spill 1517; CHECK-GI-NEXT: mov d8, v0.d[1] 1518; CHECK-GI-NEXT: mov d9, v1.d[1] 1519; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0 1520; CHECK-GI-NEXT: bl cos 1521; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 1522; CHECK-GI-NEXT: str q0, [sp, #32] // 16-byte Folded Spill 1523; CHECK-GI-NEXT: fmov d0, d8 1524; CHECK-GI-NEXT: bl cos 1525; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 1526; CHECK-GI-NEXT: str q0, [sp, #16] // 16-byte Folded Spill 1527; CHECK-GI-NEXT: ldr q0, [sp] // 16-byte Folded Reload 1528; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0 1529; CHECK-GI-NEXT: bl cos 1530; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 1531; CHECK-GI-NEXT: str q0, [sp] // 16-byte Folded Spill 1532; CHECK-GI-NEXT: fmov d0, d9 1533; CHECK-GI-NEXT: bl cos 1534; CHECK-GI-NEXT: 
ldp q1, q2, [sp, #16] // 32-byte Folded Reload 1535; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 1536; CHECK-GI-NEXT: ldr x30, [sp, #64] // 8-byte Folded Reload 1537; CHECK-GI-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload 1538; CHECK-GI-NEXT: mov v2.d[1], v1.d[0] 1539; CHECK-GI-NEXT: ldr q1, [sp] // 16-byte Folded Reload 1540; CHECK-GI-NEXT: mov v1.d[1], v0.d[0] 1541; CHECK-GI-NEXT: mov v0.16b, v2.16b 1542; CHECK-GI-NEXT: add sp, sp, #80 1543; CHECK-GI-NEXT: ret 1544entry: 1545 %c = call <4 x double> @llvm.cos.v4f64(<4 x double> %a) 1546 ret <4 x double> %c 1547} 1548 1549define <2 x float> @cos_v2f32(<2 x float> %a) { 1550; CHECK-SD-LABEL: cos_v2f32: 1551; CHECK-SD: // %bb.0: // %entry 1552; CHECK-SD-NEXT: sub sp, sp, #48 1553; CHECK-SD-NEXT: str x30, [sp, #32] // 8-byte Folded Spill 1554; CHECK-SD-NEXT: .cfi_def_cfa_offset 48 1555; CHECK-SD-NEXT: .cfi_offset w30, -16 1556; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0 1557; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill 1558; CHECK-SD-NEXT: mov s0, v0.s[1] 1559; CHECK-SD-NEXT: bl cosf 1560; CHECK-SD-NEXT: // kill: def $s0 killed $s0 def $q0 1561; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Folded Spill 1562; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload 1563; CHECK-SD-NEXT: // kill: def $s0 killed $s0 killed $q0 1564; CHECK-SD-NEXT: bl cosf 1565; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload 1566; CHECK-SD-NEXT: // kill: def $s0 killed $s0 def $q0 1567; CHECK-SD-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload 1568; CHECK-SD-NEXT: mov v0.s[1], v1.s[0] 1569; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0 1570; CHECK-SD-NEXT: add sp, sp, #48 1571; CHECK-SD-NEXT: ret 1572; 1573; CHECK-GI-LABEL: cos_v2f32: 1574; CHECK-GI: // %bb.0: // %entry 1575; CHECK-GI-NEXT: sub sp, sp, #32 1576; CHECK-GI-NEXT: str d8, [sp, #16] // 8-byte Folded Spill 1577; CHECK-GI-NEXT: str x30, [sp, #24] // 8-byte Folded Spill 1578; CHECK-GI-NEXT: .cfi_def_cfa_offset 32 1579; CHECK-GI-NEXT: 
.cfi_offset w30, -8 1580; CHECK-GI-NEXT: .cfi_offset b8, -16 1581; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 1582; CHECK-GI-NEXT: mov s8, v0.s[1] 1583; CHECK-GI-NEXT: // kill: def $s0 killed $s0 killed $q0 1584; CHECK-GI-NEXT: bl cosf 1585; CHECK-GI-NEXT: // kill: def $s0 killed $s0 def $q0 1586; CHECK-GI-NEXT: str q0, [sp] // 16-byte Folded Spill 1587; CHECK-GI-NEXT: fmov s0, s8 1588; CHECK-GI-NEXT: bl cosf 1589; CHECK-GI-NEXT: ldr q1, [sp] // 16-byte Folded Reload 1590; CHECK-GI-NEXT: // kill: def $s0 killed $s0 def $q0 1591; CHECK-GI-NEXT: ldr x30, [sp, #24] // 8-byte Folded Reload 1592; CHECK-GI-NEXT: ldr d8, [sp, #16] // 8-byte Folded Reload 1593; CHECK-GI-NEXT: mov v1.s[1], v0.s[0] 1594; CHECK-GI-NEXT: fmov d0, d1 1595; CHECK-GI-NEXT: add sp, sp, #32 1596; CHECK-GI-NEXT: ret 1597entry: 1598 %c = call <2 x float> @llvm.cos.v2f32(<2 x float> %a) 1599 ret <2 x float> %c 1600} 1601 1602define <3 x float> @cos_v3f32(<3 x float> %a) { 1603; CHECK-SD-LABEL: cos_v3f32: 1604; CHECK-SD: // %bb.0: // %entry 1605; CHECK-SD-NEXT: sub sp, sp, #48 1606; CHECK-SD-NEXT: str x30, [sp, #32] // 8-byte Folded Spill 1607; CHECK-SD-NEXT: .cfi_def_cfa_offset 48 1608; CHECK-SD-NEXT: .cfi_offset w30, -16 1609; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Folded Spill 1610; CHECK-SD-NEXT: mov s0, v0.s[1] 1611; CHECK-SD-NEXT: bl cosf 1612; CHECK-SD-NEXT: // kill: def $s0 killed $s0 def $q0 1613; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill 1614; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload 1615; CHECK-SD-NEXT: // kill: def $s0 killed $s0 killed $q0 1616; CHECK-SD-NEXT: bl cosf 1617; CHECK-SD-NEXT: ldr q1, [sp] // 16-byte Folded Reload 1618; CHECK-SD-NEXT: // kill: def $s0 killed $s0 def $q0 1619; CHECK-SD-NEXT: mov v0.s[1], v1.s[0] 1620; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill 1621; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload 1622; CHECK-SD-NEXT: mov s0, v0.s[2] 1623; CHECK-SD-NEXT: bl cosf 1624; CHECK-SD-NEXT: ldr q1, [sp] // 
16-byte Folded Reload 1625; CHECK-SD-NEXT: // kill: def $s0 killed $s0 def $q0 1626; CHECK-SD-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload 1627; CHECK-SD-NEXT: mov v1.s[2], v0.s[0] 1628; CHECK-SD-NEXT: mov v0.16b, v1.16b 1629; CHECK-SD-NEXT: add sp, sp, #48 1630; CHECK-SD-NEXT: ret 1631; 1632; CHECK-GI-LABEL: cos_v3f32: 1633; CHECK-GI: // %bb.0: // %entry 1634; CHECK-GI-NEXT: sub sp, sp, #64 1635; CHECK-GI-NEXT: stp d9, d8, [sp, #32] // 16-byte Folded Spill 1636; CHECK-GI-NEXT: str x30, [sp, #48] // 8-byte Folded Spill 1637; CHECK-GI-NEXT: .cfi_def_cfa_offset 64 1638; CHECK-GI-NEXT: .cfi_offset w30, -16 1639; CHECK-GI-NEXT: .cfi_offset b8, -24 1640; CHECK-GI-NEXT: .cfi_offset b9, -32 1641; CHECK-GI-NEXT: mov s8, v0.s[1] 1642; CHECK-GI-NEXT: mov s9, v0.s[2] 1643; CHECK-GI-NEXT: // kill: def $s0 killed $s0 killed $q0 1644; CHECK-GI-NEXT: bl cosf 1645; CHECK-GI-NEXT: // kill: def $s0 killed $s0 def $q0 1646; CHECK-GI-NEXT: str q0, [sp, #16] // 16-byte Folded Spill 1647; CHECK-GI-NEXT: fmov s0, s8 1648; CHECK-GI-NEXT: bl cosf 1649; CHECK-GI-NEXT: // kill: def $s0 killed $s0 def $q0 1650; CHECK-GI-NEXT: str q0, [sp] // 16-byte Folded Spill 1651; CHECK-GI-NEXT: fmov s0, s9 1652; CHECK-GI-NEXT: bl cosf 1653; CHECK-GI-NEXT: ldp q2, q1, [sp] // 32-byte Folded Reload 1654; CHECK-GI-NEXT: // kill: def $s0 killed $s0 def $q0 1655; CHECK-GI-NEXT: ldr x30, [sp, #48] // 8-byte Folded Reload 1656; CHECK-GI-NEXT: ldp d9, d8, [sp, #32] // 16-byte Folded Reload 1657; CHECK-GI-NEXT: mov v1.s[1], v2.s[0] 1658; CHECK-GI-NEXT: mov v1.s[2], v0.s[0] 1659; CHECK-GI-NEXT: mov v0.16b, v1.16b 1660; CHECK-GI-NEXT: add sp, sp, #64 1661; CHECK-GI-NEXT: ret 1662entry: 1663 %c = call <3 x float> @llvm.cos.v3f32(<3 x float> %a) 1664 ret <3 x float> %c 1665} 1666 1667define <4 x float> @cos_v4f32(<4 x float> %a) { 1668; CHECK-SD-LABEL: cos_v4f32: 1669; CHECK-SD: // %bb.0: // %entry 1670; CHECK-SD-NEXT: sub sp, sp, #48 1671; CHECK-SD-NEXT: str x30, [sp, #32] // 8-byte Folded Spill 1672; 
CHECK-SD-NEXT: .cfi_def_cfa_offset 48 1673; CHECK-SD-NEXT: .cfi_offset w30, -16 1674; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Folded Spill 1675; CHECK-SD-NEXT: mov s0, v0.s[1] 1676; CHECK-SD-NEXT: bl cosf 1677; CHECK-SD-NEXT: // kill: def $s0 killed $s0 def $q0 1678; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill 1679; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload 1680; CHECK-SD-NEXT: // kill: def $s0 killed $s0 killed $q0 1681; CHECK-SD-NEXT: bl cosf 1682; CHECK-SD-NEXT: ldr q1, [sp] // 16-byte Folded Reload 1683; CHECK-SD-NEXT: // kill: def $s0 killed $s0 def $q0 1684; CHECK-SD-NEXT: mov v0.s[1], v1.s[0] 1685; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill 1686; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload 1687; CHECK-SD-NEXT: mov s0, v0.s[2] 1688; CHECK-SD-NEXT: bl cosf 1689; CHECK-SD-NEXT: ldr q1, [sp] // 16-byte Folded Reload 1690; CHECK-SD-NEXT: // kill: def $s0 killed $s0 def $q0 1691; CHECK-SD-NEXT: mov v1.s[2], v0.s[0] 1692; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload 1693; CHECK-SD-NEXT: mov s0, v0.s[3] 1694; CHECK-SD-NEXT: str q1, [sp] // 16-byte Folded Spill 1695; CHECK-SD-NEXT: bl cosf 1696; CHECK-SD-NEXT: ldr q1, [sp] // 16-byte Folded Reload 1697; CHECK-SD-NEXT: // kill: def $s0 killed $s0 def $q0 1698; CHECK-SD-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload 1699; CHECK-SD-NEXT: mov v1.s[3], v0.s[0] 1700; CHECK-SD-NEXT: mov v0.16b, v1.16b 1701; CHECK-SD-NEXT: add sp, sp, #48 1702; CHECK-SD-NEXT: ret 1703; 1704; CHECK-GI-LABEL: cos_v4f32: 1705; CHECK-GI: // %bb.0: // %entry 1706; CHECK-GI-NEXT: sub sp, sp, #80 1707; CHECK-GI-NEXT: str d10, [sp, #48] // 8-byte Folded Spill 1708; CHECK-GI-NEXT: stp d9, d8, [sp, #56] // 16-byte Folded Spill 1709; CHECK-GI-NEXT: str x30, [sp, #72] // 8-byte Folded Spill 1710; CHECK-GI-NEXT: .cfi_def_cfa_offset 80 1711; CHECK-GI-NEXT: .cfi_offset w30, -8 1712; CHECK-GI-NEXT: .cfi_offset b8, -16 1713; CHECK-GI-NEXT: .cfi_offset b9, -24 1714; CHECK-GI-NEXT: .cfi_offset 
b10, -32 1715; CHECK-GI-NEXT: mov s8, v0.s[1] 1716; CHECK-GI-NEXT: mov s9, v0.s[2] 1717; CHECK-GI-NEXT: mov s10, v0.s[3] 1718; CHECK-GI-NEXT: // kill: def $s0 killed $s0 killed $q0 1719; CHECK-GI-NEXT: bl cosf 1720; CHECK-GI-NEXT: // kill: def $s0 killed $s0 def $q0 1721; CHECK-GI-NEXT: str q0, [sp, #32] // 16-byte Folded Spill 1722; CHECK-GI-NEXT: fmov s0, s8 1723; CHECK-GI-NEXT: bl cosf 1724; CHECK-GI-NEXT: // kill: def $s0 killed $s0 def $q0 1725; CHECK-GI-NEXT: str q0, [sp, #16] // 16-byte Folded Spill 1726; CHECK-GI-NEXT: fmov s0, s9 1727; CHECK-GI-NEXT: bl cosf 1728; CHECK-GI-NEXT: // kill: def $s0 killed $s0 def $q0 1729; CHECK-GI-NEXT: str q0, [sp] // 16-byte Folded Spill 1730; CHECK-GI-NEXT: fmov s0, s10 1731; CHECK-GI-NEXT: bl cosf 1732; CHECK-GI-NEXT: ldp q2, q1, [sp, #16] // 32-byte Folded Reload 1733; CHECK-GI-NEXT: // kill: def $s0 killed $s0 def $q0 1734; CHECK-GI-NEXT: ldr x30, [sp, #72] // 8-byte Folded Reload 1735; CHECK-GI-NEXT: ldp d9, d8, [sp, #56] // 16-byte Folded Reload 1736; CHECK-GI-NEXT: ldr d10, [sp, #48] // 8-byte Folded Reload 1737; CHECK-GI-NEXT: mov v1.s[1], v2.s[0] 1738; CHECK-GI-NEXT: ldr q2, [sp] // 16-byte Folded Reload 1739; CHECK-GI-NEXT: mov v1.s[2], v2.s[0] 1740; CHECK-GI-NEXT: mov v1.s[3], v0.s[0] 1741; CHECK-GI-NEXT: mov v0.16b, v1.16b 1742; CHECK-GI-NEXT: add sp, sp, #80 1743; CHECK-GI-NEXT: ret 1744entry: 1745 %c = call <4 x float> @llvm.cos.v4f32(<4 x float> %a) 1746 ret <4 x float> %c 1747} 1748 1749define <8 x float> @cos_v8f32(<8 x float> %a) { 1750; CHECK-SD-LABEL: cos_v8f32: 1751; CHECK-SD: // %bb.0: // %entry 1752; CHECK-SD-NEXT: sub sp, sp, #64 1753; CHECK-SD-NEXT: str x30, [sp, #48] // 8-byte Folded Spill 1754; CHECK-SD-NEXT: .cfi_def_cfa_offset 64 1755; CHECK-SD-NEXT: .cfi_offset w30, -16 1756; CHECK-SD-NEXT: stp q0, q1, [sp] // 32-byte Folded Spill 1757; CHECK-SD-NEXT: mov s0, v0.s[1] 1758; CHECK-SD-NEXT: bl cosf 1759; CHECK-SD-NEXT: // kill: def $s0 killed $s0 def $q0 1760; CHECK-SD-NEXT: str q0, [sp, #32] // 
16-byte Folded Spill 1761; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload 1762; CHECK-SD-NEXT: // kill: def $s0 killed $s0 killed $q0 1763; CHECK-SD-NEXT: bl cosf 1764; CHECK-SD-NEXT: ldr q1, [sp, #32] // 16-byte Folded Reload 1765; CHECK-SD-NEXT: // kill: def $s0 killed $s0 def $q0 1766; CHECK-SD-NEXT: mov v0.s[1], v1.s[0] 1767; CHECK-SD-NEXT: str q0, [sp, #32] // 16-byte Folded Spill 1768; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload 1769; CHECK-SD-NEXT: mov s0, v0.s[2] 1770; CHECK-SD-NEXT: bl cosf 1771; CHECK-SD-NEXT: ldr q1, [sp, #32] // 16-byte Folded Reload 1772; CHECK-SD-NEXT: // kill: def $s0 killed $s0 def $q0 1773; CHECK-SD-NEXT: mov v1.s[2], v0.s[0] 1774; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload 1775; CHECK-SD-NEXT: mov s0, v0.s[3] 1776; CHECK-SD-NEXT: str q1, [sp, #32] // 16-byte Folded Spill 1777; CHECK-SD-NEXT: bl cosf 1778; CHECK-SD-NEXT: ldr q1, [sp, #32] // 16-byte Folded Reload 1779; CHECK-SD-NEXT: // kill: def $s0 killed $s0 def $q0 1780; CHECK-SD-NEXT: mov v1.s[3], v0.s[0] 1781; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload 1782; CHECK-SD-NEXT: mov s0, v0.s[1] 1783; CHECK-SD-NEXT: str q1, [sp, #32] // 16-byte Folded Spill 1784; CHECK-SD-NEXT: bl cosf 1785; CHECK-SD-NEXT: // kill: def $s0 killed $s0 def $q0 1786; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill 1787; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload 1788; CHECK-SD-NEXT: // kill: def $s0 killed $s0 killed $q0 1789; CHECK-SD-NEXT: bl cosf 1790; CHECK-SD-NEXT: ldr q1, [sp] // 16-byte Folded Reload 1791; CHECK-SD-NEXT: // kill: def $s0 killed $s0 def $q0 1792; CHECK-SD-NEXT: mov v0.s[1], v1.s[0] 1793; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill 1794; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload 1795; CHECK-SD-NEXT: mov s0, v0.s[2] 1796; CHECK-SD-NEXT: bl cosf 1797; CHECK-SD-NEXT: ldr q1, [sp] // 16-byte Folded Reload 1798; CHECK-SD-NEXT: // kill: def $s0 killed $s0 def $q0 1799; CHECK-SD-NEXT: mov v1.s[2], v0.s[0] 
1800; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload 1801; CHECK-SD-NEXT: mov s0, v0.s[3] 1802; CHECK-SD-NEXT: str q1, [sp] // 16-byte Folded Spill 1803; CHECK-SD-NEXT: bl cosf 1804; CHECK-SD-NEXT: fmov s2, s0 1805; CHECK-SD-NEXT: ldr q1, [sp] // 16-byte Folded Reload 1806; CHECK-SD-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload 1807; CHECK-SD-NEXT: ldr x30, [sp, #48] // 8-byte Folded Reload 1808; CHECK-SD-NEXT: mov v1.s[3], v2.s[0] 1809; CHECK-SD-NEXT: add sp, sp, #64 1810; CHECK-SD-NEXT: ret 1811; 1812; CHECK-GI-LABEL: cos_v8f32: 1813; CHECK-GI: // %bb.0: // %entry 1814; CHECK-GI-NEXT: sub sp, sp, #176 1815; CHECK-GI-NEXT: stp d13, d12, [sp, #112] // 16-byte Folded Spill 1816; CHECK-GI-NEXT: stp d11, d10, [sp, #128] // 16-byte Folded Spill 1817; CHECK-GI-NEXT: stp d9, d8, [sp, #144] // 16-byte Folded Spill 1818; CHECK-GI-NEXT: str x30, [sp, #160] // 8-byte Folded Spill 1819; CHECK-GI-NEXT: .cfi_def_cfa_offset 176 1820; CHECK-GI-NEXT: .cfi_offset w30, -16 1821; CHECK-GI-NEXT: .cfi_offset b8, -24 1822; CHECK-GI-NEXT: .cfi_offset b9, -32 1823; CHECK-GI-NEXT: .cfi_offset b10, -40 1824; CHECK-GI-NEXT: .cfi_offset b11, -48 1825; CHECK-GI-NEXT: .cfi_offset b12, -56 1826; CHECK-GI-NEXT: .cfi_offset b13, -64 1827; CHECK-GI-NEXT: str q1, [sp, #32] // 16-byte Folded Spill 1828; CHECK-GI-NEXT: mov s8, v0.s[1] 1829; CHECK-GI-NEXT: mov s9, v0.s[2] 1830; CHECK-GI-NEXT: mov s10, v0.s[3] 1831; CHECK-GI-NEXT: mov s11, v1.s[1] 1832; CHECK-GI-NEXT: // kill: def $s0 killed $s0 killed $q0 1833; CHECK-GI-NEXT: mov s12, v1.s[2] 1834; CHECK-GI-NEXT: mov s13, v1.s[3] 1835; CHECK-GI-NEXT: bl cosf 1836; CHECK-GI-NEXT: // kill: def $s0 killed $s0 def $q0 1837; CHECK-GI-NEXT: str q0, [sp, #64] // 16-byte Folded Spill 1838; CHECK-GI-NEXT: fmov s0, s8 1839; CHECK-GI-NEXT: bl cosf 1840; CHECK-GI-NEXT: // kill: def $s0 killed $s0 def $q0 1841; CHECK-GI-NEXT: str q0, [sp, #48] // 16-byte Folded Spill 1842; CHECK-GI-NEXT: fmov s0, s9 1843; CHECK-GI-NEXT: bl cosf 1844; CHECK-GI-NEXT: // 
kill: def $s0 killed $s0 def $q0 1845; CHECK-GI-NEXT: str q0, [sp, #96] // 16-byte Folded Spill 1846; CHECK-GI-NEXT: fmov s0, s10 1847; CHECK-GI-NEXT: bl cosf 1848; CHECK-GI-NEXT: // kill: def $s0 killed $s0 def $q0 1849; CHECK-GI-NEXT: str q0, [sp, #80] // 16-byte Folded Spill 1850; CHECK-GI-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload 1851; CHECK-GI-NEXT: // kill: def $s0 killed $s0 killed $q0 1852; CHECK-GI-NEXT: bl cosf 1853; CHECK-GI-NEXT: // kill: def $s0 killed $s0 def $q0 1854; CHECK-GI-NEXT: str q0, [sp, #32] // 16-byte Folded Spill 1855; CHECK-GI-NEXT: fmov s0, s11 1856; CHECK-GI-NEXT: bl cosf 1857; CHECK-GI-NEXT: // kill: def $s0 killed $s0 def $q0 1858; CHECK-GI-NEXT: str q0, [sp, #16] // 16-byte Folded Spill 1859; CHECK-GI-NEXT: fmov s0, s12 1860; CHECK-GI-NEXT: bl cosf 1861; CHECK-GI-NEXT: // kill: def $s0 killed $s0 def $q0 1862; CHECK-GI-NEXT: str q0, [sp] // 16-byte Folded Spill 1863; CHECK-GI-NEXT: fmov s0, s13 1864; CHECK-GI-NEXT: bl cosf 1865; CHECK-GI-NEXT: ldp q2, q1, [sp, #48] // 32-byte Folded Reload 1866; CHECK-GI-NEXT: // kill: def $s0 killed $s0 def $q0 1867; CHECK-GI-NEXT: ldr x30, [sp, #160] // 8-byte Folded Reload 1868; CHECK-GI-NEXT: ldp d9, d8, [sp, #144] // 16-byte Folded Reload 1869; CHECK-GI-NEXT: ldp d11, d10, [sp, #128] // 16-byte Folded Reload 1870; CHECK-GI-NEXT: mov v1.s[1], v2.s[0] 1871; CHECK-GI-NEXT: ldp q2, q3, [sp, #16] // 32-byte Folded Reload 1872; CHECK-GI-NEXT: ldp d13, d12, [sp, #112] // 16-byte Folded Reload 1873; CHECK-GI-NEXT: mov v3.s[1], v2.s[0] 1874; CHECK-GI-NEXT: ldr q2, [sp, #96] // 16-byte Folded Reload 1875; CHECK-GI-NEXT: mov v1.s[2], v2.s[0] 1876; CHECK-GI-NEXT: ldr q2, [sp] // 16-byte Folded Reload 1877; CHECK-GI-NEXT: mov v3.s[2], v2.s[0] 1878; CHECK-GI-NEXT: ldr q2, [sp, #80] // 16-byte Folded Reload 1879; CHECK-GI-NEXT: mov v1.s[3], v2.s[0] 1880; CHECK-GI-NEXT: mov v3.s[3], v0.s[0] 1881; CHECK-GI-NEXT: mov v2.16b, v1.16b 1882; CHECK-GI-NEXT: mov v1.16b, v3.16b 1883; CHECK-GI-NEXT: mov v0.16b, 
v2.16b 1884; CHECK-GI-NEXT: add sp, sp, #176 1885; CHECK-GI-NEXT: ret 1886entry: 1887 %c = call <8 x float> @llvm.cos.v8f32(<8 x float> %a) 1888 ret <8 x float> %c 1889} 1890 1891define <7 x half> @cos_v7f16(<7 x half> %a) { 1892; CHECK-SD-LABEL: cos_v7f16: 1893; CHECK-SD: // %bb.0: // %entry 1894; CHECK-SD-NEXT: sub sp, sp, #48 1895; CHECK-SD-NEXT: str x30, [sp, #32] // 8-byte Folded Spill 1896; CHECK-SD-NEXT: .cfi_def_cfa_offset 48 1897; CHECK-SD-NEXT: .cfi_offset w30, -16 1898; CHECK-SD-NEXT: mov h1, v0.h[1] 1899; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill 1900; CHECK-SD-NEXT: fcvt s0, h1 1901; CHECK-SD-NEXT: bl cosf 1902; CHECK-SD-NEXT: fcvt h0, s0 1903; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Folded Spill 1904; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload 1905; CHECK-SD-NEXT: fcvt s0, h0 1906; CHECK-SD-NEXT: bl cosf 1907; CHECK-SD-NEXT: fcvt h0, s0 1908; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload 1909; CHECK-SD-NEXT: mov v0.h[1], v1.h[0] 1910; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Folded Spill 1911; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload 1912; CHECK-SD-NEXT: mov h0, v0.h[2] 1913; CHECK-SD-NEXT: fcvt s0, h0 1914; CHECK-SD-NEXT: bl cosf 1915; CHECK-SD-NEXT: fcvt h0, s0 1916; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload 1917; CHECK-SD-NEXT: mov v1.h[2], v0.h[0] 1918; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload 1919; CHECK-SD-NEXT: mov h0, v0.h[3] 1920; CHECK-SD-NEXT: str q1, [sp, #16] // 16-byte Folded Spill 1921; CHECK-SD-NEXT: fcvt s0, h0 1922; CHECK-SD-NEXT: bl cosf 1923; CHECK-SD-NEXT: fcvt h0, s0 1924; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload 1925; CHECK-SD-NEXT: mov v1.h[3], v0.h[0] 1926; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload 1927; CHECK-SD-NEXT: mov h0, v0.h[4] 1928; CHECK-SD-NEXT: str q1, [sp, #16] // 16-byte Folded Spill 1929; CHECK-SD-NEXT: fcvt s0, h0 1930; CHECK-SD-NEXT: bl cosf 1931; CHECK-SD-NEXT: fcvt h0, s0 1932; CHECK-SD-NEXT: ldr q1, 
[sp, #16] // 16-byte Folded Reload 1933; CHECK-SD-NEXT: mov v1.h[4], v0.h[0] 1934; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload 1935; CHECK-SD-NEXT: mov h0, v0.h[5] 1936; CHECK-SD-NEXT: str q1, [sp, #16] // 16-byte Folded Spill 1937; CHECK-SD-NEXT: fcvt s0, h0 1938; CHECK-SD-NEXT: bl cosf 1939; CHECK-SD-NEXT: fcvt h0, s0 1940; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload 1941; CHECK-SD-NEXT: mov v1.h[5], v0.h[0] 1942; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload 1943; CHECK-SD-NEXT: mov h0, v0.h[6] 1944; CHECK-SD-NEXT: str q1, [sp, #16] // 16-byte Folded Spill 1945; CHECK-SD-NEXT: fcvt s0, h0 1946; CHECK-SD-NEXT: bl cosf 1947; CHECK-SD-NEXT: fcvt h0, s0 1948; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload 1949; CHECK-SD-NEXT: mov v1.h[6], v0.h[0] 1950; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload 1951; CHECK-SD-NEXT: mov h0, v0.h[7] 1952; CHECK-SD-NEXT: str q1, [sp, #16] // 16-byte Folded Spill 1953; CHECK-SD-NEXT: fcvt s0, h0 1954; CHECK-SD-NEXT: bl cosf 1955; CHECK-SD-NEXT: fcvt h1, s0 1956; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload 1957; CHECK-SD-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload 1958; CHECK-SD-NEXT: mov v0.h[7], v1.h[0] 1959; CHECK-SD-NEXT: add sp, sp, #48 1960; CHECK-SD-NEXT: ret 1961; 1962; CHECK-GI-LABEL: cos_v7f16: 1963; CHECK-GI: // %bb.0: // %entry 1964; CHECK-GI-NEXT: sub sp, sp, #160 1965; CHECK-GI-NEXT: stp d13, d12, [sp, #96] // 16-byte Folded Spill 1966; CHECK-GI-NEXT: stp d11, d10, [sp, #112] // 16-byte Folded Spill 1967; CHECK-GI-NEXT: stp d9, d8, [sp, #128] // 16-byte Folded Spill 1968; CHECK-GI-NEXT: str x30, [sp, #144] // 8-byte Folded Spill 1969; CHECK-GI-NEXT: .cfi_def_cfa_offset 160 1970; CHECK-GI-NEXT: .cfi_offset w30, -16 1971; CHECK-GI-NEXT: .cfi_offset b8, -24 1972; CHECK-GI-NEXT: .cfi_offset b9, -32 1973; CHECK-GI-NEXT: .cfi_offset b10, -40 1974; CHECK-GI-NEXT: .cfi_offset b11, -48 1975; CHECK-GI-NEXT: .cfi_offset b12, -56 1976; CHECK-GI-NEXT: .cfi_offset b13, 
-64 1977; CHECK-GI-NEXT: mov h8, v0.h[1] 1978; CHECK-GI-NEXT: mov h9, v0.h[2] 1979; CHECK-GI-NEXT: mov h10, v0.h[3] 1980; CHECK-GI-NEXT: mov h11, v0.h[4] 1981; CHECK-GI-NEXT: mov h12, v0.h[5] 1982; CHECK-GI-NEXT: mov h13, v0.h[6] 1983; CHECK-GI-NEXT: fcvt s0, h0 1984; CHECK-GI-NEXT: bl cosf 1985; CHECK-GI-NEXT: fcvt s1, h8 1986; CHECK-GI-NEXT: fcvt h0, s0 1987; CHECK-GI-NEXT: str q0, [sp, #80] // 16-byte Folded Spill 1988; CHECK-GI-NEXT: fmov s0, s1 1989; CHECK-GI-NEXT: bl cosf 1990; CHECK-GI-NEXT: fcvt s1, h9 1991; CHECK-GI-NEXT: fcvt h0, s0 1992; CHECK-GI-NEXT: str q0, [sp, #48] // 16-byte Folded Spill 1993; CHECK-GI-NEXT: fmov s0, s1 1994; CHECK-GI-NEXT: bl cosf 1995; CHECK-GI-NEXT: fcvt s1, h10 1996; CHECK-GI-NEXT: fcvt h0, s0 1997; CHECK-GI-NEXT: str q0, [sp, #64] // 16-byte Folded Spill 1998; CHECK-GI-NEXT: fmov s0, s1 1999; CHECK-GI-NEXT: bl cosf 2000; CHECK-GI-NEXT: fcvt s1, h11 2001; CHECK-GI-NEXT: fcvt h0, s0 2002; CHECK-GI-NEXT: str q0, [sp, #32] // 16-byte Folded Spill 2003; CHECK-GI-NEXT: fmov s0, s1 2004; CHECK-GI-NEXT: bl cosf 2005; CHECK-GI-NEXT: fcvt s1, h12 2006; CHECK-GI-NEXT: fcvt h0, s0 2007; CHECK-GI-NEXT: str q0, [sp, #16] // 16-byte Folded Spill 2008; CHECK-GI-NEXT: fmov s0, s1 2009; CHECK-GI-NEXT: bl cosf 2010; CHECK-GI-NEXT: fcvt s1, h13 2011; CHECK-GI-NEXT: fcvt h0, s0 2012; CHECK-GI-NEXT: str q0, [sp] // 16-byte Folded Spill 2013; CHECK-GI-NEXT: fmov s0, s1 2014; CHECK-GI-NEXT: bl cosf 2015; CHECK-GI-NEXT: ldp q3, q2, [sp, #48] // 32-byte Folded Reload 2016; CHECK-GI-NEXT: fcvt h0, s0 2017; CHECK-GI-NEXT: ldr q1, [sp, #80] // 16-byte Folded Reload 2018; CHECK-GI-NEXT: ldp d9, d8, [sp, #128] // 16-byte Folded Reload 2019; CHECK-GI-NEXT: ldp d11, d10, [sp, #112] // 16-byte Folded Reload 2020; CHECK-GI-NEXT: ldr x30, [sp, #144] // 8-byte Folded Reload 2021; CHECK-GI-NEXT: mov v1.h[1], v3.h[0] 2022; CHECK-GI-NEXT: ldp d13, d12, [sp, #96] // 16-byte Folded Reload 2023; CHECK-GI-NEXT: mov v1.h[2], v2.h[0] 2024; CHECK-GI-NEXT: ldp q2, q3, [sp, 
#16] // 32-byte Folded Reload 2025; CHECK-GI-NEXT: mov v1.h[3], v3.h[0] 2026; CHECK-GI-NEXT: mov v1.h[4], v2.h[0] 2027; CHECK-GI-NEXT: ldr q2, [sp] // 16-byte Folded Reload 2028; CHECK-GI-NEXT: mov v1.h[5], v2.h[0] 2029; CHECK-GI-NEXT: mov v1.h[6], v0.h[0] 2030; CHECK-GI-NEXT: mov v0.16b, v1.16b 2031; CHECK-GI-NEXT: add sp, sp, #160 2032; CHECK-GI-NEXT: ret 2033entry: 2034 %c = call <7 x half> @llvm.cos.v7f16(<7 x half> %a) 2035 ret <7 x half> %c 2036} 2037 2038define <4 x half> @cos_v4f16(<4 x half> %a) { 2039; CHECK-SD-LABEL: cos_v4f16: 2040; CHECK-SD: // %bb.0: // %entry 2041; CHECK-SD-NEXT: sub sp, sp, #48 2042; CHECK-SD-NEXT: str x30, [sp, #32] // 8-byte Folded Spill 2043; CHECK-SD-NEXT: .cfi_def_cfa_offset 48 2044; CHECK-SD-NEXT: .cfi_offset w30, -16 2045; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0 2046; CHECK-SD-NEXT: mov h1, v0.h[1] 2047; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Folded Spill 2048; CHECK-SD-NEXT: fcvt s0, h1 2049; CHECK-SD-NEXT: bl cosf 2050; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload 2051; CHECK-SD-NEXT: fcvt h0, s0 2052; CHECK-SD-NEXT: fcvt s1, h1 2053; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill 2054; CHECK-SD-NEXT: fmov s0, s1 2055; CHECK-SD-NEXT: bl cosf 2056; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload 2057; CHECK-SD-NEXT: fcvt h2, s0 2058; CHECK-SD-NEXT: mov h1, v1.h[2] 2059; CHECK-SD-NEXT: fcvt s0, h1 2060; CHECK-SD-NEXT: ldr q1, [sp] // 16-byte Folded Reload 2061; CHECK-SD-NEXT: mov v2.h[1], v1.h[0] 2062; CHECK-SD-NEXT: str q2, [sp] // 16-byte Folded Spill 2063; CHECK-SD-NEXT: bl cosf 2064; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload 2065; CHECK-SD-NEXT: fcvt h2, s0 2066; CHECK-SD-NEXT: mov h1, v1.h[3] 2067; CHECK-SD-NEXT: fcvt s0, h1 2068; CHECK-SD-NEXT: ldr q1, [sp] // 16-byte Folded Reload 2069; CHECK-SD-NEXT: mov v1.h[2], v2.h[0] 2070; CHECK-SD-NEXT: str q1, [sp] // 16-byte Folded Spill 2071; CHECK-SD-NEXT: bl cosf 2072; CHECK-SD-NEXT: fcvt h1, s0 2073; CHECK-SD-NEXT: 
ldr q0, [sp] // 16-byte Folded Reload 2074; CHECK-SD-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload 2075; CHECK-SD-NEXT: mov v0.h[3], v1.h[0] 2076; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0 2077; CHECK-SD-NEXT: add sp, sp, #48 2078; CHECK-SD-NEXT: ret 2079; 2080; CHECK-GI-LABEL: cos_v4f16: 2081; CHECK-GI: // %bb.0: // %entry 2082; CHECK-GI-NEXT: sub sp, sp, #80 2083; CHECK-GI-NEXT: str d10, [sp, #48] // 8-byte Folded Spill 2084; CHECK-GI-NEXT: stp d9, d8, [sp, #56] // 16-byte Folded Spill 2085; CHECK-GI-NEXT: str x30, [sp, #72] // 8-byte Folded Spill 2086; CHECK-GI-NEXT: .cfi_def_cfa_offset 80 2087; CHECK-GI-NEXT: .cfi_offset w30, -8 2088; CHECK-GI-NEXT: .cfi_offset b8, -16 2089; CHECK-GI-NEXT: .cfi_offset b9, -24 2090; CHECK-GI-NEXT: .cfi_offset b10, -32 2091; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 2092; CHECK-GI-NEXT: mov h8, v0.h[1] 2093; CHECK-GI-NEXT: mov h9, v0.h[2] 2094; CHECK-GI-NEXT: mov h10, v0.h[3] 2095; CHECK-GI-NEXT: fcvt s0, h0 2096; CHECK-GI-NEXT: bl cosf 2097; CHECK-GI-NEXT: fcvt s1, h8 2098; CHECK-GI-NEXT: fcvt h0, s0 2099; CHECK-GI-NEXT: str q0, [sp, #32] // 16-byte Folded Spill 2100; CHECK-GI-NEXT: fmov s0, s1 2101; CHECK-GI-NEXT: bl cosf 2102; CHECK-GI-NEXT: fcvt s1, h9 2103; CHECK-GI-NEXT: fcvt h0, s0 2104; CHECK-GI-NEXT: str q0, [sp] // 16-byte Folded Spill 2105; CHECK-GI-NEXT: fmov s0, s1 2106; CHECK-GI-NEXT: bl cosf 2107; CHECK-GI-NEXT: fcvt s1, h10 2108; CHECK-GI-NEXT: fcvt h0, s0 2109; CHECK-GI-NEXT: str q0, [sp, #16] // 16-byte Folded Spill 2110; CHECK-GI-NEXT: fmov s0, s1 2111; CHECK-GI-NEXT: bl cosf 2112; CHECK-GI-NEXT: ldp q3, q2, [sp] // 32-byte Folded Reload 2113; CHECK-GI-NEXT: fcvt h0, s0 2114; CHECK-GI-NEXT: ldr q1, [sp, #32] // 16-byte Folded Reload 2115; CHECK-GI-NEXT: ldp d9, d8, [sp, #56] // 16-byte Folded Reload 2116; CHECK-GI-NEXT: ldr x30, [sp, #72] // 8-byte Folded Reload 2117; CHECK-GI-NEXT: ldr d10, [sp, #48] // 8-byte Folded Reload 2118; CHECK-GI-NEXT: mov v1.h[1], v3.h[0] 2119; CHECK-GI-NEXT: 
mov v1.h[2], v2.h[0] 2120; CHECK-GI-NEXT: mov v1.h[3], v0.h[0] 2121; CHECK-GI-NEXT: mov v0.16b, v1.16b 2122; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0 2123; CHECK-GI-NEXT: add sp, sp, #80 2124; CHECK-GI-NEXT: ret 2125entry: 2126 %c = call <4 x half> @llvm.cos.v4f16(<4 x half> %a) 2127 ret <4 x half> %c 2128} 2129 ; llvm.cos on <8 x half>: both SelectionDAG and GlobalISel are expected to scalarize into eight cosf libcalls, widening each half lane to float with fcvt and narrowing the result back before reinserting it. 2130define <8 x half> @cos_v8f16(<8 x half> %a) { 2131; CHECK-SD-LABEL: cos_v8f16: 2132; CHECK-SD: // %bb.0: // %entry 2133; CHECK-SD-NEXT: sub sp, sp, #48 2134; CHECK-SD-NEXT: str x30, [sp, #32] // 8-byte Folded Spill 2135; CHECK-SD-NEXT: .cfi_def_cfa_offset 48 2136; CHECK-SD-NEXT: .cfi_offset w30, -16 2137; CHECK-SD-NEXT: mov h1, v0.h[1] 2138; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill 2139; CHECK-SD-NEXT: fcvt s0, h1 2140; CHECK-SD-NEXT: bl cosf 2141; CHECK-SD-NEXT: fcvt h0, s0 2142; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Folded Spill 2143; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload 2144; CHECK-SD-NEXT: fcvt s0, h0 2145; CHECK-SD-NEXT: bl cosf 2146; CHECK-SD-NEXT: fcvt h0, s0 2147; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload 2148; CHECK-SD-NEXT: mov v0.h[1], v1.h[0] 2149; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Folded Spill 2150; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload 2151; CHECK-SD-NEXT: mov h0, v0.h[2] 2152; CHECK-SD-NEXT: fcvt s0, h0 2153; CHECK-SD-NEXT: bl cosf 2154; CHECK-SD-NEXT: fcvt h0, s0 2155; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload 2156; CHECK-SD-NEXT: mov v1.h[2], v0.h[0] 2157; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload 2158; CHECK-SD-NEXT: mov h0, v0.h[3] 2159; CHECK-SD-NEXT: str q1, [sp, #16] // 16-byte Folded Spill 2160; CHECK-SD-NEXT: fcvt s0, h0 2161; CHECK-SD-NEXT: bl cosf 2162; CHECK-SD-NEXT: fcvt h0, s0 2163; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload 2164; CHECK-SD-NEXT: mov v1.h[3], v0.h[0] 2165; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload 2166; CHECK-SD-NEXT: mov h0, v0.h[4] 2167; CHECK-SD-NEXT: str q1, [sp, #16] 
// 16-byte Folded Spill 2168; CHECK-SD-NEXT: fcvt s0, h0 2169; CHECK-SD-NEXT: bl cosf 2170; CHECK-SD-NEXT: fcvt h0, s0 2171; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload 2172; CHECK-SD-NEXT: mov v1.h[4], v0.h[0] 2173; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload 2174; CHECK-SD-NEXT: mov h0, v0.h[5] 2175; CHECK-SD-NEXT: str q1, [sp, #16] // 16-byte Folded Spill 2176; CHECK-SD-NEXT: fcvt s0, h0 2177; CHECK-SD-NEXT: bl cosf 2178; CHECK-SD-NEXT: fcvt h0, s0 2179; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload 2180; CHECK-SD-NEXT: mov v1.h[5], v0.h[0] 2181; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload 2182; CHECK-SD-NEXT: mov h0, v0.h[6] 2183; CHECK-SD-NEXT: str q1, [sp, #16] // 16-byte Folded Spill 2184; CHECK-SD-NEXT: fcvt s0, h0 2185; CHECK-SD-NEXT: bl cosf 2186; CHECK-SD-NEXT: fcvt h0, s0 2187; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload 2188; CHECK-SD-NEXT: mov v1.h[6], v0.h[0] 2189; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload 2190; CHECK-SD-NEXT: mov h0, v0.h[7] 2191; CHECK-SD-NEXT: str q1, [sp, #16] // 16-byte Folded Spill 2192; CHECK-SD-NEXT: fcvt s0, h0 2193; CHECK-SD-NEXT: bl cosf 2194; CHECK-SD-NEXT: fcvt h1, s0 2195; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload 2196; CHECK-SD-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload 2197; CHECK-SD-NEXT: mov v0.h[7], v1.h[0] 2198; CHECK-SD-NEXT: add sp, sp, #48 2199; CHECK-SD-NEXT: ret 2200; 2201; CHECK-GI-LABEL: cos_v8f16: 2202; CHECK-GI: // %bb.0: // %entry 2203; CHECK-GI-NEXT: sub sp, sp, #176 2204; CHECK-GI-NEXT: str d14, [sp, #112] // 8-byte Folded Spill 2205; CHECK-GI-NEXT: stp d13, d12, [sp, #120] // 16-byte Folded Spill 2206; CHECK-GI-NEXT: stp d11, d10, [sp, #136] // 16-byte Folded Spill 2207; CHECK-GI-NEXT: stp d9, d8, [sp, #152] // 16-byte Folded Spill 2208; CHECK-GI-NEXT: str x30, [sp, #168] // 8-byte Folded Spill 2209; CHECK-GI-NEXT: .cfi_def_cfa_offset 176 2210; CHECK-GI-NEXT: .cfi_offset w30, -8 2211; CHECK-GI-NEXT: .cfi_offset 
b8, -16 2212; CHECK-GI-NEXT: .cfi_offset b9, -24 2213; CHECK-GI-NEXT: .cfi_offset b10, -32 2214; CHECK-GI-NEXT: .cfi_offset b11, -40 2215; CHECK-GI-NEXT: .cfi_offset b12, -48 2216; CHECK-GI-NEXT: .cfi_offset b13, -56 2217; CHECK-GI-NEXT: .cfi_offset b14, -64 2218; CHECK-GI-NEXT: mov h8, v0.h[1] 2219; CHECK-GI-NEXT: mov h9, v0.h[2] 2220; CHECK-GI-NEXT: mov h10, v0.h[3] 2221; CHECK-GI-NEXT: mov h11, v0.h[4] 2222; CHECK-GI-NEXT: mov h12, v0.h[5] 2223; CHECK-GI-NEXT: mov h13, v0.h[6] 2224; CHECK-GI-NEXT: mov h14, v0.h[7] 2225; CHECK-GI-NEXT: fcvt s0, h0 2226; CHECK-GI-NEXT: bl cosf 2227; CHECK-GI-NEXT: fcvt s1, h8 2228; CHECK-GI-NEXT: fcvt h0, s0 2229; CHECK-GI-NEXT: str q0, [sp, #96] // 16-byte Folded Spill 2230; CHECK-GI-NEXT: fmov s0, s1 2231; CHECK-GI-NEXT: bl cosf 2232; CHECK-GI-NEXT: fcvt s1, h9 2233; CHECK-GI-NEXT: fcvt h0, s0 2234; CHECK-GI-NEXT: str q0, [sp, #64] // 16-byte Folded Spill 2235; CHECK-GI-NEXT: fmov s0, s1 2236; CHECK-GI-NEXT: bl cosf 2237; CHECK-GI-NEXT: fcvt s1, h10 2238; CHECK-GI-NEXT: fcvt h0, s0 2239; CHECK-GI-NEXT: str q0, [sp, #80] // 16-byte Folded Spill 2240; CHECK-GI-NEXT: fmov s0, s1 2241; CHECK-GI-NEXT: bl cosf 2242; CHECK-GI-NEXT: fcvt s1, h11 2243; CHECK-GI-NEXT: fcvt h0, s0 2244; CHECK-GI-NEXT: str q0, [sp, #48] // 16-byte Folded Spill 2245; CHECK-GI-NEXT: fmov s0, s1 2246; CHECK-GI-NEXT: bl cosf 2247; CHECK-GI-NEXT: fcvt s1, h12 2248; CHECK-GI-NEXT: fcvt h0, s0 2249; CHECK-GI-NEXT: str q0, [sp, #32] // 16-byte Folded Spill 2250; CHECK-GI-NEXT: fmov s0, s1 2251; CHECK-GI-NEXT: bl cosf 2252; CHECK-GI-NEXT: fcvt s1, h13 2253; CHECK-GI-NEXT: fcvt h0, s0 2254; CHECK-GI-NEXT: str q0, [sp, #16] // 16-byte Folded Spill 2255; CHECK-GI-NEXT: fmov s0, s1 2256; CHECK-GI-NEXT: bl cosf 2257; CHECK-GI-NEXT: fcvt s1, h14 2258; CHECK-GI-NEXT: fcvt h0, s0 2259; CHECK-GI-NEXT: str q0, [sp] // 16-byte Folded Spill 2260; CHECK-GI-NEXT: fmov s0, s1 2261; CHECK-GI-NEXT: bl cosf 2262; CHECK-GI-NEXT: ldp q3, q2, [sp, #64] // 32-byte Folded Reload 2263; 
CHECK-GI-NEXT: fcvt h0, s0 2264; CHECK-GI-NEXT: ldr q1, [sp, #96] // 16-byte Folded Reload 2265; CHECK-GI-NEXT: ldp d9, d8, [sp, #152] // 16-byte Folded Reload 2266; CHECK-GI-NEXT: ldp d11, d10, [sp, #136] // 16-byte Folded Reload 2267; CHECK-GI-NEXT: ldr x30, [sp, #168] // 8-byte Folded Reload 2268; CHECK-GI-NEXT: mov v1.h[1], v3.h[0] 2269; CHECK-GI-NEXT: ldr d14, [sp, #112] // 8-byte Folded Reload 2270; CHECK-GI-NEXT: ldp d13, d12, [sp, #120] // 16-byte Folded Reload 2271; CHECK-GI-NEXT: mov v1.h[2], v2.h[0] 2272; CHECK-GI-NEXT: ldp q2, q3, [sp, #32] // 32-byte Folded Reload 2273; CHECK-GI-NEXT: mov v1.h[3], v3.h[0] 2274; CHECK-GI-NEXT: mov v1.h[4], v2.h[0] 2275; CHECK-GI-NEXT: ldp q2, q3, [sp] // 32-byte Folded Reload 2276; CHECK-GI-NEXT: mov v1.h[5], v3.h[0] 2277; CHECK-GI-NEXT: mov v1.h[6], v2.h[0] 2278; CHECK-GI-NEXT: mov v1.h[7], v0.h[0] 2279; CHECK-GI-NEXT: mov v0.16b, v1.16b 2280; CHECK-GI-NEXT: add sp, sp, #176 2281; CHECK-GI-NEXT: ret 2282entry: 2283 %c = call <8 x half> @llvm.cos.v8f16(<8 x half> %a) 2284 ret <8 x half> %c 2285} 2286 ; llvm.cos on <16 x half>: the argument arrives in two vector registers (q0, q1); the expected code makes sixteen cosf libcalls, eight per input register, and rebuilds the two result registers lane by lane. 2287define <16 x half> @cos_v16f16(<16 x half> %a) { 2288; CHECK-SD-LABEL: cos_v16f16: 2289; CHECK-SD: // %bb.0: // %entry 2290; CHECK-SD-NEXT: sub sp, sp, #64 2291; CHECK-SD-NEXT: str x30, [sp, #48] // 8-byte Folded Spill 2292; CHECK-SD-NEXT: .cfi_def_cfa_offset 64 2293; CHECK-SD-NEXT: .cfi_offset w30, -16 2294; CHECK-SD-NEXT: stp q1, q0, [sp] // 32-byte Folded Spill 2295; CHECK-SD-NEXT: mov h1, v0.h[1] 2296; CHECK-SD-NEXT: fcvt s0, h1 2297; CHECK-SD-NEXT: bl cosf 2298; CHECK-SD-NEXT: fcvt h0, s0 2299; CHECK-SD-NEXT: str q0, [sp, #32] // 16-byte Folded Spill 2300; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload 2301; CHECK-SD-NEXT: fcvt s0, h0 2302; CHECK-SD-NEXT: bl cosf 2303; CHECK-SD-NEXT: fcvt h0, s0 2304; CHECK-SD-NEXT: ldr q1, [sp, #32] // 16-byte Folded Reload 2305; CHECK-SD-NEXT: mov v0.h[1], v1.h[0] 2306; CHECK-SD-NEXT: str q0, [sp, #32] // 16-byte Folded Spill 2307; CHECK-SD-NEXT: ldr q0, [sp, #16] // 
16-byte Folded Reload 2308; CHECK-SD-NEXT: mov h0, v0.h[2] 2309; CHECK-SD-NEXT: fcvt s0, h0 2310; CHECK-SD-NEXT: bl cosf 2311; CHECK-SD-NEXT: fcvt h0, s0 2312; CHECK-SD-NEXT: ldr q1, [sp, #32] // 16-byte Folded Reload 2313; CHECK-SD-NEXT: mov v1.h[2], v0.h[0] 2314; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload 2315; CHECK-SD-NEXT: mov h0, v0.h[3] 2316; CHECK-SD-NEXT: str q1, [sp, #32] // 16-byte Folded Spill 2317; CHECK-SD-NEXT: fcvt s0, h0 2318; CHECK-SD-NEXT: bl cosf 2319; CHECK-SD-NEXT: fcvt h0, s0 2320; CHECK-SD-NEXT: ldr q1, [sp, #32] // 16-byte Folded Reload 2321; CHECK-SD-NEXT: mov v1.h[3], v0.h[0] 2322; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload 2323; CHECK-SD-NEXT: mov h0, v0.h[4] 2324; CHECK-SD-NEXT: str q1, [sp, #32] // 16-byte Folded Spill 2325; CHECK-SD-NEXT: fcvt s0, h0 2326; CHECK-SD-NEXT: bl cosf 2327; CHECK-SD-NEXT: fcvt h0, s0 2328; CHECK-SD-NEXT: ldr q1, [sp, #32] // 16-byte Folded Reload 2329; CHECK-SD-NEXT: mov v1.h[4], v0.h[0] 2330; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload 2331; CHECK-SD-NEXT: mov h0, v0.h[5] 2332; CHECK-SD-NEXT: str q1, [sp, #32] // 16-byte Folded Spill 2333; CHECK-SD-NEXT: fcvt s0, h0 2334; CHECK-SD-NEXT: bl cosf 2335; CHECK-SD-NEXT: fcvt h0, s0 2336; CHECK-SD-NEXT: ldr q1, [sp, #32] // 16-byte Folded Reload 2337; CHECK-SD-NEXT: mov v1.h[5], v0.h[0] 2338; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload 2339; CHECK-SD-NEXT: mov h0, v0.h[6] 2340; CHECK-SD-NEXT: str q1, [sp, #32] // 16-byte Folded Spill 2341; CHECK-SD-NEXT: fcvt s0, h0 2342; CHECK-SD-NEXT: bl cosf 2343; CHECK-SD-NEXT: fcvt h0, s0 2344; CHECK-SD-NEXT: ldr q1, [sp, #32] // 16-byte Folded Reload 2345; CHECK-SD-NEXT: mov v1.h[6], v0.h[0] 2346; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload 2347; CHECK-SD-NEXT: mov h0, v0.h[7] 2348; CHECK-SD-NEXT: str q1, [sp, #32] // 16-byte Folded Spill 2349; CHECK-SD-NEXT: fcvt s0, h0 2350; CHECK-SD-NEXT: bl cosf 2351; CHECK-SD-NEXT: fcvt h0, s0 2352; CHECK-SD-NEXT: 
ldr q1, [sp, #32] // 16-byte Folded Reload 2353; CHECK-SD-NEXT: mov v1.h[7], v0.h[0] 2354; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload 2355; CHECK-SD-NEXT: mov h0, v0.h[1] 2356; CHECK-SD-NEXT: str q1, [sp, #32] // 16-byte Folded Spill 2357; CHECK-SD-NEXT: fcvt s0, h0 2358; CHECK-SD-NEXT: bl cosf 2359; CHECK-SD-NEXT: fcvt h0, s0 2360; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Folded Spill 2361; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload 2362; CHECK-SD-NEXT: fcvt s0, h0 2363; CHECK-SD-NEXT: bl cosf 2364; CHECK-SD-NEXT: fcvt h0, s0 2365; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload 2366; CHECK-SD-NEXT: mov v0.h[1], v1.h[0] 2367; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Folded Spill 2368; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload 2369; CHECK-SD-NEXT: mov h0, v0.h[2] 2370; CHECK-SD-NEXT: fcvt s0, h0 2371; CHECK-SD-NEXT: bl cosf 2372; CHECK-SD-NEXT: fcvt h0, s0 2373; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload 2374; CHECK-SD-NEXT: mov v1.h[2], v0.h[0] 2375; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload 2376; CHECK-SD-NEXT: mov h0, v0.h[3] 2377; CHECK-SD-NEXT: str q1, [sp, #16] // 16-byte Folded Spill 2378; CHECK-SD-NEXT: fcvt s0, h0 2379; CHECK-SD-NEXT: bl cosf 2380; CHECK-SD-NEXT: fcvt h0, s0 2381; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload 2382; CHECK-SD-NEXT: mov v1.h[3], v0.h[0] 2383; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload 2384; CHECK-SD-NEXT: mov h0, v0.h[4] 2385; CHECK-SD-NEXT: str q1, [sp, #16] // 16-byte Folded Spill 2386; CHECK-SD-NEXT: fcvt s0, h0 2387; CHECK-SD-NEXT: bl cosf 2388; CHECK-SD-NEXT: fcvt h0, s0 2389; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload 2390; CHECK-SD-NEXT: mov v1.h[4], v0.h[0] 2391; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload 2392; CHECK-SD-NEXT: mov h0, v0.h[5] 2393; CHECK-SD-NEXT: str q1, [sp, #16] // 16-byte Folded Spill 2394; CHECK-SD-NEXT: fcvt s0, h0 2395; CHECK-SD-NEXT: bl cosf 2396; CHECK-SD-NEXT: fcvt h0, s0 
2397; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload 2398; CHECK-SD-NEXT: mov v1.h[5], v0.h[0] 2399; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload 2400; CHECK-SD-NEXT: mov h0, v0.h[6] 2401; CHECK-SD-NEXT: str q1, [sp, #16] // 16-byte Folded Spill 2402; CHECK-SD-NEXT: fcvt s0, h0 2403; CHECK-SD-NEXT: bl cosf 2404; CHECK-SD-NEXT: fcvt h0, s0 2405; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload 2406; CHECK-SD-NEXT: mov v1.h[6], v0.h[0] 2407; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload 2408; CHECK-SD-NEXT: mov h0, v0.h[7] 2409; CHECK-SD-NEXT: str q1, [sp, #16] // 16-byte Folded Spill 2410; CHECK-SD-NEXT: fcvt s0, h0 2411; CHECK-SD-NEXT: bl cosf 2412; CHECK-SD-NEXT: fmov s1, s0 2413; CHECK-SD-NEXT: ldr x30, [sp, #48] // 8-byte Folded Reload 2414; CHECK-SD-NEXT: fcvt h2, s1 2415; CHECK-SD-NEXT: ldp q1, q0, [sp, #16] // 32-byte Folded Reload 2416; CHECK-SD-NEXT: mov v1.h[7], v2.h[0] 2417; CHECK-SD-NEXT: add sp, sp, #64 2418; CHECK-SD-NEXT: ret 2419; 2420; CHECK-GI-LABEL: cos_v16f16: 2421; CHECK-GI: // %bb.0: // %entry 2422; CHECK-GI-NEXT: sub sp, sp, #320 2423; CHECK-GI-NEXT: stp d15, d14, [sp, #240] // 16-byte Folded Spill 2424; CHECK-GI-NEXT: stp d13, d12, [sp, #256] // 16-byte Folded Spill 2425; CHECK-GI-NEXT: stp d11, d10, [sp, #272] // 16-byte Folded Spill 2426; CHECK-GI-NEXT: stp d9, d8, [sp, #288] // 16-byte Folded Spill 2427; CHECK-GI-NEXT: stp x29, x30, [sp, #304] // 16-byte Folded Spill 2428; CHECK-GI-NEXT: .cfi_def_cfa_offset 320 2429; CHECK-GI-NEXT: .cfi_offset w30, -8 2430; CHECK-GI-NEXT: .cfi_offset w29, -16 2431; CHECK-GI-NEXT: .cfi_offset b8, -24 2432; CHECK-GI-NEXT: .cfi_offset b9, -32 2433; CHECK-GI-NEXT: .cfi_offset b10, -40 2434; CHECK-GI-NEXT: .cfi_offset b11, -48 2435; CHECK-GI-NEXT: .cfi_offset b12, -56 2436; CHECK-GI-NEXT: .cfi_offset b13, -64 2437; CHECK-GI-NEXT: .cfi_offset b14, -72 2438; CHECK-GI-NEXT: .cfi_offset b15, -80 2439; CHECK-GI-NEXT: mov v2.16b, v1.16b 2440; CHECK-GI-NEXT: str q1, [sp, #80] // 
16-byte Folded Spill 2441; CHECK-GI-NEXT: mov h14, v1.h[1] 2442; CHECK-GI-NEXT: mov h1, v1.h[2] 2443; CHECK-GI-NEXT: mov h15, v0.h[1] 2444; CHECK-GI-NEXT: mov h8, v0.h[2] 2445; CHECK-GI-NEXT: mov h9, v0.h[3] 2446; CHECK-GI-NEXT: mov h10, v0.h[4] 2447; CHECK-GI-NEXT: mov h11, v0.h[5] 2448; CHECK-GI-NEXT: mov h12, v0.h[6] 2449; CHECK-GI-NEXT: mov h13, v0.h[7] 2450; CHECK-GI-NEXT: fcvt s0, h0 2451; CHECK-GI-NEXT: str h1, [sp, #16] // 2-byte Folded Spill 2452; CHECK-GI-NEXT: mov h1, v2.h[3] 2453; CHECK-GI-NEXT: str h1, [sp, #32] // 2-byte Folded Spill 2454; CHECK-GI-NEXT: mov h1, v2.h[4] 2455; CHECK-GI-NEXT: str h1, [sp, #48] // 2-byte Folded Spill 2456; CHECK-GI-NEXT: mov h1, v2.h[5] 2457; CHECK-GI-NEXT: str h1, [sp, #64] // 2-byte Folded Spill 2458; CHECK-GI-NEXT: mov h1, v2.h[6] 2459; CHECK-GI-NEXT: str h1, [sp, #96] // 2-byte Folded Spill 2460; CHECK-GI-NEXT: mov h1, v2.h[7] 2461; CHECK-GI-NEXT: str h1, [sp, #160] // 2-byte Folded Spill 2462; CHECK-GI-NEXT: bl cosf 2463; CHECK-GI-NEXT: fcvt s1, h15 2464; CHECK-GI-NEXT: fcvt h0, s0 2465; CHECK-GI-NEXT: str q0, [sp, #192] // 16-byte Folded Spill 2466; CHECK-GI-NEXT: fmov s0, s1 2467; CHECK-GI-NEXT: bl cosf 2468; CHECK-GI-NEXT: fcvt s1, h8 2469; CHECK-GI-NEXT: fcvt h0, s0 2470; CHECK-GI-NEXT: str q0, [sp, #112] // 16-byte Folded Spill 2471; CHECK-GI-NEXT: fmov s0, s1 2472; CHECK-GI-NEXT: bl cosf 2473; CHECK-GI-NEXT: fcvt s1, h9 2474; CHECK-GI-NEXT: fcvt h0, s0 2475; CHECK-GI-NEXT: str q0, [sp, #224] // 16-byte Folded Spill 2476; CHECK-GI-NEXT: fmov s0, s1 2477; CHECK-GI-NEXT: bl cosf 2478; CHECK-GI-NEXT: fcvt s1, h10 2479; CHECK-GI-NEXT: fcvt h0, s0 2480; CHECK-GI-NEXT: str q0, [sp, #208] // 16-byte Folded Spill 2481; CHECK-GI-NEXT: fmov s0, s1 2482; CHECK-GI-NEXT: bl cosf 2483; CHECK-GI-NEXT: fcvt s1, h11 2484; CHECK-GI-NEXT: fcvt h0, s0 2485; CHECK-GI-NEXT: str q0, [sp, #176] // 16-byte Folded Spill 2486; CHECK-GI-NEXT: fmov s0, s1 2487; CHECK-GI-NEXT: bl cosf 2488; CHECK-GI-NEXT: fcvt s1, h12 2489; CHECK-GI-NEXT: 
fcvt h0, s0 2490; CHECK-GI-NEXT: str q0, [sp, #144] // 16-byte Folded Spill 2491; CHECK-GI-NEXT: fmov s0, s1 2492; CHECK-GI-NEXT: bl cosf 2493; CHECK-GI-NEXT: fcvt s1, h13 2494; CHECK-GI-NEXT: fcvt h0, s0 2495; CHECK-GI-NEXT: str q0, [sp, #128] // 16-byte Folded Spill 2496; CHECK-GI-NEXT: fmov s0, s1 2497; CHECK-GI-NEXT: bl cosf 2498; CHECK-GI-NEXT: ldr q1, [sp, #80] // 16-byte Folded Reload 2499; CHECK-GI-NEXT: fcvt h0, s0 2500; CHECK-GI-NEXT: fcvt s1, h1 2501; CHECK-GI-NEXT: str q0, [sp, #80] // 16-byte Folded Spill 2502; CHECK-GI-NEXT: fmov s0, s1 2503; CHECK-GI-NEXT: bl cosf 2504; CHECK-GI-NEXT: fcvt s1, h14 2505; CHECK-GI-NEXT: fcvt h0, s0 2506; CHECK-GI-NEXT: str q0, [sp] // 16-byte Folded Spill 2507; CHECK-GI-NEXT: fmov s0, s1 2508; CHECK-GI-NEXT: bl cosf 2509; CHECK-GI-NEXT: ldr h1, [sp, #16] // 2-byte Folded Reload 2510; CHECK-GI-NEXT: fcvt h0, s0 2511; CHECK-GI-NEXT: fcvt s1, h1 2512; CHECK-GI-NEXT: str q0, [sp, #16] // 16-byte Folded Spill 2513; CHECK-GI-NEXT: fmov s0, s1 2514; CHECK-GI-NEXT: bl cosf 2515; CHECK-GI-NEXT: ldr h1, [sp, #32] // 2-byte Folded Reload 2516; CHECK-GI-NEXT: fcvt h0, s0 2517; CHECK-GI-NEXT: fcvt s1, h1 2518; CHECK-GI-NEXT: str q0, [sp, #32] // 16-byte Folded Spill 2519; CHECK-GI-NEXT: fmov s0, s1 2520; CHECK-GI-NEXT: bl cosf 2521; CHECK-GI-NEXT: ldr h1, [sp, #48] // 2-byte Folded Reload 2522; CHECK-GI-NEXT: fcvt h0, s0 2523; CHECK-GI-NEXT: fcvt s1, h1 2524; CHECK-GI-NEXT: str q0, [sp, #48] // 16-byte Folded Spill 2525; CHECK-GI-NEXT: fmov s0, s1 2526; CHECK-GI-NEXT: bl cosf 2527; CHECK-GI-NEXT: ldr h1, [sp, #64] // 2-byte Folded Reload 2528; CHECK-GI-NEXT: fcvt h0, s0 2529; CHECK-GI-NEXT: fcvt s1, h1 2530; CHECK-GI-NEXT: str q0, [sp, #64] // 16-byte Folded Spill 2531; CHECK-GI-NEXT: fmov s0, s1 2532; CHECK-GI-NEXT: bl cosf 2533; CHECK-GI-NEXT: ldr h1, [sp, #96] // 2-byte Folded Reload 2534; CHECK-GI-NEXT: fcvt h0, s0 2535; CHECK-GI-NEXT: fcvt s1, h1 2536; CHECK-GI-NEXT: str q0, [sp, #96] // 16-byte Folded Spill 2537; 
CHECK-GI-NEXT: fmov s0, s1 2538; CHECK-GI-NEXT: bl cosf 2539; CHECK-GI-NEXT: ldr h1, [sp, #160] // 2-byte Folded Reload 2540; CHECK-GI-NEXT: fcvt h0, s0 2541; CHECK-GI-NEXT: fcvt s1, h1 2542; CHECK-GI-NEXT: str q0, [sp, #160] // 16-byte Folded Spill 2543; CHECK-GI-NEXT: fmov s0, s1 2544; CHECK-GI-NEXT: bl cosf 2545; CHECK-GI-NEXT: ldr q3, [sp, #192] // 16-byte Folded Reload 2546; CHECK-GI-NEXT: ldr q2, [sp, #112] // 16-byte Folded Reload 2547; CHECK-GI-NEXT: ldp x29, x30, [sp, #304] // 16-byte Folded Reload 2548; CHECK-GI-NEXT: mov v3.h[1], v2.h[0] 2549; CHECK-GI-NEXT: ldp q1, q2, [sp] // 32-byte Folded Reload 2550; CHECK-GI-NEXT: ldp d9, d8, [sp, #288] // 16-byte Folded Reload 2551; CHECK-GI-NEXT: ldp d11, d10, [sp, #272] // 16-byte Folded Reload 2552; CHECK-GI-NEXT: mov v1.h[1], v2.h[0] 2553; CHECK-GI-NEXT: ldr q2, [sp, #224] // 16-byte Folded Reload 2554; CHECK-GI-NEXT: ldp d13, d12, [sp, #256] // 16-byte Folded Reload 2555; CHECK-GI-NEXT: mov v3.h[2], v2.h[0] 2556; CHECK-GI-NEXT: ldr q2, [sp, #32] // 16-byte Folded Reload 2557; CHECK-GI-NEXT: ldp d15, d14, [sp, #240] // 16-byte Folded Reload 2558; CHECK-GI-NEXT: mov v1.h[2], v2.h[0] 2559; CHECK-GI-NEXT: ldr q2, [sp, #208] // 16-byte Folded Reload 2560; CHECK-GI-NEXT: mov v3.h[3], v2.h[0] 2561; CHECK-GI-NEXT: ldr q2, [sp, #48] // 16-byte Folded Reload 2562; CHECK-GI-NEXT: mov v1.h[3], v2.h[0] 2563; CHECK-GI-NEXT: ldr q2, [sp, #176] // 16-byte Folded Reload 2564; CHECK-GI-NEXT: mov v3.h[4], v2.h[0] 2565; CHECK-GI-NEXT: ldr q2, [sp, #64] // 16-byte Folded Reload 2566; CHECK-GI-NEXT: mov v1.h[4], v2.h[0] 2567; CHECK-GI-NEXT: ldr q2, [sp, #144] // 16-byte Folded Reload 2568; CHECK-GI-NEXT: mov v3.h[5], v2.h[0] 2569; CHECK-GI-NEXT: ldr q2, [sp, #96] // 16-byte Folded Reload 2570; CHECK-GI-NEXT: mov v1.h[5], v2.h[0] 2571; CHECK-GI-NEXT: fcvt h2, s0 2572; CHECK-GI-NEXT: ldr q0, [sp, #128] // 16-byte Folded Reload 2573; CHECK-GI-NEXT: mov v3.h[6], v0.h[0] 2574; CHECK-GI-NEXT: ldr q0, [sp, #160] // 16-byte Folded Reload 
2575; CHECK-GI-NEXT: mov v1.h[6], v0.h[0] 2576; CHECK-GI-NEXT: ldr q0, [sp, #80] // 16-byte Folded Reload 2577; CHECK-GI-NEXT: mov v3.h[7], v0.h[0] 2578; CHECK-GI-NEXT: mov v1.h[7], v2.h[0] 2579; CHECK-GI-NEXT: mov v0.16b, v3.16b 2580; CHECK-GI-NEXT: add sp, sp, #320 2581; CHECK-GI-NEXT: ret 2582entry: 2583 %c = call <16 x half> @llvm.cos.v16f16(<16 x half> %a) 2584 ret <16 x half> %c 2585} 2586 ; llvm.cos on <2 x fp128>: both instruction selectors share one expectation here; each fp128 lane (q0, then q1) is passed to cosl, with q1 spilled to the stack across the first call. 2587define <2 x fp128> @cos_v2fp128(<2 x fp128> %a) { 2588; CHECK-LABEL: cos_v2fp128: 2589; CHECK: // %bb.0: // %entry 2590; CHECK-NEXT: sub sp, sp, #48 2591; CHECK-NEXT: str x30, [sp, #32] // 8-byte Folded Spill 2592; CHECK-NEXT: .cfi_def_cfa_offset 48 2593; CHECK-NEXT: .cfi_offset w30, -16 2594; CHECK-NEXT: str q1, [sp, #16] // 16-byte Folded Spill 2595; CHECK-NEXT: bl cosl 2596; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill 2597; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload 2598; CHECK-NEXT: bl cosl 2599; CHECK-NEXT: mov v1.16b, v0.16b 2600; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload 2601; CHECK-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload 2602; CHECK-NEXT: add sp, sp, #48 2603; CHECK-NEXT: ret 2604entry: 2605 %c = call <2 x fp128> @llvm.cos.v2fp128(<2 x fp128> %a) 2606 ret <2 x fp128> %c 2607} 2608 2609; This is testing that we do not produce incorrect tailcall lowerings: the sin result is bitcast to i64 after the call, so the expected code is a real call (bl sin) followed by fmov and ret rather than a tail branch to sin. The %y argument is unused by the body. 2610define i64 @donttailcall(double noundef %x, double noundef %y) { 2611; CHECK-LABEL: donttailcall: 2612; CHECK: // %bb.0: // %entry 2613; CHECK-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill 2614; CHECK-NEXT: .cfi_def_cfa_offset 16 2615; CHECK-NEXT: .cfi_offset w30, -16 2616; CHECK-NEXT: bl sin 2617; CHECK-NEXT: fmov x0, d0 2618; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload 2619; CHECK-NEXT: ret 2620entry: 2621 %call = tail call double @llvm.sin.f64(double noundef %x) 2622 %0 = bitcast double %call to i64 2623 ret i64 %0 2624} 2625 2626 ; Declarations of the llvm.sin / llvm.cos intrinsic overloads (scalar half/float/double/fp128 and fixed-width vector types). 2627declare <16 x half> @llvm.cos.v16f16(<16 x half>) 2628declare <16 x half> @llvm.sin.v16f16(<16 x half>) 2629declare <2 x double> @llvm.cos.v2f64(<2 x double>) 2630declare <2 x double> @llvm.sin.v2f64(<2 x double>) 2631declare <2 x float> @llvm.cos.v2f32(<2 x float>) 2632declare <2 x float> @llvm.sin.v2f32(<2 x float>) 2633declare <2 x fp128> @llvm.cos.v2fp128(<2 x fp128>) 2634declare <2 x fp128> @llvm.sin.v2fp128(<2 x fp128>) 2635declare <3 x double> @llvm.cos.v3f64(<3 x double>) 2636declare <3 x double> @llvm.sin.v3f64(<3 x double>) 2637declare <3 x float> @llvm.cos.v3f32(<3 x float>) 2638declare <3 x float> @llvm.sin.v3f32(<3 x float>) 2639declare <4 x double> @llvm.cos.v4f64(<4 x double>) 2640declare <4 x double> @llvm.sin.v4f64(<4 x double>) 2641declare <4 x float> @llvm.cos.v4f32(<4 x float>) 2642declare <4 x float> @llvm.sin.v4f32(<4 x float>) 2643declare <4 x half> @llvm.cos.v4f16(<4 x half>) 2644declare <4 x half> @llvm.sin.v4f16(<4 x half>) 2645declare <7 x half> @llvm.cos.v7f16(<7 x half>) 2646declare <7 x half> @llvm.sin.v7f16(<7 x half>) 2647declare <8 x float> @llvm.cos.v8f32(<8 x float>) 2648declare <8 x float> @llvm.sin.v8f32(<8 x float>) 2649declare <8 x half> @llvm.cos.v8f16(<8 x half>) 2650declare <8 x half> @llvm.sin.v8f16(<8 x half>) 2651declare double @llvm.cos.f64(double) 2652declare double @llvm.sin.f64(double) 2653declare float @llvm.cos.f32(float) 2654declare float @llvm.sin.f32(float) 2655declare fp128 @llvm.cos.fp128(fp128) 2656declare fp128 @llvm.sin.fp128(fp128) 2657declare half @llvm.cos.f16(half) 2658declare half @llvm.sin.f16(half) 2659