1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 2; RUN: llc -mtriple=aarch64 -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SD 3; RUN: llc -mtriple=aarch64 -global-isel -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-GI 4 5define double @pow_f64(double %a, double %b) { 6; CHECK-LABEL: pow_f64: 7; CHECK: // %bb.0: // %entry 8; CHECK-NEXT: b pow 9entry: 10 %c = call double @llvm.pow.f64(double %a, double %b) 11 ret double %c 12} 13 14define float @pow_f32(float %a, float %b) { 15; CHECK-LABEL: pow_f32: 16; CHECK: // %bb.0: // %entry 17; CHECK-NEXT: b powf 18entry: 19 %c = call float @llvm.pow.f32(float %a, float %b) 20 ret float %c 21} 22 23define half @pow_f16(half %a, half %b) { 24; CHECK-LABEL: pow_f16: 25; CHECK: // %bb.0: // %entry 26; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill 27; CHECK-NEXT: .cfi_def_cfa_offset 16 28; CHECK-NEXT: .cfi_offset w30, -16 29; CHECK-NEXT: fcvt s0, h0 30; CHECK-NEXT: fcvt s1, h1 31; CHECK-NEXT: bl powf 32; CHECK-NEXT: fcvt h0, s0 33; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload 34; CHECK-NEXT: ret 35entry: 36 %c = call half @llvm.pow.f16(half %a, half %b) 37 ret half %c 38} 39 40define fp128 @pow_fp128(fp128 %a, fp128 %b) { 41; CHECK-LABEL: pow_fp128: 42; CHECK: // %bb.0: // %entry 43; CHECK-NEXT: b powl 44entry: 45 %c = call fp128 @llvm.pow.fp128(fp128 %a, fp128 %b) 46 ret fp128 %c 47} 48 49define <1 x double> @pow_v1f64(<1 x double> %x) { 50; CHECK-LABEL: pow_v1f64: 51; CHECK: // %bb.0: 52; CHECK-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill 53; CHECK-NEXT: .cfi_def_cfa_offset 16 54; CHECK-NEXT: .cfi_offset w30, -16 55; CHECK-NEXT: adrp x8, .LCPI4_0 56; CHECK-NEXT: ldr d1, [x8, :lo12:.LCPI4_0] 57; CHECK-NEXT: bl pow 58; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload 59; CHECK-NEXT: ret 60 %c = call <1 x double> @llvm.pow.v1f64(<1 x double> %x, <1 x double> <double 3.140000e+00>) 61 ret <1 x double> %c 62} 63 64define <2 x double> @pow_v2f64(<2 x double> %a, <2 x double> %b) { 65; CHECK-SD-LABEL: pow_v2f64: 66; CHECK-SD: // %bb.0: // %entry 67; CHECK-SD-NEXT: sub sp, sp, #64 68; CHECK-SD-NEXT: str x30, [sp, #48] // 8-byte Folded Spill 69; CHECK-SD-NEXT: .cfi_def_cfa_offset 64 70; CHECK-SD-NEXT: .cfi_offset w30, -16 71; CHECK-SD-NEXT: stp q0, q1, [sp] // 32-byte Folded Spill 72; CHECK-SD-NEXT: mov d0, v0.d[1] 73; CHECK-SD-NEXT: mov d1, v1.d[1] 74; CHECK-SD-NEXT: bl pow 75; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0 76; CHECK-SD-NEXT: str q0, [sp, #32] // 16-byte Folded Spill 77; CHECK-SD-NEXT: ldp q0, q1, [sp] // 32-byte Folded Reload 78; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0 79; CHECK-SD-NEXT: // kill: def $d1 killed $d1 killed $q1 80; CHECK-SD-NEXT: bl pow 81; CHECK-SD-NEXT: ldr q1, [sp, #32] // 16-byte Folded Reload 82; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0 83; CHECK-SD-NEXT: ldr x30, [sp, #48] // 8-byte Folded Reload 84; CHECK-SD-NEXT: mov v0.d[1], v1.d[0] 85; CHECK-SD-NEXT: add sp, sp, #64 86; CHECK-SD-NEXT: ret 87; 88; CHECK-GI-LABEL: pow_v2f64: 89; CHECK-GI: // %bb.0: // %entry 90; CHECK-GI-NEXT: sub sp, sp, #48 91; CHECK-GI-NEXT: stp d9, d8, [sp, #16] // 16-byte Folded Spill 92; CHECK-GI-NEXT: str x30, [sp, #32] // 8-byte Folded Spill 93; CHECK-GI-NEXT: .cfi_def_cfa_offset 48 94; CHECK-GI-NEXT: .cfi_offset w30, -16 95; CHECK-GI-NEXT: .cfi_offset b8, -24 96; CHECK-GI-NEXT: .cfi_offset b9, -32 97; CHECK-GI-NEXT: mov d8, v0.d[1] 98; CHECK-GI-NEXT: mov d9, v1.d[1] 99; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0 100; 
CHECK-GI-NEXT: // kill: def $d1 killed $d1 killed $q1 101; CHECK-GI-NEXT: bl pow 102; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 103; CHECK-GI-NEXT: str q0, [sp] // 16-byte Folded Spill 104; CHECK-GI-NEXT: fmov d1, d9 105; CHECK-GI-NEXT: fmov d0, d8 106; CHECK-GI-NEXT: bl pow 107; CHECK-GI-NEXT: ldr q1, [sp] // 16-byte Folded Reload 108; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 109; CHECK-GI-NEXT: ldp d9, d8, [sp, #16] // 16-byte Folded Reload 110; CHECK-GI-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload 111; CHECK-GI-NEXT: mov v1.d[1], v0.d[0] 112; CHECK-GI-NEXT: mov v0.16b, v1.16b 113; CHECK-GI-NEXT: add sp, sp, #48 114; CHECK-GI-NEXT: ret 115entry: 116 %c = call <2 x double> @llvm.pow.v2f64(<2 x double> %a, <2 x double> %b) 117 ret <2 x double> %c 118} 119 120define <3 x double> @pow_v3f64(<3 x double> %a, <3 x double> %b) { 121; CHECK-SD-LABEL: pow_v3f64: 122; CHECK-SD: // %bb.0: // %entry 123; CHECK-SD-NEXT: str d12, [sp, #-48]! // 8-byte Folded Spill 124; CHECK-SD-NEXT: stp d11, d10, [sp, #8] // 16-byte Folded Spill 125; CHECK-SD-NEXT: stp d9, d8, [sp, #24] // 16-byte Folded Spill 126; CHECK-SD-NEXT: str x30, [sp, #40] // 8-byte Folded Spill 127; CHECK-SD-NEXT: .cfi_def_cfa_offset 48 128; CHECK-SD-NEXT: .cfi_offset w30, -8 129; CHECK-SD-NEXT: .cfi_offset b8, -16 130; CHECK-SD-NEXT: .cfi_offset b9, -24 131; CHECK-SD-NEXT: .cfi_offset b10, -32 132; CHECK-SD-NEXT: .cfi_offset b11, -40 133; CHECK-SD-NEXT: .cfi_offset b12, -48 134; CHECK-SD-NEXT: fmov d11, d1 135; CHECK-SD-NEXT: fmov d1, d3 136; CHECK-SD-NEXT: fmov d8, d5 137; CHECK-SD-NEXT: fmov d9, d4 138; CHECK-SD-NEXT: fmov d10, d2 139; CHECK-SD-NEXT: bl pow 140; CHECK-SD-NEXT: fmov d12, d0 141; CHECK-SD-NEXT: fmov d0, d11 142; CHECK-SD-NEXT: fmov d1, d9 143; CHECK-SD-NEXT: bl pow 144; CHECK-SD-NEXT: fmov d9, d0 145; CHECK-SD-NEXT: fmov d0, d10 146; CHECK-SD-NEXT: fmov d1, d8 147; CHECK-SD-NEXT: bl pow 148; CHECK-SD-NEXT: fmov d1, d9 149; CHECK-SD-NEXT: ldp d9, d8, [sp, #24] // 16-byte 
Folded Reload 150; CHECK-SD-NEXT: ldp d11, d10, [sp, #8] // 16-byte Folded Reload 151; CHECK-SD-NEXT: fmov d2, d0 152; CHECK-SD-NEXT: ldr x30, [sp, #40] // 8-byte Folded Reload 153; CHECK-SD-NEXT: fmov d0, d12 154; CHECK-SD-NEXT: ldr d12, [sp], #48 // 8-byte Folded Reload 155; CHECK-SD-NEXT: ret 156; 157; CHECK-GI-LABEL: pow_v3f64: 158; CHECK-GI: // %bb.0: // %entry 159; CHECK-GI-NEXT: str d12, [sp, #-48]! // 8-byte Folded Spill 160; CHECK-GI-NEXT: stp d11, d10, [sp, #8] // 16-byte Folded Spill 161; CHECK-GI-NEXT: stp d9, d8, [sp, #24] // 16-byte Folded Spill 162; CHECK-GI-NEXT: str x30, [sp, #40] // 8-byte Folded Spill 163; CHECK-GI-NEXT: .cfi_def_cfa_offset 48 164; CHECK-GI-NEXT: .cfi_offset w30, -8 165; CHECK-GI-NEXT: .cfi_offset b8, -16 166; CHECK-GI-NEXT: .cfi_offset b9, -24 167; CHECK-GI-NEXT: .cfi_offset b10, -32 168; CHECK-GI-NEXT: .cfi_offset b11, -40 169; CHECK-GI-NEXT: .cfi_offset b12, -48 170; CHECK-GI-NEXT: fmov d8, d1 171; CHECK-GI-NEXT: fmov d1, d3 172; CHECK-GI-NEXT: fmov d9, d2 173; CHECK-GI-NEXT: fmov d10, d4 174; CHECK-GI-NEXT: fmov d11, d5 175; CHECK-GI-NEXT: bl pow 176; CHECK-GI-NEXT: fmov d12, d0 177; CHECK-GI-NEXT: fmov d0, d8 178; CHECK-GI-NEXT: fmov d1, d10 179; CHECK-GI-NEXT: bl pow 180; CHECK-GI-NEXT: fmov d8, d0 181; CHECK-GI-NEXT: fmov d0, d9 182; CHECK-GI-NEXT: fmov d1, d11 183; CHECK-GI-NEXT: bl pow 184; CHECK-GI-NEXT: fmov d1, d8 185; CHECK-GI-NEXT: ldp d9, d8, [sp, #24] // 16-byte Folded Reload 186; CHECK-GI-NEXT: ldp d11, d10, [sp, #8] // 16-byte Folded Reload 187; CHECK-GI-NEXT: fmov d2, d0 188; CHECK-GI-NEXT: ldr x30, [sp, #40] // 8-byte Folded Reload 189; CHECK-GI-NEXT: fmov d0, d12 190; CHECK-GI-NEXT: ldr d12, [sp], #48 // 8-byte Folded Reload 191; CHECK-GI-NEXT: ret 192entry: 193 %c = call <3 x double> @llvm.pow.v3f64(<3 x double> %a, <3 x double> %b) 194 ret <3 x double> %c 195} 196 197define <4 x double> @pow_v4f64(<4 x double> %a, <4 x double> %b) { 198; CHECK-SD-LABEL: pow_v4f64: 199; CHECK-SD: // %bb.0: // %entry 200; 
CHECK-SD-NEXT: sub sp, sp, #96 201; CHECK-SD-NEXT: str x30, [sp, #80] // 8-byte Folded Spill 202; CHECK-SD-NEXT: .cfi_def_cfa_offset 96 203; CHECK-SD-NEXT: .cfi_offset w30, -16 204; CHECK-SD-NEXT: stp q0, q2, [sp] // 32-byte Folded Spill 205; CHECK-SD-NEXT: mov d0, v0.d[1] 206; CHECK-SD-NEXT: stp q1, q3, [sp, #48] // 32-byte Folded Spill 207; CHECK-SD-NEXT: mov d1, v2.d[1] 208; CHECK-SD-NEXT: bl pow 209; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0 210; CHECK-SD-NEXT: str q0, [sp, #32] // 16-byte Folded Spill 211; CHECK-SD-NEXT: ldp q0, q1, [sp] // 32-byte Folded Reload 212; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0 213; CHECK-SD-NEXT: // kill: def $d1 killed $d1 killed $q1 214; CHECK-SD-NEXT: bl pow 215; CHECK-SD-NEXT: ldr q1, [sp, #32] // 16-byte Folded Reload 216; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0 217; CHECK-SD-NEXT: mov v0.d[1], v1.d[0] 218; CHECK-SD-NEXT: str q0, [sp, #32] // 16-byte Folded Spill 219; CHECK-SD-NEXT: ldp q0, q1, [sp, #48] // 32-byte Folded Reload 220; CHECK-SD-NEXT: mov d0, v0.d[1] 221; CHECK-SD-NEXT: mov d1, v1.d[1] 222; CHECK-SD-NEXT: bl pow 223; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0 224; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Folded Spill 225; CHECK-SD-NEXT: ldp q0, q1, [sp, #48] // 32-byte Folded Reload 226; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0 227; CHECK-SD-NEXT: // kill: def $d1 killed $d1 killed $q1 228; CHECK-SD-NEXT: bl pow 229; CHECK-SD-NEXT: fmov d1, d0 230; CHECK-SD-NEXT: ldp q2, q0, [sp, #16] // 32-byte Folded Reload 231; CHECK-SD-NEXT: ldr x30, [sp, #80] // 8-byte Folded Reload 232; CHECK-SD-NEXT: mov v1.d[1], v2.d[0] 233; CHECK-SD-NEXT: add sp, sp, #96 234; CHECK-SD-NEXT: ret 235; 236; CHECK-GI-LABEL: pow_v4f64: 237; CHECK-GI: // %bb.0: // %entry 238; CHECK-GI-NEXT: sub sp, sp, #112 239; CHECK-GI-NEXT: stp d11, d10, [sp, #64] // 16-byte Folded Spill 240; CHECK-GI-NEXT: stp d9, d8, [sp, #80] // 16-byte Folded Spill 241; CHECK-GI-NEXT: str x30, [sp, #96] // 
8-byte Folded Spill 242; CHECK-GI-NEXT: .cfi_def_cfa_offset 112 243; CHECK-GI-NEXT: .cfi_offset w30, -16 244; CHECK-GI-NEXT: .cfi_offset b8, -24 245; CHECK-GI-NEXT: .cfi_offset b9, -32 246; CHECK-GI-NEXT: .cfi_offset b10, -40 247; CHECK-GI-NEXT: .cfi_offset b11, -48 248; CHECK-GI-NEXT: mov v4.16b, v1.16b 249; CHECK-GI-NEXT: str q1, [sp, #32] // 16-byte Folded Spill 250; CHECK-GI-NEXT: mov v1.16b, v2.16b 251; CHECK-GI-NEXT: str q3, [sp] // 16-byte Folded Spill 252; CHECK-GI-NEXT: mov d8, v0.d[1] 253; CHECK-GI-NEXT: mov d10, v2.d[1] 254; CHECK-GI-NEXT: mov d11, v3.d[1] 255; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0 256; CHECK-GI-NEXT: // kill: def $d1 killed $d1 killed $q1 257; CHECK-GI-NEXT: mov d9, v4.d[1] 258; CHECK-GI-NEXT: bl pow 259; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 260; CHECK-GI-NEXT: str q0, [sp, #48] // 16-byte Folded Spill 261; CHECK-GI-NEXT: fmov d1, d10 262; CHECK-GI-NEXT: fmov d0, d8 263; CHECK-GI-NEXT: bl pow 264; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 265; CHECK-GI-NEXT: str q0, [sp, #16] // 16-byte Folded Spill 266; CHECK-GI-NEXT: ldr q1, [sp] // 16-byte Folded Reload 267; CHECK-GI-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload 268; CHECK-GI-NEXT: // kill: def $d1 killed $d1 killed $q1 269; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0 270; CHECK-GI-NEXT: bl pow 271; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 272; CHECK-GI-NEXT: str q0, [sp, #32] // 16-byte Folded Spill 273; CHECK-GI-NEXT: fmov d1, d11 274; CHECK-GI-NEXT: fmov d0, d9 275; CHECK-GI-NEXT: bl pow 276; CHECK-GI-NEXT: ldp q3, q1, [sp, #16] // 32-byte Folded Reload 277; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 278; CHECK-GI-NEXT: ldr q2, [sp, #48] // 16-byte Folded Reload 279; CHECK-GI-NEXT: ldp d9, d8, [sp, #80] // 16-byte Folded Reload 280; CHECK-GI-NEXT: ldr x30, [sp, #96] // 8-byte Folded Reload 281; CHECK-GI-NEXT: ldp d11, d10, [sp, #64] // 16-byte Folded Reload 282; CHECK-GI-NEXT: mov v2.d[1], v3.d[0] 283; 
CHECK-GI-NEXT: mov v1.d[1], v0.d[0] 284; CHECK-GI-NEXT: mov v0.16b, v2.16b 285; CHECK-GI-NEXT: add sp, sp, #112 286; CHECK-GI-NEXT: ret 287entry: 288 %c = call <4 x double> @llvm.pow.v4f64(<4 x double> %a, <4 x double> %b) 289 ret <4 x double> %c 290} 291 292define <2 x float> @pow_v2f32(<2 x float> %a, <2 x float> %b) { 293; CHECK-SD-LABEL: pow_v2f32: 294; CHECK-SD: // %bb.0: // %entry 295; CHECK-SD-NEXT: sub sp, sp, #64 296; CHECK-SD-NEXT: str x30, [sp, #48] // 8-byte Folded Spill 297; CHECK-SD-NEXT: .cfi_def_cfa_offset 64 298; CHECK-SD-NEXT: .cfi_offset w30, -16 299; CHECK-SD-NEXT: // kill: def $d1 killed $d1 def $q1 300; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0 301; CHECK-SD-NEXT: stp q0, q1, [sp] // 32-byte Folded Spill 302; CHECK-SD-NEXT: mov s0, v0.s[1] 303; CHECK-SD-NEXT: mov s1, v1.s[1] 304; CHECK-SD-NEXT: bl powf 305; CHECK-SD-NEXT: // kill: def $s0 killed $s0 def $q0 306; CHECK-SD-NEXT: str q0, [sp, #32] // 16-byte Folded Spill 307; CHECK-SD-NEXT: ldp q0, q1, [sp] // 32-byte Folded Reload 308; CHECK-SD-NEXT: // kill: def $s0 killed $s0 killed $q0 309; CHECK-SD-NEXT: // kill: def $s1 killed $s1 killed $q1 310; CHECK-SD-NEXT: bl powf 311; CHECK-SD-NEXT: ldr q1, [sp, #32] // 16-byte Folded Reload 312; CHECK-SD-NEXT: // kill: def $s0 killed $s0 def $q0 313; CHECK-SD-NEXT: ldr x30, [sp, #48] // 8-byte Folded Reload 314; CHECK-SD-NEXT: mov v0.s[1], v1.s[0] 315; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0 316; CHECK-SD-NEXT: add sp, sp, #64 317; CHECK-SD-NEXT: ret 318; 319; CHECK-GI-LABEL: pow_v2f32: 320; CHECK-GI: // %bb.0: // %entry 321; CHECK-GI-NEXT: sub sp, sp, #48 322; CHECK-GI-NEXT: stp d9, d8, [sp, #16] // 16-byte Folded Spill 323; CHECK-GI-NEXT: str x30, [sp, #32] // 8-byte Folded Spill 324; CHECK-GI-NEXT: .cfi_def_cfa_offset 48 325; CHECK-GI-NEXT: .cfi_offset w30, -16 326; CHECK-GI-NEXT: .cfi_offset b8, -24 327; CHECK-GI-NEXT: .cfi_offset b9, -32 328; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 329; CHECK-GI-NEXT: // kill: 
def $d1 killed $d1 def $q1 330; CHECK-GI-NEXT: mov s8, v0.s[1] 331; CHECK-GI-NEXT: mov s9, v1.s[1] 332; CHECK-GI-NEXT: // kill: def $s0 killed $s0 killed $q0 333; CHECK-GI-NEXT: // kill: def $s1 killed $s1 killed $q1 334; CHECK-GI-NEXT: bl powf 335; CHECK-GI-NEXT: // kill: def $s0 killed $s0 def $q0 336; CHECK-GI-NEXT: str q0, [sp] // 16-byte Folded Spill 337; CHECK-GI-NEXT: fmov s1, s9 338; CHECK-GI-NEXT: fmov s0, s8 339; CHECK-GI-NEXT: bl powf 340; CHECK-GI-NEXT: ldr q1, [sp] // 16-byte Folded Reload 341; CHECK-GI-NEXT: // kill: def $s0 killed $s0 def $q0 342; CHECK-GI-NEXT: ldp d9, d8, [sp, #16] // 16-byte Folded Reload 343; CHECK-GI-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload 344; CHECK-GI-NEXT: mov v1.s[1], v0.s[0] 345; CHECK-GI-NEXT: fmov d0, d1 346; CHECK-GI-NEXT: add sp, sp, #48 347; CHECK-GI-NEXT: ret 348entry: 349 %c = call <2 x float> @llvm.pow.v2f32(<2 x float> %a, <2 x float> %b) 350 ret <2 x float> %c 351} 352 353define <3 x float> @pow_v3f32(<3 x float> %a, <3 x float> %b) { 354; CHECK-SD-LABEL: pow_v3f32: 355; CHECK-SD: // %bb.0: // %entry 356; CHECK-SD-NEXT: sub sp, sp, #64 357; CHECK-SD-NEXT: str x30, [sp, #48] // 8-byte Folded Spill 358; CHECK-SD-NEXT: .cfi_def_cfa_offset 64 359; CHECK-SD-NEXT: .cfi_offset w30, -16 360; CHECK-SD-NEXT: stp q0, q1, [sp, #16] // 32-byte Folded Spill 361; CHECK-SD-NEXT: mov s0, v0.s[1] 362; CHECK-SD-NEXT: mov s1, v1.s[1] 363; CHECK-SD-NEXT: bl powf 364; CHECK-SD-NEXT: // kill: def $s0 killed $s0 def $q0 365; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill 366; CHECK-SD-NEXT: ldp q0, q1, [sp, #16] // 32-byte Folded Reload 367; CHECK-SD-NEXT: // kill: def $s0 killed $s0 killed $q0 368; CHECK-SD-NEXT: // kill: def $s1 killed $s1 killed $q1 369; CHECK-SD-NEXT: bl powf 370; CHECK-SD-NEXT: ldr q1, [sp] // 16-byte Folded Reload 371; CHECK-SD-NEXT: // kill: def $s0 killed $s0 def $q0 372; CHECK-SD-NEXT: mov v0.s[1], v1.s[0] 373; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill 374; CHECK-SD-NEXT: ldp q0, q1, [sp, 
#16] // 32-byte Folded Reload 375; CHECK-SD-NEXT: mov s0, v0.s[2] 376; CHECK-SD-NEXT: mov s1, v1.s[2] 377; CHECK-SD-NEXT: bl powf 378; CHECK-SD-NEXT: ldr q1, [sp] // 16-byte Folded Reload 379; CHECK-SD-NEXT: // kill: def $s0 killed $s0 def $q0 380; CHECK-SD-NEXT: ldr x30, [sp, #48] // 8-byte Folded Reload 381; CHECK-SD-NEXT: mov v1.s[2], v0.s[0] 382; CHECK-SD-NEXT: mov v0.16b, v1.16b 383; CHECK-SD-NEXT: add sp, sp, #64 384; CHECK-SD-NEXT: ret 385; 386; CHECK-GI-LABEL: pow_v3f32: 387; CHECK-GI: // %bb.0: // %entry 388; CHECK-GI-NEXT: sub sp, sp, #80 389; CHECK-GI-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill 390; CHECK-GI-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill 391; CHECK-GI-NEXT: str x30, [sp, #64] // 8-byte Folded Spill 392; CHECK-GI-NEXT: .cfi_def_cfa_offset 80 393; CHECK-GI-NEXT: .cfi_offset w30, -16 394; CHECK-GI-NEXT: .cfi_offset b8, -24 395; CHECK-GI-NEXT: .cfi_offset b9, -32 396; CHECK-GI-NEXT: .cfi_offset b10, -40 397; CHECK-GI-NEXT: .cfi_offset b11, -48 398; CHECK-GI-NEXT: mov s8, v0.s[1] 399; CHECK-GI-NEXT: mov s9, v0.s[2] 400; CHECK-GI-NEXT: // kill: def $s0 killed $s0 killed $q0 401; CHECK-GI-NEXT: mov s10, v1.s[1] 402; CHECK-GI-NEXT: mov s11, v1.s[2] 403; CHECK-GI-NEXT: // kill: def $s1 killed $s1 killed $q1 404; CHECK-GI-NEXT: bl powf 405; CHECK-GI-NEXT: // kill: def $s0 killed $s0 def $q0 406; CHECK-GI-NEXT: str q0, [sp, #16] // 16-byte Folded Spill 407; CHECK-GI-NEXT: fmov s1, s10 408; CHECK-GI-NEXT: fmov s0, s8 409; CHECK-GI-NEXT: bl powf 410; CHECK-GI-NEXT: // kill: def $s0 killed $s0 def $q0 411; CHECK-GI-NEXT: str q0, [sp] // 16-byte Folded Spill 412; CHECK-GI-NEXT: fmov s1, s11 413; CHECK-GI-NEXT: fmov s0, s9 414; CHECK-GI-NEXT: bl powf 415; CHECK-GI-NEXT: ldp q2, q1, [sp] // 32-byte Folded Reload 416; CHECK-GI-NEXT: // kill: def $s0 killed $s0 def $q0 417; CHECK-GI-NEXT: ldr x30, [sp, #64] // 8-byte Folded Reload 418; CHECK-GI-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload 419; CHECK-GI-NEXT: ldp d11, d10, [sp, #32] 
// 16-byte Folded Reload 420; CHECK-GI-NEXT: mov v1.s[1], v2.s[0] 421; CHECK-GI-NEXT: mov v1.s[2], v0.s[0] 422; CHECK-GI-NEXT: mov v0.16b, v1.16b 423; CHECK-GI-NEXT: add sp, sp, #80 424; CHECK-GI-NEXT: ret 425entry: 426 %c = call <3 x float> @llvm.pow.v3f32(<3 x float> %a, <3 x float> %b) 427 ret <3 x float> %c 428} 429 430define <4 x float> @pow_v4f32(<4 x float> %a, <4 x float> %b) { 431; CHECK-SD-LABEL: pow_v4f32: 432; CHECK-SD: // %bb.0: // %entry 433; CHECK-SD-NEXT: sub sp, sp, #64 434; CHECK-SD-NEXT: str x30, [sp, #48] // 8-byte Folded Spill 435; CHECK-SD-NEXT: .cfi_def_cfa_offset 64 436; CHECK-SD-NEXT: .cfi_offset w30, -16 437; CHECK-SD-NEXT: stp q0, q1, [sp, #16] // 32-byte Folded Spill 438; CHECK-SD-NEXT: mov s0, v0.s[1] 439; CHECK-SD-NEXT: mov s1, v1.s[1] 440; CHECK-SD-NEXT: bl powf 441; CHECK-SD-NEXT: // kill: def $s0 killed $s0 def $q0 442; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill 443; CHECK-SD-NEXT: ldp q0, q1, [sp, #16] // 32-byte Folded Reload 444; CHECK-SD-NEXT: // kill: def $s0 killed $s0 killed $q0 445; CHECK-SD-NEXT: // kill: def $s1 killed $s1 killed $q1 446; CHECK-SD-NEXT: bl powf 447; CHECK-SD-NEXT: ldr q1, [sp] // 16-byte Folded Reload 448; CHECK-SD-NEXT: // kill: def $s0 killed $s0 def $q0 449; CHECK-SD-NEXT: mov v0.s[1], v1.s[0] 450; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill 451; CHECK-SD-NEXT: ldp q0, q1, [sp, #16] // 32-byte Folded Reload 452; CHECK-SD-NEXT: mov s0, v0.s[2] 453; CHECK-SD-NEXT: mov s1, v1.s[2] 454; CHECK-SD-NEXT: bl powf 455; CHECK-SD-NEXT: ldr q1, [sp] // 16-byte Folded Reload 456; CHECK-SD-NEXT: // kill: def $s0 killed $s0 def $q0 457; CHECK-SD-NEXT: mov v1.s[2], v0.s[0] 458; CHECK-SD-NEXT: str q1, [sp] // 16-byte Folded Spill 459; CHECK-SD-NEXT: ldp q0, q1, [sp, #16] // 32-byte Folded Reload 460; CHECK-SD-NEXT: mov s0, v0.s[3] 461; CHECK-SD-NEXT: mov s1, v1.s[3] 462; CHECK-SD-NEXT: bl powf 463; CHECK-SD-NEXT: ldr q1, [sp] // 16-byte Folded Reload 464; CHECK-SD-NEXT: // kill: def $s0 killed $s0 def 
$q0 465; CHECK-SD-NEXT: ldr x30, [sp, #48] // 8-byte Folded Reload 466; CHECK-SD-NEXT: mov v1.s[3], v0.s[0] 467; CHECK-SD-NEXT: mov v0.16b, v1.16b 468; CHECK-SD-NEXT: add sp, sp, #64 469; CHECK-SD-NEXT: ret 470; 471; CHECK-GI-LABEL: pow_v4f32: 472; CHECK-GI: // %bb.0: // %entry 473; CHECK-GI-NEXT: sub sp, sp, #112 474; CHECK-GI-NEXT: stp d13, d12, [sp, #48] // 16-byte Folded Spill 475; CHECK-GI-NEXT: stp d11, d10, [sp, #64] // 16-byte Folded Spill 476; CHECK-GI-NEXT: stp d9, d8, [sp, #80] // 16-byte Folded Spill 477; CHECK-GI-NEXT: str x30, [sp, #96] // 8-byte Folded Spill 478; CHECK-GI-NEXT: .cfi_def_cfa_offset 112 479; CHECK-GI-NEXT: .cfi_offset w30, -16 480; CHECK-GI-NEXT: .cfi_offset b8, -24 481; CHECK-GI-NEXT: .cfi_offset b9, -32 482; CHECK-GI-NEXT: .cfi_offset b10, -40 483; CHECK-GI-NEXT: .cfi_offset b11, -48 484; CHECK-GI-NEXT: .cfi_offset b12, -56 485; CHECK-GI-NEXT: .cfi_offset b13, -64 486; CHECK-GI-NEXT: mov s8, v0.s[1] 487; CHECK-GI-NEXT: mov s9, v0.s[2] 488; CHECK-GI-NEXT: mov s10, v0.s[3] 489; CHECK-GI-NEXT: mov s11, v1.s[1] 490; CHECK-GI-NEXT: // kill: def $s0 killed $s0 killed $q0 491; CHECK-GI-NEXT: mov s12, v1.s[2] 492; CHECK-GI-NEXT: mov s13, v1.s[3] 493; CHECK-GI-NEXT: // kill: def $s1 killed $s1 killed $q1 494; CHECK-GI-NEXT: bl powf 495; CHECK-GI-NEXT: // kill: def $s0 killed $s0 def $q0 496; CHECK-GI-NEXT: str q0, [sp, #32] // 16-byte Folded Spill 497; CHECK-GI-NEXT: fmov s1, s11 498; CHECK-GI-NEXT: fmov s0, s8 499; CHECK-GI-NEXT: bl powf 500; CHECK-GI-NEXT: // kill: def $s0 killed $s0 def $q0 501; CHECK-GI-NEXT: str q0, [sp, #16] // 16-byte Folded Spill 502; CHECK-GI-NEXT: fmov s1, s12 503; CHECK-GI-NEXT: fmov s0, s9 504; CHECK-GI-NEXT: bl powf 505; CHECK-GI-NEXT: // kill: def $s0 killed $s0 def $q0 506; CHECK-GI-NEXT: str q0, [sp] // 16-byte Folded Spill 507; CHECK-GI-NEXT: fmov s1, s13 508; CHECK-GI-NEXT: fmov s0, s10 509; CHECK-GI-NEXT: bl powf 510; CHECK-GI-NEXT: ldp q2, q1, [sp, #16] // 32-byte Folded Reload 511; CHECK-GI-NEXT: // kill: 
def $s0 killed $s0 def $q0 512; CHECK-GI-NEXT: ldr x30, [sp, #96] // 8-byte Folded Reload 513; CHECK-GI-NEXT: ldp d9, d8, [sp, #80] // 16-byte Folded Reload 514; CHECK-GI-NEXT: ldp d11, d10, [sp, #64] // 16-byte Folded Reload 515; CHECK-GI-NEXT: mov v1.s[1], v2.s[0] 516; CHECK-GI-NEXT: ldr q2, [sp] // 16-byte Folded Reload 517; CHECK-GI-NEXT: ldp d13, d12, [sp, #48] // 16-byte Folded Reload 518; CHECK-GI-NEXT: mov v1.s[2], v2.s[0] 519; CHECK-GI-NEXT: mov v1.s[3], v0.s[0] 520; CHECK-GI-NEXT: mov v0.16b, v1.16b 521; CHECK-GI-NEXT: add sp, sp, #112 522; CHECK-GI-NEXT: ret 523entry: 524 %c = call <4 x float> @llvm.pow.v4f32(<4 x float> %a, <4 x float> %b) 525 ret <4 x float> %c 526} 527 528define <8 x float> @pow_v8f32(<8 x float> %a, <8 x float> %b) { 529; CHECK-SD-LABEL: pow_v8f32: 530; CHECK-SD: // %bb.0: // %entry 531; CHECK-SD-NEXT: sub sp, sp, #96 532; CHECK-SD-NEXT: str x30, [sp, #80] // 8-byte Folded Spill 533; CHECK-SD-NEXT: .cfi_def_cfa_offset 96 534; CHECK-SD-NEXT: .cfi_offset w30, -16 535; CHECK-SD-NEXT: stp q0, q2, [sp] // 32-byte Folded Spill 536; CHECK-SD-NEXT: mov s0, v0.s[1] 537; CHECK-SD-NEXT: stp q1, q3, [sp, #32] // 32-byte Folded Spill 538; CHECK-SD-NEXT: mov s1, v2.s[1] 539; CHECK-SD-NEXT: bl powf 540; CHECK-SD-NEXT: // kill: def $s0 killed $s0 def $q0 541; CHECK-SD-NEXT: str q0, [sp, #64] // 16-byte Folded Spill 542; CHECK-SD-NEXT: ldp q0, q1, [sp] // 32-byte Folded Reload 543; CHECK-SD-NEXT: // kill: def $s0 killed $s0 killed $q0 544; CHECK-SD-NEXT: // kill: def $s1 killed $s1 killed $q1 545; CHECK-SD-NEXT: bl powf 546; CHECK-SD-NEXT: ldr q1, [sp, #64] // 16-byte Folded Reload 547; CHECK-SD-NEXT: // kill: def $s0 killed $s0 def $q0 548; CHECK-SD-NEXT: mov v0.s[1], v1.s[0] 549; CHECK-SD-NEXT: str q0, [sp, #64] // 16-byte Folded Spill 550; CHECK-SD-NEXT: ldp q0, q1, [sp] // 32-byte Folded Reload 551; CHECK-SD-NEXT: mov s0, v0.s[2] 552; CHECK-SD-NEXT: mov s1, v1.s[2] 553; CHECK-SD-NEXT: bl powf 554; CHECK-SD-NEXT: ldr q1, [sp, #64] // 16-byte 
Folded Reload 555; CHECK-SD-NEXT: // kill: def $s0 killed $s0 def $q0 556; CHECK-SD-NEXT: mov v1.s[2], v0.s[0] 557; CHECK-SD-NEXT: str q1, [sp, #64] // 16-byte Folded Spill 558; CHECK-SD-NEXT: ldp q0, q1, [sp] // 32-byte Folded Reload 559; CHECK-SD-NEXT: mov s0, v0.s[3] 560; CHECK-SD-NEXT: mov s1, v1.s[3] 561; CHECK-SD-NEXT: bl powf 562; CHECK-SD-NEXT: ldr q1, [sp, #64] // 16-byte Folded Reload 563; CHECK-SD-NEXT: // kill: def $s0 killed $s0 def $q0 564; CHECK-SD-NEXT: mov v1.s[3], v0.s[0] 565; CHECK-SD-NEXT: str q1, [sp, #64] // 16-byte Folded Spill 566; CHECK-SD-NEXT: ldp q0, q1, [sp, #32] // 32-byte Folded Reload 567; CHECK-SD-NEXT: mov s0, v0.s[1] 568; CHECK-SD-NEXT: mov s1, v1.s[1] 569; CHECK-SD-NEXT: bl powf 570; CHECK-SD-NEXT: // kill: def $s0 killed $s0 def $q0 571; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Folded Spill 572; CHECK-SD-NEXT: ldp q0, q1, [sp, #32] // 32-byte Folded Reload 573; CHECK-SD-NEXT: // kill: def $s0 killed $s0 killed $q0 574; CHECK-SD-NEXT: // kill: def $s1 killed $s1 killed $q1 575; CHECK-SD-NEXT: bl powf 576; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload 577; CHECK-SD-NEXT: // kill: def $s0 killed $s0 def $q0 578; CHECK-SD-NEXT: mov v0.s[1], v1.s[0] 579; CHECK-SD-NEXT: str q0, [sp, #16] // 16-byte Folded Spill 580; CHECK-SD-NEXT: ldp q0, q1, [sp, #32] // 32-byte Folded Reload 581; CHECK-SD-NEXT: mov s0, v0.s[2] 582; CHECK-SD-NEXT: mov s1, v1.s[2] 583; CHECK-SD-NEXT: bl powf 584; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload 585; CHECK-SD-NEXT: // kill: def $s0 killed $s0 def $q0 586; CHECK-SD-NEXT: mov v1.s[2], v0.s[0] 587; CHECK-SD-NEXT: str q1, [sp, #16] // 16-byte Folded Spill 588; CHECK-SD-NEXT: ldp q0, q1, [sp, #32] // 32-byte Folded Reload 589; CHECK-SD-NEXT: mov s0, v0.s[3] 590; CHECK-SD-NEXT: mov s1, v1.s[3] 591; CHECK-SD-NEXT: bl powf 592; CHECK-SD-NEXT: fmov s2, s0 593; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload 594; CHECK-SD-NEXT: ldr q0, [sp, #64] // 16-byte Folded Reload 595; 
CHECK-SD-NEXT: ldr x30, [sp, #80] // 8-byte Folded Reload 596; CHECK-SD-NEXT: mov v1.s[3], v2.s[0] 597; CHECK-SD-NEXT: add sp, sp, #96 598; CHECK-SD-NEXT: ret 599; 600; CHECK-GI-LABEL: pow_v8f32: 601; CHECK-GI: // %bb.0: // %entry 602; CHECK-GI-NEXT: sub sp, sp, #208 603; CHECK-GI-NEXT: stp d15, d14, [sp, #128] // 16-byte Folded Spill 604; CHECK-GI-NEXT: stp d13, d12, [sp, #144] // 16-byte Folded Spill 605; CHECK-GI-NEXT: stp d11, d10, [sp, #160] // 16-byte Folded Spill 606; CHECK-GI-NEXT: stp d9, d8, [sp, #176] // 16-byte Folded Spill 607; CHECK-GI-NEXT: str x30, [sp, #192] // 8-byte Folded Spill 608; CHECK-GI-NEXT: .cfi_def_cfa_offset 208 609; CHECK-GI-NEXT: .cfi_offset w30, -16 610; CHECK-GI-NEXT: .cfi_offset b8, -24 611; CHECK-GI-NEXT: .cfi_offset b9, -32 612; CHECK-GI-NEXT: .cfi_offset b10, -40 613; CHECK-GI-NEXT: .cfi_offset b11, -48 614; CHECK-GI-NEXT: .cfi_offset b12, -56 615; CHECK-GI-NEXT: .cfi_offset b13, -64 616; CHECK-GI-NEXT: .cfi_offset b14, -72 617; CHECK-GI-NEXT: .cfi_offset b15, -80 618; CHECK-GI-NEXT: mov v4.16b, v1.16b 619; CHECK-GI-NEXT: mov v1.16b, v2.16b 620; CHECK-GI-NEXT: mov s8, v0.s[1] 621; CHECK-GI-NEXT: mov s9, v0.s[2] 622; CHECK-GI-NEXT: mov s10, v0.s[3] 623; CHECK-GI-NEXT: // kill: def $s0 killed $s0 killed $q0 624; CHECK-GI-NEXT: mov s12, v3.s[1] 625; CHECK-GI-NEXT: mov s11, v3.s[2] 626; CHECK-GI-NEXT: mov s2, v4.s[1] 627; CHECK-GI-NEXT: stp q3, q4, [sp] // 32-byte Folded Spill 628; CHECK-GI-NEXT: mov s5, v4.s[3] 629; CHECK-GI-NEXT: mov s14, v1.s[1] 630; CHECK-GI-NEXT: mov s15, v1.s[2] 631; CHECK-GI-NEXT: mov s13, v1.s[3] 632; CHECK-GI-NEXT: // kill: def $s1 killed $s1 killed $q1 633; CHECK-GI-NEXT: str s2, [sp, #48] // 4-byte Folded Spill 634; CHECK-GI-NEXT: mov s2, v4.s[2] 635; CHECK-GI-NEXT: str s2, [sp, #112] // 4-byte Folded Spill 636; CHECK-GI-NEXT: mov s2, v3.s[3] 637; CHECK-GI-NEXT: stp s2, s5, [sp, #200] // 8-byte Folded Spill 638; CHECK-GI-NEXT: bl powf 639; CHECK-GI-NEXT: // kill: def $s0 killed $s0 def $q0 640; 
CHECK-GI-NEXT: str q0, [sp, #96] // 16-byte Folded Spill 641; CHECK-GI-NEXT: fmov s1, s14 642; CHECK-GI-NEXT: fmov s0, s8 643; CHECK-GI-NEXT: bl powf 644; CHECK-GI-NEXT: // kill: def $s0 killed $s0 def $q0 645; CHECK-GI-NEXT: str q0, [sp, #32] // 16-byte Folded Spill 646; CHECK-GI-NEXT: fmov s1, s15 647; CHECK-GI-NEXT: fmov s0, s9 648; CHECK-GI-NEXT: bl powf 649; CHECK-GI-NEXT: // kill: def $s0 killed $s0 def $q0 650; CHECK-GI-NEXT: str q0, [sp, #80] // 16-byte Folded Spill 651; CHECK-GI-NEXT: fmov s1, s13 652; CHECK-GI-NEXT: fmov s0, s10 653; CHECK-GI-NEXT: bl powf 654; CHECK-GI-NEXT: // kill: def $s0 killed $s0 def $q0 655; CHECK-GI-NEXT: str q0, [sp, #64] // 16-byte Folded Spill 656; CHECK-GI-NEXT: ldp q1, q0, [sp] // 32-byte Folded Reload 657; CHECK-GI-NEXT: // kill: def $s0 killed $s0 killed $q0 658; CHECK-GI-NEXT: // kill: def $s1 killed $s1 killed $q1 659; CHECK-GI-NEXT: bl powf 660; CHECK-GI-NEXT: fmov s1, s12 661; CHECK-GI-NEXT: // kill: def $s0 killed $s0 def $q0 662; CHECK-GI-NEXT: str q0, [sp, #16] // 16-byte Folded Spill 663; CHECK-GI-NEXT: ldr s0, [sp, #48] // 4-byte Folded Reload 664; CHECK-GI-NEXT: bl powf 665; CHECK-GI-NEXT: fmov s1, s11 666; CHECK-GI-NEXT: // kill: def $s0 killed $s0 def $q0 667; CHECK-GI-NEXT: str q0, [sp, #48] // 16-byte Folded Spill 668; CHECK-GI-NEXT: ldr s0, [sp, #112] // 4-byte Folded Reload 669; CHECK-GI-NEXT: bl powf 670; CHECK-GI-NEXT: // kill: def $s0 killed $s0 def $q0 671; CHECK-GI-NEXT: str q0, [sp, #112] // 16-byte Folded Spill 672; CHECK-GI-NEXT: ldp s1, s0, [sp, #200] // 8-byte Folded Reload 673; CHECK-GI-NEXT: bl powf 674; CHECK-GI-NEXT: ldp q3, q2, [sp, #16] // 32-byte Folded Reload 675; CHECK-GI-NEXT: // kill: def $s0 killed $s0 def $q0 676; CHECK-GI-NEXT: ldr q1, [sp, #96] // 16-byte Folded Reload 677; CHECK-GI-NEXT: ldp d9, d8, [sp, #176] // 16-byte Folded Reload 678; CHECK-GI-NEXT: ldr x30, [sp, #192] // 8-byte Folded Reload 679; CHECK-GI-NEXT: ldp d11, d10, [sp, #160] // 16-byte Folded Reload 680; 
CHECK-GI-NEXT: mov v1.s[1], v2.s[0] 681; CHECK-GI-NEXT: ldr q2, [sp, #48] // 16-byte Folded Reload 682; CHECK-GI-NEXT: ldp d13, d12, [sp, #144] // 16-byte Folded Reload 683; CHECK-GI-NEXT: mov v3.s[1], v2.s[0] 684; CHECK-GI-NEXT: ldr q2, [sp, #80] // 16-byte Folded Reload 685; CHECK-GI-NEXT: ldp d15, d14, [sp, #128] // 16-byte Folded Reload 686; CHECK-GI-NEXT: mov v1.s[2], v2.s[0] 687; CHECK-GI-NEXT: ldr q2, [sp, #112] // 16-byte Folded Reload 688; CHECK-GI-NEXT: mov v3.s[2], v2.s[0] 689; CHECK-GI-NEXT: ldr q2, [sp, #64] // 16-byte Folded Reload 690; CHECK-GI-NEXT: mov v1.s[3], v2.s[0] 691; CHECK-GI-NEXT: mov v3.s[3], v0.s[0] 692; CHECK-GI-NEXT: mov v2.16b, v1.16b 693; CHECK-GI-NEXT: mov v1.16b, v3.16b 694; CHECK-GI-NEXT: mov v0.16b, v2.16b 695; CHECK-GI-NEXT: add sp, sp, #208 696; CHECK-GI-NEXT: ret 697entry: 698 %c = call <8 x float> @llvm.pow.v8f32(<8 x float> %a, <8 x float> %b) 699 ret <8 x float> %c 700} 701 702define <7 x half> @pow_v7f16(<7 x half> %a, <7 x half> %b) { 703; CHECK-SD-LABEL: pow_v7f16: 704; CHECK-SD: // %bb.0: // %entry 705; CHECK-SD-NEXT: sub sp, sp, #64 706; CHECK-SD-NEXT: str x30, [sp, #48] // 8-byte Folded Spill 707; CHECK-SD-NEXT: .cfi_def_cfa_offset 64 708; CHECK-SD-NEXT: .cfi_offset w30, -16 709; CHECK-SD-NEXT: mov h3, v0.h[1] 710; CHECK-SD-NEXT: mov h2, v1.h[1] 711; CHECK-SD-NEXT: stp q0, q1, [sp] // 32-byte Folded Spill 712; CHECK-SD-NEXT: fcvt s0, h3 713; CHECK-SD-NEXT: fcvt s1, h2 714; CHECK-SD-NEXT: bl powf 715; CHECK-SD-NEXT: fcvt h0, s0 716; CHECK-SD-NEXT: str q0, [sp, #32] // 16-byte Folded Spill 717; CHECK-SD-NEXT: ldp q0, q1, [sp] // 32-byte Folded Reload 718; CHECK-SD-NEXT: fcvt s0, h0 719; CHECK-SD-NEXT: fcvt s1, h1 720; CHECK-SD-NEXT: bl powf 721; CHECK-SD-NEXT: fcvt h0, s0 722; CHECK-SD-NEXT: ldp q1, q2, [sp, #16] // 32-byte Folded Reload 723; CHECK-SD-NEXT: mov h1, v1.h[2] 724; CHECK-SD-NEXT: mov v0.h[1], v2.h[0] 725; CHECK-SD-NEXT: fcvt s1, h1 726; CHECK-SD-NEXT: str q0, [sp, #32] // 16-byte Folded Spill 727; 
CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload 728; CHECK-SD-NEXT: mov h0, v0.h[2] 729; CHECK-SD-NEXT: fcvt s0, h0 730; CHECK-SD-NEXT: bl powf 731; CHECK-SD-NEXT: fcvt h0, s0 732; CHECK-SD-NEXT: ldr q1, [sp, #32] // 16-byte Folded Reload 733; CHECK-SD-NEXT: mov v1.h[2], v0.h[0] 734; CHECK-SD-NEXT: str q1, [sp, #32] // 16-byte Folded Spill 735; CHECK-SD-NEXT: ldp q0, q1, [sp] // 32-byte Folded Reload 736; CHECK-SD-NEXT: mov h0, v0.h[3] 737; CHECK-SD-NEXT: mov h1, v1.h[3] 738; CHECK-SD-NEXT: fcvt s0, h0 739; CHECK-SD-NEXT: fcvt s1, h1 740; CHECK-SD-NEXT: bl powf 741; CHECK-SD-NEXT: fcvt h0, s0 742; CHECK-SD-NEXT: ldr q1, [sp, #32] // 16-byte Folded Reload 743; CHECK-SD-NEXT: mov v1.h[3], v0.h[0] 744; CHECK-SD-NEXT: str q1, [sp, #32] // 16-byte Folded Spill 745; CHECK-SD-NEXT: ldp q0, q1, [sp] // 32-byte Folded Reload 746; CHECK-SD-NEXT: mov h0, v0.h[4] 747; CHECK-SD-NEXT: mov h1, v1.h[4] 748; CHECK-SD-NEXT: fcvt s0, h0 749; CHECK-SD-NEXT: fcvt s1, h1 750; CHECK-SD-NEXT: bl powf 751; CHECK-SD-NEXT: fcvt h0, s0 752; CHECK-SD-NEXT: ldr q1, [sp, #32] // 16-byte Folded Reload 753; CHECK-SD-NEXT: mov v1.h[4], v0.h[0] 754; CHECK-SD-NEXT: str q1, [sp, #32] // 16-byte Folded Spill 755; CHECK-SD-NEXT: ldp q0, q1, [sp] // 32-byte Folded Reload 756; CHECK-SD-NEXT: mov h0, v0.h[5] 757; CHECK-SD-NEXT: mov h1, v1.h[5] 758; CHECK-SD-NEXT: fcvt s0, h0 759; CHECK-SD-NEXT: fcvt s1, h1 760; CHECK-SD-NEXT: bl powf 761; CHECK-SD-NEXT: fcvt h0, s0 762; CHECK-SD-NEXT: ldr q1, [sp, #32] // 16-byte Folded Reload 763; CHECK-SD-NEXT: mov v1.h[5], v0.h[0] 764; CHECK-SD-NEXT: str q1, [sp, #32] // 16-byte Folded Spill 765; CHECK-SD-NEXT: ldp q0, q1, [sp] // 32-byte Folded Reload 766; CHECK-SD-NEXT: mov h0, v0.h[6] 767; CHECK-SD-NEXT: mov h1, v1.h[6] 768; CHECK-SD-NEXT: fcvt s0, h0 769; CHECK-SD-NEXT: fcvt s1, h1 770; CHECK-SD-NEXT: bl powf 771; CHECK-SD-NEXT: fcvt h0, s0 772; CHECK-SD-NEXT: ldr q1, [sp, #32] // 16-byte Folded Reload 773; CHECK-SD-NEXT: mov v1.h[6], v0.h[0] 774; 
CHECK-SD-NEXT: str q1, [sp, #32] // 16-byte Folded Spill 775; CHECK-SD-NEXT: ldp q0, q1, [sp] // 32-byte Folded Reload 776; CHECK-SD-NEXT: mov h0, v0.h[7] 777; CHECK-SD-NEXT: mov h1, v1.h[7] 778; CHECK-SD-NEXT: fcvt s0, h0 779; CHECK-SD-NEXT: fcvt s1, h1 780; CHECK-SD-NEXT: bl powf 781; CHECK-SD-NEXT: fcvt h1, s0 782; CHECK-SD-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload 783; CHECK-SD-NEXT: ldr x30, [sp, #48] // 8-byte Folded Reload 784; CHECK-SD-NEXT: mov v0.h[7], v1.h[0] 785; CHECK-SD-NEXT: add sp, sp, #64 786; CHECK-SD-NEXT: ret 787; 788; CHECK-GI-LABEL: pow_v7f16: 789; CHECK-GI: // %bb.0: // %entry 790; CHECK-GI-NEXT: sub sp, sp, #176 791; CHECK-GI-NEXT: stp d15, d14, [sp, #96] // 16-byte Folded Spill 792; CHECK-GI-NEXT: stp d13, d12, [sp, #112] // 16-byte Folded Spill 793; CHECK-GI-NEXT: stp d11, d10, [sp, #128] // 16-byte Folded Spill 794; CHECK-GI-NEXT: stp d9, d8, [sp, #144] // 16-byte Folded Spill 795; CHECK-GI-NEXT: str x30, [sp, #160] // 8-byte Folded Spill 796; CHECK-GI-NEXT: .cfi_def_cfa_offset 176 797; CHECK-GI-NEXT: .cfi_offset w30, -16 798; CHECK-GI-NEXT: .cfi_offset b8, -24 799; CHECK-GI-NEXT: .cfi_offset b9, -32 800; CHECK-GI-NEXT: .cfi_offset b10, -40 801; CHECK-GI-NEXT: .cfi_offset b11, -48 802; CHECK-GI-NEXT: .cfi_offset b12, -56 803; CHECK-GI-NEXT: .cfi_offset b13, -64 804; CHECK-GI-NEXT: .cfi_offset b14, -72 805; CHECK-GI-NEXT: .cfi_offset b15, -80 806; CHECK-GI-NEXT: mov h2, v0.h[5] 807; CHECK-GI-NEXT: mov h9, v0.h[1] 808; CHECK-GI-NEXT: mov h10, v0.h[2] 809; CHECK-GI-NEXT: mov h11, v0.h[3] 810; CHECK-GI-NEXT: mov h12, v0.h[4] 811; CHECK-GI-NEXT: mov h14, v1.h[1] 812; CHECK-GI-NEXT: mov h15, v1.h[2] 813; CHECK-GI-NEXT: mov h8, v1.h[3] 814; CHECK-GI-NEXT: mov h13, v1.h[4] 815; CHECK-GI-NEXT: str h2, [sp, #48] // 2-byte Folded Spill 816; CHECK-GI-NEXT: mov h2, v0.h[6] 817; CHECK-GI-NEXT: fcvt s0, h0 818; CHECK-GI-NEXT: str h2, [sp, #80] // 2-byte Folded Spill 819; CHECK-GI-NEXT: mov h2, v1.h[5] 820; CHECK-GI-NEXT: str h2, [sp, #172] // 
2-byte Folded Spill 821; CHECK-GI-NEXT: mov h2, v1.h[6] 822; CHECK-GI-NEXT: fcvt s1, h1 823; CHECK-GI-NEXT: str h2, [sp, #174] // 2-byte Folded Spill 824; CHECK-GI-NEXT: bl powf 825; CHECK-GI-NEXT: fcvt s2, h9 826; CHECK-GI-NEXT: fcvt h0, s0 827; CHECK-GI-NEXT: fcvt s1, h14 828; CHECK-GI-NEXT: str q0, [sp, #64] // 16-byte Folded Spill 829; CHECK-GI-NEXT: fmov s0, s2 830; CHECK-GI-NEXT: bl powf 831; CHECK-GI-NEXT: fcvt s2, h10 832; CHECK-GI-NEXT: fcvt h0, s0 833; CHECK-GI-NEXT: fcvt s1, h15 834; CHECK-GI-NEXT: str q0, [sp, #32] // 16-byte Folded Spill 835; CHECK-GI-NEXT: fmov s0, s2 836; CHECK-GI-NEXT: bl powf 837; CHECK-GI-NEXT: fcvt s2, h11 838; CHECK-GI-NEXT: fcvt h0, s0 839; CHECK-GI-NEXT: fcvt s1, h8 840; CHECK-GI-NEXT: str q0, [sp, #16] // 16-byte Folded Spill 841; CHECK-GI-NEXT: fmov s0, s2 842; CHECK-GI-NEXT: bl powf 843; CHECK-GI-NEXT: fcvt s2, h12 844; CHECK-GI-NEXT: fcvt h0, s0 845; CHECK-GI-NEXT: fcvt s1, h13 846; CHECK-GI-NEXT: str q0, [sp] // 16-byte Folded Spill 847; CHECK-GI-NEXT: fmov s0, s2 848; CHECK-GI-NEXT: bl powf 849; CHECK-GI-NEXT: ldr h1, [sp, #48] // 2-byte Folded Reload 850; CHECK-GI-NEXT: fcvt h0, s0 851; CHECK-GI-NEXT: fcvt s2, h1 852; CHECK-GI-NEXT: str q0, [sp, #48] // 16-byte Folded Spill 853; CHECK-GI-NEXT: ldr h0, [sp, #172] // 2-byte Folded Reload 854; CHECK-GI-NEXT: fcvt s1, h0 855; CHECK-GI-NEXT: fmov s0, s2 856; CHECK-GI-NEXT: bl powf 857; CHECK-GI-NEXT: ldr h1, [sp, #80] // 2-byte Folded Reload 858; CHECK-GI-NEXT: fcvt h0, s0 859; CHECK-GI-NEXT: fcvt s2, h1 860; CHECK-GI-NEXT: str q0, [sp, #80] // 16-byte Folded Spill 861; CHECK-GI-NEXT: ldr h0, [sp, #174] // 2-byte Folded Reload 862; CHECK-GI-NEXT: fcvt s1, h0 863; CHECK-GI-NEXT: fmov s0, s2 864; CHECK-GI-NEXT: bl powf 865; CHECK-GI-NEXT: ldp q2, q3, [sp, #16] // 32-byte Folded Reload 866; CHECK-GI-NEXT: fcvt h0, s0 867; CHECK-GI-NEXT: ldr q1, [sp, #64] // 16-byte Folded Reload 868; CHECK-GI-NEXT: ldp d9, d8, [sp, #144] // 16-byte Folded Reload 869; CHECK-GI-NEXT: ldp d11, 
d10, [sp, #128] // 16-byte Folded Reload 870; CHECK-GI-NEXT: ldr x30, [sp, #160] // 8-byte Folded Reload 871; CHECK-GI-NEXT: mov v1.h[1], v3.h[0] 872; CHECK-GI-NEXT: ldp d13, d12, [sp, #112] // 16-byte Folded Reload 873; CHECK-GI-NEXT: ldp d15, d14, [sp, #96] // 16-byte Folded Reload 874; CHECK-GI-NEXT: mov v1.h[2], v2.h[0] 875; CHECK-GI-NEXT: ldr q2, [sp] // 16-byte Folded Reload 876; CHECK-GI-NEXT: mov v1.h[3], v2.h[0] 877; CHECK-GI-NEXT: ldr q2, [sp, #48] // 16-byte Folded Reload 878; CHECK-GI-NEXT: mov v1.h[4], v2.h[0] 879; CHECK-GI-NEXT: ldr q2, [sp, #80] // 16-byte Folded Reload 880; CHECK-GI-NEXT: mov v1.h[5], v2.h[0] 881; CHECK-GI-NEXT: mov v1.h[6], v0.h[0] 882; CHECK-GI-NEXT: mov v0.16b, v1.16b 883; CHECK-GI-NEXT: add sp, sp, #176 884; CHECK-GI-NEXT: ret 885entry: 886 %c = call <7 x half> @llvm.pow.v7f16(<7 x half> %a, <7 x half> %b) 887 ret <7 x half> %c 888} 889 890define <4 x half> @pow_v4f16(<4 x half> %a, <4 x half> %b) { 891; CHECK-SD-LABEL: pow_v4f16: 892; CHECK-SD: // %bb.0: // %entry 893; CHECK-SD-NEXT: sub sp, sp, #64 894; CHECK-SD-NEXT: str x30, [sp, #48] // 8-byte Folded Spill 895; CHECK-SD-NEXT: .cfi_def_cfa_offset 64 896; CHECK-SD-NEXT: .cfi_offset w30, -16 897; CHECK-SD-NEXT: // kill: def $d1 killed $d1 def $q1 898; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0 899; CHECK-SD-NEXT: mov h3, v0.h[1] 900; CHECK-SD-NEXT: mov h2, v1.h[1] 901; CHECK-SD-NEXT: stp q0, q1, [sp, #16] // 32-byte Folded Spill 902; CHECK-SD-NEXT: fcvt s0, h3 903; CHECK-SD-NEXT: fcvt s1, h2 904; CHECK-SD-NEXT: bl powf 905; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload 906; CHECK-SD-NEXT: fcvt h0, s0 907; CHECK-SD-NEXT: fcvt s2, h1 908; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill 909; CHECK-SD-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload 910; CHECK-SD-NEXT: fcvt s1, h0 911; CHECK-SD-NEXT: fmov s0, s2 912; CHECK-SD-NEXT: bl powf 913; CHECK-SD-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload 914; CHECK-SD-NEXT: fcvt h3, s0 915; CHECK-SD-NEXT: ldr 
q0, [sp, #32] // 16-byte Folded Reload 916; CHECK-SD-NEXT: mov h1, v1.h[2] 917; CHECK-SD-NEXT: mov h2, v0.h[2] 918; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload 919; CHECK-SD-NEXT: mov v3.h[1], v0.h[0] 920; CHECK-SD-NEXT: fcvt s0, h1 921; CHECK-SD-NEXT: fcvt s1, h2 922; CHECK-SD-NEXT: str q3, [sp] // 16-byte Folded Spill 923; CHECK-SD-NEXT: bl powf 924; CHECK-SD-NEXT: ldp q1, q2, [sp, #16] // 32-byte Folded Reload 925; CHECK-SD-NEXT: fcvt h0, s0 926; CHECK-SD-NEXT: ldr q3, [sp] // 16-byte Folded Reload 927; CHECK-SD-NEXT: mov h1, v1.h[3] 928; CHECK-SD-NEXT: mov h2, v2.h[3] 929; CHECK-SD-NEXT: mov v3.h[2], v0.h[0] 930; CHECK-SD-NEXT: fcvt s0, h1 931; CHECK-SD-NEXT: fcvt s1, h2 932; CHECK-SD-NEXT: str q3, [sp] // 16-byte Folded Spill 933; CHECK-SD-NEXT: bl powf 934; CHECK-SD-NEXT: fcvt h1, s0 935; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload 936; CHECK-SD-NEXT: ldr x30, [sp, #48] // 8-byte Folded Reload 937; CHECK-SD-NEXT: mov v0.h[3], v1.h[0] 938; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0 939; CHECK-SD-NEXT: add sp, sp, #64 940; CHECK-SD-NEXT: ret 941; 942; CHECK-GI-LABEL: pow_v4f16: 943; CHECK-GI: // %bb.0: // %entry 944; CHECK-GI-NEXT: sub sp, sp, #112 945; CHECK-GI-NEXT: stp d13, d12, [sp, #48] // 16-byte Folded Spill 946; CHECK-GI-NEXT: stp d11, d10, [sp, #64] // 16-byte Folded Spill 947; CHECK-GI-NEXT: stp d9, d8, [sp, #80] // 16-byte Folded Spill 948; CHECK-GI-NEXT: str x30, [sp, #96] // 8-byte Folded Spill 949; CHECK-GI-NEXT: .cfi_def_cfa_offset 112 950; CHECK-GI-NEXT: .cfi_offset w30, -16 951; CHECK-GI-NEXT: .cfi_offset b8, -24 952; CHECK-GI-NEXT: .cfi_offset b9, -32 953; CHECK-GI-NEXT: .cfi_offset b10, -40 954; CHECK-GI-NEXT: .cfi_offset b11, -48 955; CHECK-GI-NEXT: .cfi_offset b12, -56 956; CHECK-GI-NEXT: .cfi_offset b13, -64 957; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 958; CHECK-GI-NEXT: // kill: def $d1 killed $d1 def $q1 959; CHECK-GI-NEXT: mov h8, v0.h[1] 960; CHECK-GI-NEXT: mov h9, v0.h[2] 961; CHECK-GI-NEXT: 
mov h10, v0.h[3] 962; CHECK-GI-NEXT: mov h11, v1.h[1] 963; CHECK-GI-NEXT: mov h12, v1.h[2] 964; CHECK-GI-NEXT: mov h13, v1.h[3] 965; CHECK-GI-NEXT: fcvt s0, h0 966; CHECK-GI-NEXT: fcvt s1, h1 967; CHECK-GI-NEXT: bl powf 968; CHECK-GI-NEXT: fcvt s2, h8 969; CHECK-GI-NEXT: fcvt h0, s0 970; CHECK-GI-NEXT: fcvt s1, h11 971; CHECK-GI-NEXT: str q0, [sp, #32] // 16-byte Folded Spill 972; CHECK-GI-NEXT: fmov s0, s2 973; CHECK-GI-NEXT: bl powf 974; CHECK-GI-NEXT: fcvt s2, h9 975; CHECK-GI-NEXT: fcvt h0, s0 976; CHECK-GI-NEXT: fcvt s1, h12 977; CHECK-GI-NEXT: str q0, [sp] // 16-byte Folded Spill 978; CHECK-GI-NEXT: fmov s0, s2 979; CHECK-GI-NEXT: bl powf 980; CHECK-GI-NEXT: fcvt s2, h10 981; CHECK-GI-NEXT: fcvt h0, s0 982; CHECK-GI-NEXT: fcvt s1, h13 983; CHECK-GI-NEXT: str q0, [sp, #16] // 16-byte Folded Spill 984; CHECK-GI-NEXT: fmov s0, s2 985; CHECK-GI-NEXT: bl powf 986; CHECK-GI-NEXT: ldp q3, q2, [sp] // 32-byte Folded Reload 987; CHECK-GI-NEXT: fcvt h0, s0 988; CHECK-GI-NEXT: ldr q1, [sp, #32] // 16-byte Folded Reload 989; CHECK-GI-NEXT: ldp d9, d8, [sp, #80] // 16-byte Folded Reload 990; CHECK-GI-NEXT: ldp d11, d10, [sp, #64] // 16-byte Folded Reload 991; CHECK-GI-NEXT: ldr x30, [sp, #96] // 8-byte Folded Reload 992; CHECK-GI-NEXT: mov v1.h[1], v3.h[0] 993; CHECK-GI-NEXT: ldp d13, d12, [sp, #48] // 16-byte Folded Reload 994; CHECK-GI-NEXT: mov v1.h[2], v2.h[0] 995; CHECK-GI-NEXT: mov v1.h[3], v0.h[0] 996; CHECK-GI-NEXT: mov v0.16b, v1.16b 997; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0 998; CHECK-GI-NEXT: add sp, sp, #112 999; CHECK-GI-NEXT: ret 1000entry: 1001 %c = call <4 x half> @llvm.pow.v4f16(<4 x half> %a, <4 x half> %b) 1002 ret <4 x half> %c 1003} 1004 1005define <8 x half> @pow_v8f16(<8 x half> %a, <8 x half> %b) { 1006; CHECK-SD-LABEL: pow_v8f16: 1007; CHECK-SD: // %bb.0: // %entry 1008; CHECK-SD-NEXT: sub sp, sp, #64 1009; CHECK-SD-NEXT: str x30, [sp, #48] // 8-byte Folded Spill 1010; CHECK-SD-NEXT: .cfi_def_cfa_offset 64 1011; CHECK-SD-NEXT: 
.cfi_offset w30, -16 1012; CHECK-SD-NEXT: mov h3, v0.h[1] 1013; CHECK-SD-NEXT: mov h2, v1.h[1] 1014; CHECK-SD-NEXT: stp q0, q1, [sp] // 32-byte Folded Spill 1015; CHECK-SD-NEXT: fcvt s0, h3 1016; CHECK-SD-NEXT: fcvt s1, h2 1017; CHECK-SD-NEXT: bl powf 1018; CHECK-SD-NEXT: fcvt h0, s0 1019; CHECK-SD-NEXT: str q0, [sp, #32] // 16-byte Folded Spill 1020; CHECK-SD-NEXT: ldp q0, q1, [sp] // 32-byte Folded Reload 1021; CHECK-SD-NEXT: fcvt s0, h0 1022; CHECK-SD-NEXT: fcvt s1, h1 1023; CHECK-SD-NEXT: bl powf 1024; CHECK-SD-NEXT: fcvt h0, s0 1025; CHECK-SD-NEXT: ldp q1, q2, [sp, #16] // 32-byte Folded Reload 1026; CHECK-SD-NEXT: mov h1, v1.h[2] 1027; CHECK-SD-NEXT: mov v0.h[1], v2.h[0] 1028; CHECK-SD-NEXT: fcvt s1, h1 1029; CHECK-SD-NEXT: str q0, [sp, #32] // 16-byte Folded Spill 1030; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload 1031; CHECK-SD-NEXT: mov h0, v0.h[2] 1032; CHECK-SD-NEXT: fcvt s0, h0 1033; CHECK-SD-NEXT: bl powf 1034; CHECK-SD-NEXT: fcvt h0, s0 1035; CHECK-SD-NEXT: ldr q1, [sp, #32] // 16-byte Folded Reload 1036; CHECK-SD-NEXT: mov v1.h[2], v0.h[0] 1037; CHECK-SD-NEXT: str q1, [sp, #32] // 16-byte Folded Spill 1038; CHECK-SD-NEXT: ldp q0, q1, [sp] // 32-byte Folded Reload 1039; CHECK-SD-NEXT: mov h0, v0.h[3] 1040; CHECK-SD-NEXT: mov h1, v1.h[3] 1041; CHECK-SD-NEXT: fcvt s0, h0 1042; CHECK-SD-NEXT: fcvt s1, h1 1043; CHECK-SD-NEXT: bl powf 1044; CHECK-SD-NEXT: fcvt h0, s0 1045; CHECK-SD-NEXT: ldr q1, [sp, #32] // 16-byte Folded Reload 1046; CHECK-SD-NEXT: mov v1.h[3], v0.h[0] 1047; CHECK-SD-NEXT: str q1, [sp, #32] // 16-byte Folded Spill 1048; CHECK-SD-NEXT: ldp q0, q1, [sp] // 32-byte Folded Reload 1049; CHECK-SD-NEXT: mov h0, v0.h[4] 1050; CHECK-SD-NEXT: mov h1, v1.h[4] 1051; CHECK-SD-NEXT: fcvt s0, h0 1052; CHECK-SD-NEXT: fcvt s1, h1 1053; CHECK-SD-NEXT: bl powf 1054; CHECK-SD-NEXT: fcvt h0, s0 1055; CHECK-SD-NEXT: ldr q1, [sp, #32] // 16-byte Folded Reload 1056; CHECK-SD-NEXT: mov v1.h[4], v0.h[0] 1057; CHECK-SD-NEXT: str q1, [sp, #32] // 16-byte 
Folded Spill 1058; CHECK-SD-NEXT: ldp q0, q1, [sp] // 32-byte Folded Reload 1059; CHECK-SD-NEXT: mov h0, v0.h[5] 1060; CHECK-SD-NEXT: mov h1, v1.h[5] 1061; CHECK-SD-NEXT: fcvt s0, h0 1062; CHECK-SD-NEXT: fcvt s1, h1 1063; CHECK-SD-NEXT: bl powf 1064; CHECK-SD-NEXT: fcvt h0, s0 1065; CHECK-SD-NEXT: ldr q1, [sp, #32] // 16-byte Folded Reload 1066; CHECK-SD-NEXT: mov v1.h[5], v0.h[0] 1067; CHECK-SD-NEXT: str q1, [sp, #32] // 16-byte Folded Spill 1068; CHECK-SD-NEXT: ldp q0, q1, [sp] // 32-byte Folded Reload 1069; CHECK-SD-NEXT: mov h0, v0.h[6] 1070; CHECK-SD-NEXT: mov h1, v1.h[6] 1071; CHECK-SD-NEXT: fcvt s0, h0 1072; CHECK-SD-NEXT: fcvt s1, h1 1073; CHECK-SD-NEXT: bl powf 1074; CHECK-SD-NEXT: fcvt h0, s0 1075; CHECK-SD-NEXT: ldr q1, [sp, #32] // 16-byte Folded Reload 1076; CHECK-SD-NEXT: mov v1.h[6], v0.h[0] 1077; CHECK-SD-NEXT: str q1, [sp, #32] // 16-byte Folded Spill 1078; CHECK-SD-NEXT: ldp q0, q1, [sp] // 32-byte Folded Reload 1079; CHECK-SD-NEXT: mov h0, v0.h[7] 1080; CHECK-SD-NEXT: mov h1, v1.h[7] 1081; CHECK-SD-NEXT: fcvt s0, h0 1082; CHECK-SD-NEXT: fcvt s1, h1 1083; CHECK-SD-NEXT: bl powf 1084; CHECK-SD-NEXT: fcvt h1, s0 1085; CHECK-SD-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload 1086; CHECK-SD-NEXT: ldr x30, [sp, #48] // 8-byte Folded Reload 1087; CHECK-SD-NEXT: mov v0.h[7], v1.h[0] 1088; CHECK-SD-NEXT: add sp, sp, #64 1089; CHECK-SD-NEXT: ret 1090; 1091; CHECK-GI-LABEL: pow_v8f16: 1092; CHECK-GI: // %bb.0: // %entry 1093; CHECK-GI-NEXT: sub sp, sp, #192 1094; CHECK-GI-NEXT: stp d15, d14, [sp, #112] // 16-byte Folded Spill 1095; CHECK-GI-NEXT: stp d13, d12, [sp, #128] // 16-byte Folded Spill 1096; CHECK-GI-NEXT: stp d11, d10, [sp, #144] // 16-byte Folded Spill 1097; CHECK-GI-NEXT: stp d9, d8, [sp, #160] // 16-byte Folded Spill 1098; CHECK-GI-NEXT: str x30, [sp, #176] // 8-byte Folded Spill 1099; CHECK-GI-NEXT: .cfi_def_cfa_offset 192 1100; CHECK-GI-NEXT: .cfi_offset w30, -16 1101; CHECK-GI-NEXT: .cfi_offset b8, -24 1102; CHECK-GI-NEXT: .cfi_offset b9, 
-32 1103; CHECK-GI-NEXT: .cfi_offset b10, -40 1104; CHECK-GI-NEXT: .cfi_offset b11, -48 1105; CHECK-GI-NEXT: .cfi_offset b12, -56 1106; CHECK-GI-NEXT: .cfi_offset b13, -64 1107; CHECK-GI-NEXT: .cfi_offset b14, -72 1108; CHECK-GI-NEXT: .cfi_offset b15, -80 1109; CHECK-GI-NEXT: mov h2, v0.h[5] 1110; CHECK-GI-NEXT: mov h11, v0.h[1] 1111; CHECK-GI-NEXT: mov h12, v0.h[2] 1112; CHECK-GI-NEXT: mov h13, v0.h[3] 1113; CHECK-GI-NEXT: mov h14, v0.h[4] 1114; CHECK-GI-NEXT: mov h8, v1.h[1] 1115; CHECK-GI-NEXT: mov h9, v1.h[2] 1116; CHECK-GI-NEXT: mov h10, v1.h[3] 1117; CHECK-GI-NEXT: mov h15, v1.h[4] 1118; CHECK-GI-NEXT: str h2, [sp, #48] // 2-byte Folded Spill 1119; CHECK-GI-NEXT: mov h2, v0.h[6] 1120; CHECK-GI-NEXT: str h2, [sp, #64] // 2-byte Folded Spill 1121; CHECK-GI-NEXT: mov h2, v0.h[7] 1122; CHECK-GI-NEXT: fcvt s0, h0 1123; CHECK-GI-NEXT: str h2, [sp, #96] // 2-byte Folded Spill 1124; CHECK-GI-NEXT: mov h2, v1.h[5] 1125; CHECK-GI-NEXT: str h2, [sp, #186] // 2-byte Folded Spill 1126; CHECK-GI-NEXT: mov h2, v1.h[6] 1127; CHECK-GI-NEXT: str h2, [sp, #188] // 2-byte Folded Spill 1128; CHECK-GI-NEXT: mov h2, v1.h[7] 1129; CHECK-GI-NEXT: fcvt s1, h1 1130; CHECK-GI-NEXT: str h2, [sp, #190] // 2-byte Folded Spill 1131; CHECK-GI-NEXT: bl powf 1132; CHECK-GI-NEXT: fcvt s2, h11 1133; CHECK-GI-NEXT: fcvt h0, s0 1134; CHECK-GI-NEXT: fcvt s1, h8 1135; CHECK-GI-NEXT: str q0, [sp, #80] // 16-byte Folded Spill 1136; CHECK-GI-NEXT: fmov s0, s2 1137; CHECK-GI-NEXT: bl powf 1138; CHECK-GI-NEXT: fcvt s2, h12 1139; CHECK-GI-NEXT: fcvt h0, s0 1140; CHECK-GI-NEXT: fcvt s1, h9 1141; CHECK-GI-NEXT: str q0, [sp, #32] // 16-byte Folded Spill 1142; CHECK-GI-NEXT: fmov s0, s2 1143; CHECK-GI-NEXT: bl powf 1144; CHECK-GI-NEXT: fcvt s2, h13 1145; CHECK-GI-NEXT: fcvt h0, s0 1146; CHECK-GI-NEXT: fcvt s1, h10 1147; CHECK-GI-NEXT: str q0, [sp, #16] // 16-byte Folded Spill 1148; CHECK-GI-NEXT: fmov s0, s2 1149; CHECK-GI-NEXT: bl powf 1150; CHECK-GI-NEXT: fcvt s2, h14 1151; CHECK-GI-NEXT: fcvt h0, s0 1152; 
CHECK-GI-NEXT: fcvt s1, h15 1153; CHECK-GI-NEXT: str q0, [sp] // 16-byte Folded Spill 1154; CHECK-GI-NEXT: fmov s0, s2 1155; CHECK-GI-NEXT: bl powf 1156; CHECK-GI-NEXT: ldr h1, [sp, #48] // 2-byte Folded Reload 1157; CHECK-GI-NEXT: fcvt h0, s0 1158; CHECK-GI-NEXT: fcvt s2, h1 1159; CHECK-GI-NEXT: str q0, [sp, #48] // 16-byte Folded Spill 1160; CHECK-GI-NEXT: ldr h0, [sp, #186] // 2-byte Folded Reload 1161; CHECK-GI-NEXT: fcvt s1, h0 1162; CHECK-GI-NEXT: fmov s0, s2 1163; CHECK-GI-NEXT: bl powf 1164; CHECK-GI-NEXT: ldr h1, [sp, #64] // 2-byte Folded Reload 1165; CHECK-GI-NEXT: fcvt h0, s0 1166; CHECK-GI-NEXT: fcvt s2, h1 1167; CHECK-GI-NEXT: str q0, [sp, #64] // 16-byte Folded Spill 1168; CHECK-GI-NEXT: ldr h0, [sp, #188] // 2-byte Folded Reload 1169; CHECK-GI-NEXT: fcvt s1, h0 1170; CHECK-GI-NEXT: fmov s0, s2 1171; CHECK-GI-NEXT: bl powf 1172; CHECK-GI-NEXT: ldr h1, [sp, #96] // 2-byte Folded Reload 1173; CHECK-GI-NEXT: fcvt h0, s0 1174; CHECK-GI-NEXT: fcvt s2, h1 1175; CHECK-GI-NEXT: str q0, [sp, #96] // 16-byte Folded Spill 1176; CHECK-GI-NEXT: ldr h0, [sp, #190] // 2-byte Folded Reload 1177; CHECK-GI-NEXT: fcvt s1, h0 1178; CHECK-GI-NEXT: fmov s0, s2 1179; CHECK-GI-NEXT: bl powf 1180; CHECK-GI-NEXT: ldp q2, q3, [sp, #16] // 32-byte Folded Reload 1181; CHECK-GI-NEXT: fcvt h0, s0 1182; CHECK-GI-NEXT: ldr q1, [sp, #80] // 16-byte Folded Reload 1183; CHECK-GI-NEXT: ldp d9, d8, [sp, #160] // 16-byte Folded Reload 1184; CHECK-GI-NEXT: ldp d11, d10, [sp, #144] // 16-byte Folded Reload 1185; CHECK-GI-NEXT: ldr x30, [sp, #176] // 8-byte Folded Reload 1186; CHECK-GI-NEXT: mov v1.h[1], v3.h[0] 1187; CHECK-GI-NEXT: ldp d13, d12, [sp, #128] // 16-byte Folded Reload 1188; CHECK-GI-NEXT: ldp d15, d14, [sp, #112] // 16-byte Folded Reload 1189; CHECK-GI-NEXT: mov v1.h[2], v2.h[0] 1190; CHECK-GI-NEXT: ldr q2, [sp] // 16-byte Folded Reload 1191; CHECK-GI-NEXT: mov v1.h[3], v2.h[0] 1192; CHECK-GI-NEXT: ldp q3, q2, [sp, #48] // 32-byte Folded Reload 1193; CHECK-GI-NEXT: mov v1.h[4], 
v3.h[0] 1194; CHECK-GI-NEXT: mov v1.h[5], v2.h[0] 1195; CHECK-GI-NEXT: ldr q2, [sp, #96] // 16-byte Folded Reload 1196; CHECK-GI-NEXT: mov v1.h[6], v2.h[0] 1197; CHECK-GI-NEXT: mov v1.h[7], v0.h[0] 1198; CHECK-GI-NEXT: mov v0.16b, v1.16b 1199; CHECK-GI-NEXT: add sp, sp, #192 1200; CHECK-GI-NEXT: ret 1201entry: 1202 %c = call <8 x half> @llvm.pow.v8f16(<8 x half> %a, <8 x half> %b) 1203 ret <8 x half> %c 1204} 1205 1206define <16 x half> @pow_v16f16(<16 x half> %a, <16 x half> %b) { 1207; CHECK-SD-LABEL: pow_v16f16: 1208; CHECK-SD: // %bb.0: // %entry 1209; CHECK-SD-NEXT: sub sp, sp, #96 1210; CHECK-SD-NEXT: str x30, [sp, #80] // 8-byte Folded Spill 1211; CHECK-SD-NEXT: .cfi_def_cfa_offset 96 1212; CHECK-SD-NEXT: .cfi_offset w30, -16 1213; CHECK-SD-NEXT: stp q0, q1, [sp] // 32-byte Folded Spill 1214; CHECK-SD-NEXT: mov h1, v2.h[1] 1215; CHECK-SD-NEXT: stp q3, q2, [sp, #32] // 32-byte Folded Spill 1216; CHECK-SD-NEXT: mov h3, v0.h[1] 1217; CHECK-SD-NEXT: fcvt s1, h1 1218; CHECK-SD-NEXT: fcvt s0, h3 1219; CHECK-SD-NEXT: bl powf 1220; CHECK-SD-NEXT: fcvt h0, s0 1221; CHECK-SD-NEXT: ldr q1, [sp, #48] // 16-byte Folded Reload 1222; CHECK-SD-NEXT: fcvt s1, h1 1223; CHECK-SD-NEXT: str q0, [sp, #64] // 16-byte Folded Spill 1224; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload 1225; CHECK-SD-NEXT: fcvt s0, h0 1226; CHECK-SD-NEXT: bl powf 1227; CHECK-SD-NEXT: fcvt h0, s0 1228; CHECK-SD-NEXT: ldp q1, q2, [sp, #48] // 32-byte Folded Reload 1229; CHECK-SD-NEXT: mov h1, v1.h[2] 1230; CHECK-SD-NEXT: mov v0.h[1], v2.h[0] 1231; CHECK-SD-NEXT: fcvt s1, h1 1232; CHECK-SD-NEXT: str q0, [sp, #64] // 16-byte Folded Spill 1233; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload 1234; CHECK-SD-NEXT: mov h0, v0.h[2] 1235; CHECK-SD-NEXT: fcvt s0, h0 1236; CHECK-SD-NEXT: bl powf 1237; CHECK-SD-NEXT: fcvt h0, s0 1238; CHECK-SD-NEXT: ldr q1, [sp, #64] // 16-byte Folded Reload 1239; CHECK-SD-NEXT: mov v1.h[2], v0.h[0] 1240; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload 1241; 
CHECK-SD-NEXT: mov h0, v0.h[3] 1242; CHECK-SD-NEXT: str q1, [sp, #64] // 16-byte Folded Spill 1243; CHECK-SD-NEXT: ldr q1, [sp, #48] // 16-byte Folded Reload 1244; CHECK-SD-NEXT: mov h1, v1.h[3] 1245; CHECK-SD-NEXT: fcvt s0, h0 1246; CHECK-SD-NEXT: fcvt s1, h1 1247; CHECK-SD-NEXT: bl powf 1248; CHECK-SD-NEXT: fcvt h0, s0 1249; CHECK-SD-NEXT: ldr q1, [sp, #64] // 16-byte Folded Reload 1250; CHECK-SD-NEXT: mov v1.h[3], v0.h[0] 1251; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload 1252; CHECK-SD-NEXT: mov h0, v0.h[4] 1253; CHECK-SD-NEXT: str q1, [sp, #64] // 16-byte Folded Spill 1254; CHECK-SD-NEXT: ldr q1, [sp, #48] // 16-byte Folded Reload 1255; CHECK-SD-NEXT: mov h1, v1.h[4] 1256; CHECK-SD-NEXT: fcvt s0, h0 1257; CHECK-SD-NEXT: fcvt s1, h1 1258; CHECK-SD-NEXT: bl powf 1259; CHECK-SD-NEXT: fcvt h0, s0 1260; CHECK-SD-NEXT: ldr q1, [sp, #64] // 16-byte Folded Reload 1261; CHECK-SD-NEXT: mov v1.h[4], v0.h[0] 1262; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload 1263; CHECK-SD-NEXT: mov h0, v0.h[5] 1264; CHECK-SD-NEXT: str q1, [sp, #64] // 16-byte Folded Spill 1265; CHECK-SD-NEXT: ldr q1, [sp, #48] // 16-byte Folded Reload 1266; CHECK-SD-NEXT: mov h1, v1.h[5] 1267; CHECK-SD-NEXT: fcvt s0, h0 1268; CHECK-SD-NEXT: fcvt s1, h1 1269; CHECK-SD-NEXT: bl powf 1270; CHECK-SD-NEXT: fcvt h0, s0 1271; CHECK-SD-NEXT: ldr q1, [sp, #64] // 16-byte Folded Reload 1272; CHECK-SD-NEXT: mov v1.h[5], v0.h[0] 1273; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload 1274; CHECK-SD-NEXT: mov h0, v0.h[6] 1275; CHECK-SD-NEXT: str q1, [sp, #64] // 16-byte Folded Spill 1276; CHECK-SD-NEXT: ldr q1, [sp, #48] // 16-byte Folded Reload 1277; CHECK-SD-NEXT: mov h1, v1.h[6] 1278; CHECK-SD-NEXT: fcvt s0, h0 1279; CHECK-SD-NEXT: fcvt s1, h1 1280; CHECK-SD-NEXT: bl powf 1281; CHECK-SD-NEXT: fcvt h0, s0 1282; CHECK-SD-NEXT: ldr q1, [sp, #64] // 16-byte Folded Reload 1283; CHECK-SD-NEXT: mov v1.h[6], v0.h[0] 1284; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload 1285; CHECK-SD-NEXT: mov h0, 
v0.h[7] 1286; CHECK-SD-NEXT: str q1, [sp, #64] // 16-byte Folded Spill 1287; CHECK-SD-NEXT: ldr q1, [sp, #48] // 16-byte Folded Reload 1288; CHECK-SD-NEXT: mov h1, v1.h[7] 1289; CHECK-SD-NEXT: fcvt s0, h0 1290; CHECK-SD-NEXT: fcvt s1, h1 1291; CHECK-SD-NEXT: bl powf 1292; CHECK-SD-NEXT: fcvt h0, s0 1293; CHECK-SD-NEXT: ldr q1, [sp, #64] // 16-byte Folded Reload 1294; CHECK-SD-NEXT: mov v1.h[7], v0.h[0] 1295; CHECK-SD-NEXT: str q1, [sp, #64] // 16-byte Folded Spill 1296; CHECK-SD-NEXT: ldp q0, q1, [sp, #16] // 32-byte Folded Reload 1297; CHECK-SD-NEXT: mov h0, v0.h[1] 1298; CHECK-SD-NEXT: mov h1, v1.h[1] 1299; CHECK-SD-NEXT: fcvt s0, h0 1300; CHECK-SD-NEXT: fcvt s1, h1 1301; CHECK-SD-NEXT: bl powf 1302; CHECK-SD-NEXT: fcvt h0, s0 1303; CHECK-SD-NEXT: str q0, [sp, #48] // 16-byte Folded Spill 1304; CHECK-SD-NEXT: ldp q0, q1, [sp, #16] // 32-byte Folded Reload 1305; CHECK-SD-NEXT: fcvt s0, h0 1306; CHECK-SD-NEXT: fcvt s1, h1 1307; CHECK-SD-NEXT: bl powf 1308; CHECK-SD-NEXT: fcvt h0, s0 1309; CHECK-SD-NEXT: ldp q1, q2, [sp, #32] // 32-byte Folded Reload 1310; CHECK-SD-NEXT: mov h1, v1.h[2] 1311; CHECK-SD-NEXT: mov v0.h[1], v2.h[0] 1312; CHECK-SD-NEXT: fcvt s1, h1 1313; CHECK-SD-NEXT: str q0, [sp, #48] // 16-byte Folded Spill 1314; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload 1315; CHECK-SD-NEXT: mov h0, v0.h[2] 1316; CHECK-SD-NEXT: fcvt s0, h0 1317; CHECK-SD-NEXT: bl powf 1318; CHECK-SD-NEXT: fcvt h0, s0 1319; CHECK-SD-NEXT: ldr q1, [sp, #48] // 16-byte Folded Reload 1320; CHECK-SD-NEXT: mov v1.h[2], v0.h[0] 1321; CHECK-SD-NEXT: str q1, [sp, #48] // 16-byte Folded Spill 1322; CHECK-SD-NEXT: ldp q0, q1, [sp, #16] // 32-byte Folded Reload 1323; CHECK-SD-NEXT: mov h0, v0.h[3] 1324; CHECK-SD-NEXT: mov h1, v1.h[3] 1325; CHECK-SD-NEXT: fcvt s0, h0 1326; CHECK-SD-NEXT: fcvt s1, h1 1327; CHECK-SD-NEXT: bl powf 1328; CHECK-SD-NEXT: fcvt h0, s0 1329; CHECK-SD-NEXT: ldr q1, [sp, #48] // 16-byte Folded Reload 1330; CHECK-SD-NEXT: mov v1.h[3], v0.h[0] 1331; 
CHECK-SD-NEXT: str q1, [sp, #48] // 16-byte Folded Spill 1332; CHECK-SD-NEXT: ldp q0, q1, [sp, #16] // 32-byte Folded Reload 1333; CHECK-SD-NEXT: mov h0, v0.h[4] 1334; CHECK-SD-NEXT: mov h1, v1.h[4] 1335; CHECK-SD-NEXT: fcvt s0, h0 1336; CHECK-SD-NEXT: fcvt s1, h1 1337; CHECK-SD-NEXT: bl powf 1338; CHECK-SD-NEXT: fcvt h0, s0 1339; CHECK-SD-NEXT: ldr q1, [sp, #48] // 16-byte Folded Reload 1340; CHECK-SD-NEXT: mov v1.h[4], v0.h[0] 1341; CHECK-SD-NEXT: str q1, [sp, #48] // 16-byte Folded Spill 1342; CHECK-SD-NEXT: ldp q0, q1, [sp, #16] // 32-byte Folded Reload 1343; CHECK-SD-NEXT: mov h0, v0.h[5] 1344; CHECK-SD-NEXT: mov h1, v1.h[5] 1345; CHECK-SD-NEXT: fcvt s0, h0 1346; CHECK-SD-NEXT: fcvt s1, h1 1347; CHECK-SD-NEXT: bl powf 1348; CHECK-SD-NEXT: fcvt h0, s0 1349; CHECK-SD-NEXT: ldr q1, [sp, #48] // 16-byte Folded Reload 1350; CHECK-SD-NEXT: mov v1.h[5], v0.h[0] 1351; CHECK-SD-NEXT: str q1, [sp, #48] // 16-byte Folded Spill 1352; CHECK-SD-NEXT: ldp q0, q1, [sp, #16] // 32-byte Folded Reload 1353; CHECK-SD-NEXT: mov h0, v0.h[6] 1354; CHECK-SD-NEXT: mov h1, v1.h[6] 1355; CHECK-SD-NEXT: fcvt s0, h0 1356; CHECK-SD-NEXT: fcvt s1, h1 1357; CHECK-SD-NEXT: bl powf 1358; CHECK-SD-NEXT: fcvt h0, s0 1359; CHECK-SD-NEXT: ldr q1, [sp, #48] // 16-byte Folded Reload 1360; CHECK-SD-NEXT: mov v1.h[6], v0.h[0] 1361; CHECK-SD-NEXT: str q1, [sp, #48] // 16-byte Folded Spill 1362; CHECK-SD-NEXT: ldp q0, q1, [sp, #16] // 32-byte Folded Reload 1363; CHECK-SD-NEXT: mov h0, v0.h[7] 1364; CHECK-SD-NEXT: mov h1, v1.h[7] 1365; CHECK-SD-NEXT: fcvt s0, h0 1366; CHECK-SD-NEXT: fcvt s1, h1 1367; CHECK-SD-NEXT: bl powf 1368; CHECK-SD-NEXT: fmov s1, s0 1369; CHECK-SD-NEXT: ldr x30, [sp, #80] // 8-byte Folded Reload 1370; CHECK-SD-NEXT: fcvt h2, s1 1371; CHECK-SD-NEXT: ldp q1, q0, [sp, #48] // 32-byte Folded Reload 1372; CHECK-SD-NEXT: mov v1.h[7], v2.h[0] 1373; CHECK-SD-NEXT: add sp, sp, #96 1374; CHECK-SD-NEXT: ret 1375; 1376; CHECK-GI-LABEL: pow_v16f16: 1377; CHECK-GI: // %bb.0: // %entry 1378; 
CHECK-GI-NEXT: sub sp, sp, #448 1379; CHECK-GI-NEXT: stp d15, d14, [sp, #368] // 16-byte Folded Spill 1380; CHECK-GI-NEXT: stp d13, d12, [sp, #384] // 16-byte Folded Spill 1381; CHECK-GI-NEXT: stp d11, d10, [sp, #400] // 16-byte Folded Spill 1382; CHECK-GI-NEXT: stp d9, d8, [sp, #416] // 16-byte Folded Spill 1383; CHECK-GI-NEXT: stp x29, x30, [sp, #432] // 16-byte Folded Spill 1384; CHECK-GI-NEXT: .cfi_def_cfa_offset 448 1385; CHECK-GI-NEXT: .cfi_offset w30, -8 1386; CHECK-GI-NEXT: .cfi_offset w29, -16 1387; CHECK-GI-NEXT: .cfi_offset b8, -24 1388; CHECK-GI-NEXT: .cfi_offset b9, -32 1389; CHECK-GI-NEXT: .cfi_offset b10, -40 1390; CHECK-GI-NEXT: .cfi_offset b11, -48 1391; CHECK-GI-NEXT: .cfi_offset b12, -56 1392; CHECK-GI-NEXT: .cfi_offset b13, -64 1393; CHECK-GI-NEXT: .cfi_offset b14, -72 1394; CHECK-GI-NEXT: .cfi_offset b15, -80 1395; CHECK-GI-NEXT: mov v4.16b, v1.16b 1396; CHECK-GI-NEXT: str q1, [sp, #112] // 16-byte Folded Spill 1397; CHECK-GI-NEXT: mov h1, v0.h[4] 1398; CHECK-GI-NEXT: mov h12, v0.h[1] 1399; CHECK-GI-NEXT: mov h13, v0.h[2] 1400; CHECK-GI-NEXT: str q3, [sp, #16] // 16-byte Folded Spill 1401; CHECK-GI-NEXT: mov h14, v0.h[3] 1402; CHECK-GI-NEXT: mov h15, v2.h[1] 1403; CHECK-GI-NEXT: mov h8, v2.h[2] 1404; CHECK-GI-NEXT: mov h9, v2.h[3] 1405; CHECK-GI-NEXT: mov h10, v2.h[4] 1406; CHECK-GI-NEXT: mov h11, v2.h[5] 1407; CHECK-GI-NEXT: str h1, [sp, #272] // 2-byte Folded Spill 1408; CHECK-GI-NEXT: mov h1, v0.h[5] 1409; CHECK-GI-NEXT: str h1, [sp, #240] // 2-byte Folded Spill 1410; CHECK-GI-NEXT: mov h1, v0.h[6] 1411; CHECK-GI-NEXT: str h1, [sp, #176] // 2-byte Folded Spill 1412; CHECK-GI-NEXT: mov h1, v0.h[7] 1413; CHECK-GI-NEXT: fcvt s0, h0 1414; CHECK-GI-NEXT: str h1, [sp, #144] // 2-byte Folded Spill 1415; CHECK-GI-NEXT: mov h1, v4.h[1] 1416; CHECK-GI-NEXT: str h1, [sp, #48] // 2-byte Folded Spill 1417; CHECK-GI-NEXT: mov h1, v4.h[2] 1418; CHECK-GI-NEXT: str h1, [sp, #80] // 2-byte Folded Spill 1419; CHECK-GI-NEXT: mov h1, v4.h[3] 1420; CHECK-GI-NEXT: 
str h1, [sp, #128] // 2-byte Folded Spill 1421; CHECK-GI-NEXT: mov h1, v4.h[4] 1422; CHECK-GI-NEXT: str h1, [sp, #192] // 2-byte Folded Spill 1423; CHECK-GI-NEXT: mov h1, v4.h[5] 1424; CHECK-GI-NEXT: str h1, [sp, #256] // 2-byte Folded Spill 1425; CHECK-GI-NEXT: mov h1, v4.h[6] 1426; CHECK-GI-NEXT: str h1, [sp, #336] // 2-byte Folded Spill 1427; CHECK-GI-NEXT: mov h1, v4.h[7] 1428; CHECK-GI-NEXT: str h1, [sp, #352] // 2-byte Folded Spill 1429; CHECK-GI-NEXT: mov h1, v2.h[6] 1430; CHECK-GI-NEXT: str h1, [sp, #12] // 2-byte Folded Spill 1431; CHECK-GI-NEXT: mov h1, v2.h[7] 1432; CHECK-GI-NEXT: str h1, [sp, #14] // 2-byte Folded Spill 1433; CHECK-GI-NEXT: mov h1, v3.h[1] 1434; CHECK-GI-NEXT: str h1, [sp, #44] // 2-byte Folded Spill 1435; CHECK-GI-NEXT: mov h1, v3.h[2] 1436; CHECK-GI-NEXT: str h1, [sp, #46] // 2-byte Folded Spill 1437; CHECK-GI-NEXT: mov h1, v3.h[3] 1438; CHECK-GI-NEXT: str h1, [sp, #78] // 2-byte Folded Spill 1439; CHECK-GI-NEXT: mov h1, v3.h[4] 1440; CHECK-GI-NEXT: str h1, [sp, #110] // 2-byte Folded Spill 1441; CHECK-GI-NEXT: mov h1, v3.h[5] 1442; CHECK-GI-NEXT: str h1, [sp, #174] // 2-byte Folded Spill 1443; CHECK-GI-NEXT: mov h1, v3.h[6] 1444; CHECK-GI-NEXT: str h1, [sp, #238] // 2-byte Folded Spill 1445; CHECK-GI-NEXT: mov h1, v3.h[7] 1446; CHECK-GI-NEXT: str h1, [sp, #302] // 2-byte Folded Spill 1447; CHECK-GI-NEXT: fcvt s1, h2 1448; CHECK-GI-NEXT: bl powf 1449; CHECK-GI-NEXT: fcvt s2, h12 1450; CHECK-GI-NEXT: fcvt h0, s0 1451; CHECK-GI-NEXT: fcvt s1, h15 1452; CHECK-GI-NEXT: str q0, [sp, #304] // 16-byte Folded Spill 1453; CHECK-GI-NEXT: fmov s0, s2 1454; CHECK-GI-NEXT: bl powf 1455; CHECK-GI-NEXT: fcvt s2, h13 1456; CHECK-GI-NEXT: fcvt h0, s0 1457; CHECK-GI-NEXT: fcvt s1, h8 1458; CHECK-GI-NEXT: str q0, [sp, #208] // 16-byte Folded Spill 1459; CHECK-GI-NEXT: fmov s0, s2 1460; CHECK-GI-NEXT: bl powf 1461; CHECK-GI-NEXT: fcvt s2, h14 1462; CHECK-GI-NEXT: fcvt h0, s0 1463; CHECK-GI-NEXT: fcvt s1, h9 1464; CHECK-GI-NEXT: str q0, [sp, #320] // 
16-byte Folded Spill 1465; CHECK-GI-NEXT: fmov s0, s2 1466; CHECK-GI-NEXT: bl powf 1467; CHECK-GI-NEXT: ldr h1, [sp, #272] // 2-byte Folded Reload 1468; CHECK-GI-NEXT: fcvt h0, s0 1469; CHECK-GI-NEXT: fcvt s2, h1 1470; CHECK-GI-NEXT: fcvt s1, h10 1471; CHECK-GI-NEXT: str q0, [sp, #272] // 16-byte Folded Spill 1472; CHECK-GI-NEXT: fmov s0, s2 1473; CHECK-GI-NEXT: bl powf 1474; CHECK-GI-NEXT: ldr h1, [sp, #240] // 2-byte Folded Reload 1475; CHECK-GI-NEXT: fcvt h0, s0 1476; CHECK-GI-NEXT: fcvt s2, h1 1477; CHECK-GI-NEXT: fcvt s1, h11 1478; CHECK-GI-NEXT: str q0, [sp, #240] // 16-byte Folded Spill 1479; CHECK-GI-NEXT: fmov s0, s2 1480; CHECK-GI-NEXT: bl powf 1481; CHECK-GI-NEXT: ldr h1, [sp, #176] // 2-byte Folded Reload 1482; CHECK-GI-NEXT: fcvt h0, s0 1483; CHECK-GI-NEXT: fcvt s2, h1 1484; CHECK-GI-NEXT: str q0, [sp, #176] // 16-byte Folded Spill 1485; CHECK-GI-NEXT: ldr h0, [sp, #12] // 2-byte Folded Reload 1486; CHECK-GI-NEXT: fcvt s1, h0 1487; CHECK-GI-NEXT: fmov s0, s2 1488; CHECK-GI-NEXT: bl powf 1489; CHECK-GI-NEXT: ldr h1, [sp, #144] // 2-byte Folded Reload 1490; CHECK-GI-NEXT: fcvt h0, s0 1491; CHECK-GI-NEXT: fcvt s2, h1 1492; CHECK-GI-NEXT: str q0, [sp, #144] // 16-byte Folded Spill 1493; CHECK-GI-NEXT: ldr h0, [sp, #14] // 2-byte Folded Reload 1494; CHECK-GI-NEXT: fcvt s1, h0 1495; CHECK-GI-NEXT: fmov s0, s2 1496; CHECK-GI-NEXT: bl powf 1497; CHECK-GI-NEXT: ldr q1, [sp, #112] // 16-byte Folded Reload 1498; CHECK-GI-NEXT: fcvt h0, s0 1499; CHECK-GI-NEXT: fcvt s2, h1 1500; CHECK-GI-NEXT: str q0, [sp, #112] // 16-byte Folded Spill 1501; CHECK-GI-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload 1502; CHECK-GI-NEXT: fcvt s1, h0 1503; CHECK-GI-NEXT: fmov s0, s2 1504; CHECK-GI-NEXT: bl powf 1505; CHECK-GI-NEXT: ldr h1, [sp, #48] // 2-byte Folded Reload 1506; CHECK-GI-NEXT: fcvt h0, s0 1507; CHECK-GI-NEXT: fcvt s2, h1 1508; CHECK-GI-NEXT: str q0, [sp, #48] // 16-byte Folded Spill 1509; CHECK-GI-NEXT: ldr h0, [sp, #44] // 2-byte Folded Reload 1510; CHECK-GI-NEXT: 
fcvt s1, h0 1511; CHECK-GI-NEXT: fmov s0, s2 1512; CHECK-GI-NEXT: bl powf 1513; CHECK-GI-NEXT: ldr h1, [sp, #80] // 2-byte Folded Reload 1514; CHECK-GI-NEXT: fcvt h0, s0 1515; CHECK-GI-NEXT: fcvt s2, h1 1516; CHECK-GI-NEXT: str q0, [sp, #80] // 16-byte Folded Spill 1517; CHECK-GI-NEXT: ldr h0, [sp, #46] // 2-byte Folded Reload 1518; CHECK-GI-NEXT: fcvt s1, h0 1519; CHECK-GI-NEXT: fmov s0, s2 1520; CHECK-GI-NEXT: bl powf 1521; CHECK-GI-NEXT: ldr h1, [sp, #128] // 2-byte Folded Reload 1522; CHECK-GI-NEXT: fcvt h0, s0 1523; CHECK-GI-NEXT: fcvt s2, h1 1524; CHECK-GI-NEXT: str q0, [sp, #128] // 16-byte Folded Spill 1525; CHECK-GI-NEXT: ldr h0, [sp, #78] // 2-byte Folded Reload 1526; CHECK-GI-NEXT: fcvt s1, h0 1527; CHECK-GI-NEXT: fmov s0, s2 1528; CHECK-GI-NEXT: bl powf 1529; CHECK-GI-NEXT: ldr h1, [sp, #192] // 2-byte Folded Reload 1530; CHECK-GI-NEXT: fcvt h0, s0 1531; CHECK-GI-NEXT: fcvt s2, h1 1532; CHECK-GI-NEXT: str q0, [sp, #192] // 16-byte Folded Spill 1533; CHECK-GI-NEXT: ldr h0, [sp, #110] // 2-byte Folded Reload 1534; CHECK-GI-NEXT: fcvt s1, h0 1535; CHECK-GI-NEXT: fmov s0, s2 1536; CHECK-GI-NEXT: bl powf 1537; CHECK-GI-NEXT: ldr h1, [sp, #256] // 2-byte Folded Reload 1538; CHECK-GI-NEXT: fcvt h0, s0 1539; CHECK-GI-NEXT: fcvt s2, h1 1540; CHECK-GI-NEXT: str q0, [sp, #256] // 16-byte Folded Spill 1541; CHECK-GI-NEXT: ldr h0, [sp, #174] // 2-byte Folded Reload 1542; CHECK-GI-NEXT: fcvt s1, h0 1543; CHECK-GI-NEXT: fmov s0, s2 1544; CHECK-GI-NEXT: bl powf 1545; CHECK-GI-NEXT: ldr h1, [sp, #336] // 2-byte Folded Reload 1546; CHECK-GI-NEXT: fcvt h0, s0 1547; CHECK-GI-NEXT: fcvt s2, h1 1548; CHECK-GI-NEXT: str q0, [sp, #336] // 16-byte Folded Spill 1549; CHECK-GI-NEXT: ldr h0, [sp, #238] // 2-byte Folded Reload 1550; CHECK-GI-NEXT: fcvt s1, h0 1551; CHECK-GI-NEXT: fmov s0, s2 1552; CHECK-GI-NEXT: bl powf 1553; CHECK-GI-NEXT: ldr h1, [sp, #352] // 2-byte Folded Reload 1554; CHECK-GI-NEXT: fcvt h0, s0 1555; CHECK-GI-NEXT: fcvt s2, h1 1556; CHECK-GI-NEXT: str q0, [sp, 
#352] // 16-byte Folded Spill 1557; CHECK-GI-NEXT: ldr h0, [sp, #302] // 2-byte Folded Reload 1558; CHECK-GI-NEXT: fcvt s1, h0 1559; CHECK-GI-NEXT: fmov s0, s2 1560; CHECK-GI-NEXT: bl powf 1561; CHECK-GI-NEXT: ldr q3, [sp, #304] // 16-byte Folded Reload 1562; CHECK-GI-NEXT: ldr q2, [sp, #208] // 16-byte Folded Reload 1563; CHECK-GI-NEXT: ldr q1, [sp, #48] // 16-byte Folded Reload 1564; CHECK-GI-NEXT: ldp x29, x30, [sp, #432] // 16-byte Folded Reload 1565; CHECK-GI-NEXT: mov v3.h[1], v2.h[0] 1566; CHECK-GI-NEXT: ldr q2, [sp, #80] // 16-byte Folded Reload 1567; CHECK-GI-NEXT: ldp d9, d8, [sp, #416] // 16-byte Folded Reload 1568; CHECK-GI-NEXT: mov v1.h[1], v2.h[0] 1569; CHECK-GI-NEXT: ldr q2, [sp, #320] // 16-byte Folded Reload 1570; CHECK-GI-NEXT: ldp d11, d10, [sp, #400] // 16-byte Folded Reload 1571; CHECK-GI-NEXT: mov v3.h[2], v2.h[0] 1572; CHECK-GI-NEXT: ldr q2, [sp, #128] // 16-byte Folded Reload 1573; CHECK-GI-NEXT: ldp d13, d12, [sp, #384] // 16-byte Folded Reload 1574; CHECK-GI-NEXT: mov v1.h[2], v2.h[0] 1575; CHECK-GI-NEXT: ldr q2, [sp, #272] // 16-byte Folded Reload 1576; CHECK-GI-NEXT: ldp d15, d14, [sp, #368] // 16-byte Folded Reload 1577; CHECK-GI-NEXT: mov v3.h[3], v2.h[0] 1578; CHECK-GI-NEXT: ldr q2, [sp, #192] // 16-byte Folded Reload 1579; CHECK-GI-NEXT: mov v1.h[3], v2.h[0] 1580; CHECK-GI-NEXT: ldp q4, q2, [sp, #240] // 32-byte Folded Reload 1581; CHECK-GI-NEXT: mov v3.h[4], v4.h[0] 1582; CHECK-GI-NEXT: mov v1.h[4], v2.h[0] 1583; CHECK-GI-NEXT: ldr q2, [sp, #176] // 16-byte Folded Reload 1584; CHECK-GI-NEXT: mov v3.h[5], v2.h[0] 1585; CHECK-GI-NEXT: ldr q2, [sp, #336] // 16-byte Folded Reload 1586; CHECK-GI-NEXT: mov v1.h[5], v2.h[0] 1587; CHECK-GI-NEXT: fcvt h2, s0 1588; CHECK-GI-NEXT: ldr q0, [sp, #144] // 16-byte Folded Reload 1589; CHECK-GI-NEXT: mov v3.h[6], v0.h[0] 1590; CHECK-GI-NEXT: ldr q0, [sp, #352] // 16-byte Folded Reload 1591; CHECK-GI-NEXT: mov v1.h[6], v0.h[0] 1592; CHECK-GI-NEXT: ldr q0, [sp, #112] // 16-byte Folded Reload 1593; 
; CHECK-GI-NEXT: mov v3.h[7], v0.h[0]
; CHECK-GI-NEXT: mov v1.h[7], v2.h[0]
; CHECK-GI-NEXT: mov v0.16b, v3.16b
; CHECK-GI-NEXT: add sp, sp, #448
; CHECK-GI-NEXT: ret
entry:
  %c = call <16 x half> @llvm.pow.v16f16(<16 x half> %a, <16 x half> %b)
  ret <16 x half> %c
}

; pow on a vector of two fp128 values.
; Both run lines (default selector and -global-isel) expect two scalar
; `bl powl` libcalls, one per element. The first result is spilled to [sp]
; across the second call and reloaded into q0; the second result is moved
; to q1. The two expected sequences differ only in the order in which the
; second pair of operands is spilled and reloaded.
define <2 x fp128> @pow_v2fp128(<2 x fp128> %a, <2 x fp128> %b) {
; CHECK-SD-LABEL: pow_v2fp128:
; CHECK-SD: // %bb.0: // %entry
; CHECK-SD-NEXT: sub sp, sp, #64
; CHECK-SD-NEXT: str x30, [sp, #48] // 8-byte Folded Spill
; CHECK-SD-NEXT: .cfi_def_cfa_offset 64
; CHECK-SD-NEXT: .cfi_offset w30, -16
; CHECK-SD-NEXT: stp q1, q3, [sp, #16] // 32-byte Folded Spill
; CHECK-SD-NEXT: mov v1.16b, v2.16b
; CHECK-SD-NEXT: bl powl
; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill
; CHECK-SD-NEXT: ldp q0, q1, [sp, #16] // 32-byte Folded Reload
; CHECK-SD-NEXT: bl powl
; CHECK-SD-NEXT: mov v1.16b, v0.16b
; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload
; CHECK-SD-NEXT: ldr x30, [sp, #48] // 8-byte Folded Reload
; CHECK-SD-NEXT: add sp, sp, #64
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: pow_v2fp128:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: sub sp, sp, #64
; CHECK-GI-NEXT: str x30, [sp, #48] // 8-byte Folded Spill
; CHECK-GI-NEXT: .cfi_def_cfa_offset 64
; CHECK-GI-NEXT: .cfi_offset w30, -16
; CHECK-GI-NEXT: stp q3, q1, [sp, #16] // 32-byte Folded Spill
; CHECK-GI-NEXT: mov v1.16b, v2.16b
; CHECK-GI-NEXT: bl powl
; CHECK-GI-NEXT: str q0, [sp] // 16-byte Folded Spill
; CHECK-GI-NEXT: ldp q1, q0, [sp, #16] // 32-byte Folded Reload
; CHECK-GI-NEXT: bl powl
; CHECK-GI-NEXT: mov v1.16b, v0.16b
; CHECK-GI-NEXT: ldr q0, [sp] // 16-byte Folded Reload
; CHECK-GI-NEXT: ldr x30, [sp, #48] // 8-byte Folded Reload
; CHECK-GI-NEXT: add sp, sp, #64
; CHECK-GI-NEXT: ret
entry:
  %c = call <2 x fp128> @llvm.pow.v2fp128(<2 x fp128> %a, <2 x fp128> %b)
  ret <2 x fp128> %c
}

; Declarations of the llvm.pow intrinsic overloads, kept in sorted order.
; The v1f64 overload is called by pow_v1f64 and is declared explicitly so the
; file does not depend on the IR parser declaring intrinsics implicitly.
declare <1 x double> @llvm.pow.v1f64(<1 x double>, <1 x double>)
declare <16 x half> @llvm.pow.v16f16(<16 x half>, <16 x half>)
declare <2 x double> @llvm.pow.v2f64(<2 x double>, <2 x double>)
declare <2 x float> @llvm.pow.v2f32(<2 x float>, <2 x float>)
declare <2 x fp128> @llvm.pow.v2fp128(<2 x fp128>, <2 x fp128>)
declare <3 x double> @llvm.pow.v3f64(<3 x double>, <3 x double>)
declare <3 x float> @llvm.pow.v3f32(<3 x float>, <3 x float>)
declare <4 x double> @llvm.pow.v4f64(<4 x double>, <4 x double>)
declare <4 x float> @llvm.pow.v4f32(<4 x float>, <4 x float>)
declare <4 x half> @llvm.pow.v4f16(<4 x half>, <4 x half>)
declare <7 x half> @llvm.pow.v7f16(<7 x half>, <7 x half>)
declare <8 x float> @llvm.pow.v8f32(<8 x float>, <8 x float>)
declare <8 x half> @llvm.pow.v8f16(<8 x half>, <8 x half>)
declare double @llvm.pow.f64(double, double)
declare float @llvm.pow.f32(float, float)
declare fp128 @llvm.pow.fp128(fp128, fp128)
declare half @llvm.pow.f16(half, half)