; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve,+fullfp16 -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,FULLFP16
; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,MVEFP
; RUN: llc -early-live-intervals -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve,+fullfp16 -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,FULLFP16
; RUN: llc -early-live-intervals -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,MVEFP

; Code generation tests for fast-math vector floating-point intrinsics on
; thumbv8.1m.main. The run lines above cover -mattr=+mve,+fullfp16 and
; -mattr=+mve.fp, each with and without -early-live-intervals. Every function
; takes and returns one 128-bit vector under the arm_aapcs_vfpcc convention.
; The expectations are autogenerated, so regenerate them with the script named
; in the first line instead of editing them by hand.

; sqrt on <4 x float>. The expected code is one scalar vsqrt.f32 per lane
; (s3 down to s0) and no library call.
define arm_aapcs_vfpcc <4 x float> @sqrt_float32_t(<4 x float> %src) {
; CHECK-LABEL: sqrt_float32_t:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vsqrt.f32 s3, s3
; CHECK-NEXT:    vsqrt.f32 s2, s2
; CHECK-NEXT:    vsqrt.f32 s1, s1
; CHECK-NEXT:    vsqrt.f32 s0, s0
; CHECK-NEXT:    bx lr
entry:
  %0 = call fast <4 x float> @llvm.sqrt.v4f32(<4 x float> %src)
  ret <4 x float> %0
}

; sqrt on <8 x half>. For each of s0-s3 the expected code uses vmovx.f16 into
; s4, two scalar vsqrt.f16 instructions and a vins.f16 back into the register.
define arm_aapcs_vfpcc <8 x half> @sqrt_float16_t(<8 x half> %src) {
; CHECK-LABEL: sqrt_float16_t:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmovx.f16 s4, s0
; CHECK-NEXT:    vsqrt.f16 s0, s0
; CHECK-NEXT:    vsqrt.f16 s4, s4
; CHECK-NEXT:    vins.f16 s0, s4
; CHECK-NEXT:    vmovx.f16 s4, s1
; CHECK-NEXT:    vsqrt.f16 s4, s4
; CHECK-NEXT:    vsqrt.f16 s1, s1
; CHECK-NEXT:    vins.f16 s1, s4
; CHECK-NEXT:    vmovx.f16 s4, s2
; CHECK-NEXT:    vsqrt.f16 s4, s4
; CHECK-NEXT:    vsqrt.f16 s2, s2
; CHECK-NEXT:    vins.f16 s2, s4
; CHECK-NEXT:    vmovx.f16 s4, s3
; CHECK-NEXT:    vsqrt.f16 s4, s4
; CHECK-NEXT:    vsqrt.f16 s3, s3
; CHECK-NEXT:    vins.f16 s3, s4
; CHECK-NEXT:    bx lr
entry:
  %0 = call fast <8 x half> @llvm.sqrt.v8f16(<8 x half> %src)
  ret <8 x half> %0
}

; sqrt on <2 x double>. The expected code makes two calls to sqrt, passing each
; lane in r0/r1 and keeping the vector in q4 (d8/d9 are saved and restored).
define arm_aapcs_vfpcc <2 x double> @sqrt_float64_t(<2 x double> %src) {
; CHECK-LABEL: sqrt_float64_t:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .save {r7, lr}
; CHECK-NEXT:    push {r7, lr}
; CHECK-NEXT:    .vsave {d8, d9}
; CHECK-NEXT:    vpush {d8, d9}
; CHECK-NEXT:    vmov q4, q0
; CHECK-NEXT:    vmov r0, r1, d9
; CHECK-NEXT:    bl sqrt
; CHECK-NEXT:    vmov r2, r3, d8
; CHECK-NEXT:    vmov d9, r0, r1
; CHECK-NEXT:    mov r0, r2
; CHECK-NEXT:    mov r1, r3
; CHECK-NEXT:    bl sqrt
; CHECK-NEXT:    vmov d8, r0, r1
; CHECK-NEXT:    vmov q0, q4
; CHECK-NEXT:    vpop {d8, d9}
; CHECK-NEXT:    pop {r7, pc}
entry:
  %0 = call fast <2 x double> @llvm.sqrt.v2f64(<2 x double> %src)
  ret <2 x double> %0
}

; cos on <4 x float>. The expected code makes four calls to cosf, one per lane,
; and rebuilds the result in q4 before moving it to q0.
define arm_aapcs_vfpcc <4 x float> @cos_float32_t(<4 x float> %src) {
; CHECK-LABEL: cos_float32_t:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .save {r4, r5, r7, lr}
; CHECK-NEXT:    push {r4, r5, r7, lr}
; CHECK-NEXT:    .vsave {d8, d9}
; CHECK-NEXT:    vpush {d8, d9}
; CHECK-NEXT:    vmov q4, q0
; CHECK-NEXT:    vmov r0, r4, d9
; CHECK-NEXT:    bl cosf
; CHECK-NEXT:    mov r5, r0
; CHECK-NEXT:    mov r0, r4
; CHECK-NEXT:    bl cosf
; CHECK-NEXT:    vmov r4, r1, d8
; CHECK-NEXT:    vmov s19, r0
; CHECK-NEXT:    vmov s18, r5
; CHECK-NEXT:    mov r0, r1
; CHECK-NEXT:    bl cosf
; CHECK-NEXT:    vmov s17, r0
; CHECK-NEXT:    mov r0, r4
; CHECK-NEXT:    bl cosf
; CHECK-NEXT:    vmov s16, r0
; CHECK-NEXT:    vmov q0, q4
; CHECK-NEXT:    vpop {d8, d9}
; CHECK-NEXT:    pop {r4, r5, r7, pc}
entry:
  %0 = call fast <4 x float> @llvm.cos.v4f32(<4 x float> %src)
  ret <4 x float> %0
}

; cos on <8 x half>. The expected code widens each lane with vcvtb/vcvtt,
; calls cosf eight times and narrows each result into q5 (s20-s23).
define arm_aapcs_vfpcc <8 x half> @cos_float16_t(<8 x half> %src) {
; CHECK-LABEL: cos_float16_t:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .save {r7, lr}
; CHECK-NEXT:    push {r7, lr}
; CHECK-NEXT:    .vsave {d8, d9, d10, d11}
; CHECK-NEXT:    vpush {d8, d9, d10, d11}
; CHECK-NEXT:    vmov q4, q0
; CHECK-NEXT:    vcvtb.f32.f16 s0, s16
; CHECK-NEXT:    vmov r0, s0
; CHECK-NEXT:    bl cosf
; CHECK-NEXT:    vcvtt.f32.f16 s0, s16
; CHECK-NEXT:    vmov s16, r0
; CHECK-NEXT:    vmov r1, s0
; CHECK-NEXT:    mov r0, r1
; CHECK-NEXT:    bl cosf
; CHECK-NEXT:    vmov s0, r0
; CHECK-NEXT:    vcvtb.f16.f32 s20, s16
; CHECK-NEXT:    vcvtt.f16.f32 s20, s0
; CHECK-NEXT:    vcvtb.f32.f16 s0, s17
; CHECK-NEXT:    vmov r0, s0
; CHECK-NEXT:    bl cosf
; CHECK-NEXT:    vmov s0, r0
; CHECK-NEXT:    vcvtb.f16.f32 s21, s0
; CHECK-NEXT:    vcvtt.f32.f16 s0, s17
; CHECK-NEXT:    vmov r0, s0
; CHECK-NEXT:    bl cosf
; CHECK-NEXT:    vmov s0, r0
; CHECK-NEXT:    vcvtt.f16.f32 s21, s0
; CHECK-NEXT:    vcvtb.f32.f16 s0, s18
; CHECK-NEXT:    vmov r0, s0
; CHECK-NEXT:    bl cosf
; CHECK-NEXT:    vmov s0, r0
; CHECK-NEXT:    vcvtb.f16.f32 s22, s0
; CHECK-NEXT:    vcvtt.f32.f16 s0, s18
; CHECK-NEXT:    vmov r0, s0
; CHECK-NEXT:    bl cosf
; CHECK-NEXT:    vmov s0, r0
; CHECK-NEXT:    vcvtt.f16.f32 s22, s0
; CHECK-NEXT:    vcvtb.f32.f16 s0, s19
; CHECK-NEXT:    vmov r0, s0
; CHECK-NEXT:    bl cosf
; CHECK-NEXT:    vmov s0, r0
; CHECK-NEXT:    vcvtb.f16.f32 s23, s0
; CHECK-NEXT:    vcvtt.f32.f16 s0, s19
; CHECK-NEXT:    vmov r0, s0
; CHECK-NEXT:    bl cosf
; CHECK-NEXT:    vmov s0, r0
; CHECK-NEXT:    vcvtt.f16.f32 s23, s0
; CHECK-NEXT:    vmov q0, q5
; CHECK-NEXT:    vpop {d8, d9, d10, d11}
; CHECK-NEXT:    pop {r7, pc}
entry:
  %0 = call fast <8 x half> @llvm.cos.v8f16(<8 x half> %src)
  ret <8 x half> %0
}

; cos on <2 x double>. The expected code makes two calls to cos, one per lane.
define arm_aapcs_vfpcc <2 x double> @cos_float64_t(<2 x double> %src) {
; CHECK-LABEL: cos_float64_t:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .save {r7, lr}
; CHECK-NEXT:    push {r7, lr}
; CHECK-NEXT:    .vsave {d8, d9}
; CHECK-NEXT:    vpush {d8, d9}
; CHECK-NEXT:    vmov q4, q0
; CHECK-NEXT:    vmov r0, r1, d9
; CHECK-NEXT:    bl cos
; CHECK-NEXT:    vmov r2, r3, d8
; CHECK-NEXT:    vmov d9, r0, r1
; CHECK-NEXT:    mov r0, r2
; CHECK-NEXT:    mov r1, r3
; CHECK-NEXT:    bl cos
; CHECK-NEXT:    vmov d8, r0, r1
; CHECK-NEXT:    vmov q0, q4
; CHECK-NEXT:    vpop {d8, d9}
; CHECK-NEXT:    pop {r7, pc}
entry:
  %0 = call fast <2 x double> @llvm.cos.v2f64(<2 x double> %src)
  ret <2 x double> %0
}

; sin on <4 x float>. The expected code makes four calls to sinf, one per lane.
define arm_aapcs_vfpcc <4 x float> @sin_float32_t(<4 x float> %src) {
; CHECK-LABEL: sin_float32_t:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .save {r4, r5, r7, lr}
; CHECK-NEXT:    push {r4, r5, r7, lr}
; CHECK-NEXT:    .vsave {d8, d9}
; CHECK-NEXT:    vpush {d8, d9}
; CHECK-NEXT:    vmov q4, q0
; CHECK-NEXT:    vmov r0, r4, d9
; CHECK-NEXT:    bl sinf
; CHECK-NEXT:    mov r5, r0
; CHECK-NEXT:    mov r0, r4
; CHECK-NEXT:    bl sinf
; CHECK-NEXT:    vmov r4, r1, d8
; CHECK-NEXT:    vmov s19, r0
; CHECK-NEXT:    vmov s18, r5
; CHECK-NEXT:    mov r0, r1
; CHECK-NEXT:    bl sinf
; CHECK-NEXT:    vmov s17, r0
; CHECK-NEXT:    mov r0, r4
; CHECK-NEXT:    bl sinf
; CHECK-NEXT:    vmov s16, r0
; CHECK-NEXT:    vmov q0, q4
; CHECK-NEXT:    vpop {d8, d9}
; CHECK-NEXT:    pop {r4, r5, r7, pc}
entry:
  %0 = call fast <4 x float> @llvm.sin.v4f32(<4 x float> %src)
  ret <4 x float> %0
}

; sin on <8 x half>. The expected code widens each lane with vcvtb/vcvtt,
; calls sinf eight times and narrows each result into q5 (s20-s23).
define arm_aapcs_vfpcc <8 x half> @sin_float16_t(<8 x half> %src) {
; CHECK-LABEL: sin_float16_t:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .save {r7, lr}
; CHECK-NEXT:    push {r7, lr}
; CHECK-NEXT:    .vsave {d8, d9, d10, d11}
; CHECK-NEXT:    vpush {d8, d9, d10, d11}
; CHECK-NEXT:    vmov q4, q0
; CHECK-NEXT:    vcvtb.f32.f16 s0, s16
; CHECK-NEXT:    vmov r0, s0
; CHECK-NEXT:    bl sinf
; CHECK-NEXT:    vcvtt.f32.f16 s0, s16
; CHECK-NEXT:    vmov s16, r0
; CHECK-NEXT:    vmov r1, s0
; CHECK-NEXT:    mov r0, r1
; CHECK-NEXT:    bl sinf
; CHECK-NEXT:    vmov s0, r0
; CHECK-NEXT:    vcvtb.f16.f32 s20, s16
; CHECK-NEXT:    vcvtt.f16.f32 s20, s0
; CHECK-NEXT:    vcvtb.f32.f16 s0, s17
; CHECK-NEXT:    vmov r0, s0
; CHECK-NEXT:    bl sinf
; CHECK-NEXT:    vmov s0, r0
; CHECK-NEXT:    vcvtb.f16.f32 s21, s0
; CHECK-NEXT:    vcvtt.f32.f16 s0, s17
; CHECK-NEXT:    vmov r0, s0
; CHECK-NEXT:    bl sinf
; CHECK-NEXT:    vmov s0, r0
; CHECK-NEXT:    vcvtt.f16.f32 s21, s0
; CHECK-NEXT:    vcvtb.f32.f16 s0, s18
; CHECK-NEXT:    vmov r0, s0
; CHECK-NEXT:    bl sinf
; CHECK-NEXT:    vmov s0, r0
; CHECK-NEXT:    vcvtb.f16.f32 s22, s0
; CHECK-NEXT:    vcvtt.f32.f16 s0, s18
; CHECK-NEXT:    vmov r0, s0
; CHECK-NEXT:    bl sinf
; CHECK-NEXT:    vmov s0, r0
; CHECK-NEXT:    vcvtt.f16.f32 s22, s0
; CHECK-NEXT:    vcvtb.f32.f16 s0, s19
; CHECK-NEXT:    vmov r0, s0
; CHECK-NEXT:    bl sinf
; CHECK-NEXT:    vmov s0, r0
; CHECK-NEXT:    vcvtb.f16.f32 s23, s0
; CHECK-NEXT:    vcvtt.f32.f16 s0, s19
; CHECK-NEXT:    vmov r0, s0
; CHECK-NEXT:    bl sinf
; CHECK-NEXT:    vmov s0, r0
; CHECK-NEXT:    vcvtt.f16.f32 s23, s0
; CHECK-NEXT:    vmov q0, q5
; CHECK-NEXT:    vpop {d8, d9, d10, d11}
; CHECK-NEXT:    pop {r7, pc}
entry:
  %0 = call fast <8 x half> @llvm.sin.v8f16(<8 x half> %src)
  ret <8 x half> %0
}

; sin on <2 x double>. The expected code makes two calls to sin, one per lane.
define arm_aapcs_vfpcc <2 x double> @sin_float64_t(<2 x double> %src) {
; CHECK-LABEL: sin_float64_t:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .save {r7, lr}
; CHECK-NEXT:    push {r7, lr}
; CHECK-NEXT:    .vsave {d8, d9}
; CHECK-NEXT:    vpush {d8, d9}
; CHECK-NEXT:    vmov q4, q0
; CHECK-NEXT:    vmov r0, r1, d9
; CHECK-NEXT:    bl sin
; CHECK-NEXT:    vmov r2, r3, d8
; CHECK-NEXT:    vmov d9, r0, r1
; CHECK-NEXT:    mov r0, r2
; CHECK-NEXT:    mov r1, r3
; CHECK-NEXT:    bl sin
; CHECK-NEXT:    vmov d8, r0, r1
; CHECK-NEXT:    vmov q0, q4
; CHECK-NEXT:    vpop {d8, d9}
; CHECK-NEXT:    pop {r7, pc}
entry:
  %0 = call fast <2 x double> @llvm.sin.v2f64(<2 x double> %src)
  ret <2 x double> %0
}

; tan on <4 x float>. The expected code makes four calls to tanf, one per lane.
define arm_aapcs_vfpcc <4 x float> @tan_float32_t(<4 x float> %src) {
; CHECK-LABEL: tan_float32_t:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .save {r4, r5, r7, lr}
; CHECK-NEXT:    push {r4, r5, r7, lr}
; CHECK-NEXT:    .vsave {d8, d9}
; CHECK-NEXT:    vpush {d8, d9}
; CHECK-NEXT:    vmov q4, q0
; CHECK-NEXT:    vmov r0, r4, d9
; CHECK-NEXT:    bl tanf
; CHECK-NEXT:    mov r5, r0
; CHECK-NEXT:    mov r0, r4
; CHECK-NEXT:    bl tanf
; CHECK-NEXT:    vmov r4, r1, d8
; CHECK-NEXT:    vmov s19, r0
; CHECK-NEXT:    vmov s18, r5
; CHECK-NEXT:    mov r0, r1
; CHECK-NEXT:    bl tanf
; CHECK-NEXT:    vmov s17, r0
; CHECK-NEXT:    mov r0, r4
; CHECK-NEXT:    bl tanf
; CHECK-NEXT:    vmov s16, r0
; CHECK-NEXT:    vmov q0, q4
; CHECK-NEXT:    vpop {d8, d9}
; CHECK-NEXT:    pop {r4, r5, r7, pc}
entry:
  %0 = call fast <4 x float> @llvm.tan.v4f32(<4 x float> %src)
  ret <4 x float> %0
}

; tan on <8 x half>. The expected code widens each lane with vcvtb/vcvtt,
; calls tanf eight times and narrows each result into q5 (s20-s23).
define arm_aapcs_vfpcc <8 x half> @tan_float16_t(<8 x half> %src) {
; CHECK-LABEL: tan_float16_t:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .save {r7, lr}
; CHECK-NEXT:    push {r7, lr}
; CHECK-NEXT:    .vsave {d8, d9, d10, d11}
; CHECK-NEXT:    vpush {d8, d9, d10, d11}
; CHECK-NEXT:    vmov q4, q0
; CHECK-NEXT:    vcvtb.f32.f16 s0, s16
; CHECK-NEXT:    vmov r0, s0
; CHECK-NEXT:    bl tanf
; CHECK-NEXT:    vcvtt.f32.f16 s0, s16
; CHECK-NEXT:    vmov s16, r0
; CHECK-NEXT:    vmov r1, s0
; CHECK-NEXT:    mov r0, r1
; CHECK-NEXT:    bl tanf
; CHECK-NEXT:    vmov s0, r0
; CHECK-NEXT:    vcvtb.f16.f32 s20, s16
; CHECK-NEXT:    vcvtt.f16.f32 s20, s0
; CHECK-NEXT:    vcvtb.f32.f16 s0, s17
; CHECK-NEXT:    vmov r0, s0
; CHECK-NEXT:    bl tanf
; CHECK-NEXT:    vmov s0, r0
; CHECK-NEXT:    vcvtb.f16.f32 s21, s0
; CHECK-NEXT:    vcvtt.f32.f16 s0, s17
; CHECK-NEXT:    vmov r0, s0
; CHECK-NEXT:    bl tanf
; CHECK-NEXT:    vmov s0, r0
; CHECK-NEXT:    vcvtt.f16.f32 s21, s0
; CHECK-NEXT:    vcvtb.f32.f16 s0, s18
; CHECK-NEXT:    vmov r0, s0
; CHECK-NEXT:    bl tanf
; CHECK-NEXT:    vmov s0, r0
; CHECK-NEXT:    vcvtb.f16.f32 s22, s0
; CHECK-NEXT:    vcvtt.f32.f16 s0, s18
; CHECK-NEXT:    vmov r0, s0
; CHECK-NEXT:    bl tanf
; CHECK-NEXT:    vmov s0, r0
; CHECK-NEXT:    vcvtt.f16.f32 s22, s0
; CHECK-NEXT:    vcvtb.f32.f16 s0, s19
; CHECK-NEXT:    vmov r0, s0
; CHECK-NEXT:    bl tanf
; CHECK-NEXT:    vmov s0, r0
; CHECK-NEXT:    vcvtb.f16.f32 s23, s0
; CHECK-NEXT:    vcvtt.f32.f16 s0, s19
; CHECK-NEXT:    vmov r0, s0
; CHECK-NEXT:    bl tanf
; CHECK-NEXT:    vmov s0, r0
; CHECK-NEXT:    vcvtt.f16.f32 s23, s0
; CHECK-NEXT:    vmov q0, q5
; CHECK-NEXT:    vpop {d8, d9, d10, d11}
; CHECK-NEXT:    pop {r7, pc}
entry:
  %0 = call fast <8 x half> @llvm.tan.v8f16(<8 x half> %src)
  ret <8 x half> %0
}

; tan on <2 x double>. The expected code makes two calls to tan, one per lane.
define arm_aapcs_vfpcc <2 x double> @tan_float64_t(<2 x double> %src) {
; CHECK-LABEL: tan_float64_t:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .save {r7, lr}
; CHECK-NEXT:    push {r7, lr}
; CHECK-NEXT:    .vsave {d8, d9}
; CHECK-NEXT:    vpush {d8, d9}
; CHECK-NEXT:    vmov q4, q0
; CHECK-NEXT:    vmov r0, r1, d9
; CHECK-NEXT:    bl tan
; CHECK-NEXT:    vmov r2, r3, d8
; CHECK-NEXT:    vmov d9, r0, r1
; CHECK-NEXT:    mov r0, r2
; CHECK-NEXT:    mov r1, r3
; CHECK-NEXT:    bl tan
; CHECK-NEXT:    vmov d8, r0, r1
; CHECK-NEXT:    vmov q0, q4
; CHECK-NEXT:    vpop {d8, d9}
; CHECK-NEXT:    pop {r7, pc}
entry:
  %0 = call fast <2 x double> @llvm.tan.v2f64(<2 x double> %src)
  ret <2 x double> %0
}

; exp on <4 x float>. The expected code makes four calls to expf, one per lane.
define arm_aapcs_vfpcc <4 x float> @exp_float32_t(<4 x float> %src) {
; CHECK-LABEL: exp_float32_t:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .save {r4, r5, r7, lr}
; CHECK-NEXT:    push {r4, r5, r7, lr}
; CHECK-NEXT:    .vsave {d8, d9}
; CHECK-NEXT:    vpush {d8, d9}
; CHECK-NEXT:    vmov q4, q0
; CHECK-NEXT:    vmov r0, r4, d9
; CHECK-NEXT:    bl expf
; CHECK-NEXT:    mov r5, r0
; CHECK-NEXT:    mov r0, r4
; CHECK-NEXT:    bl expf
; CHECK-NEXT:    vmov r4, r1, d8
; CHECK-NEXT:    vmov s19, r0
; CHECK-NEXT:    vmov s18, r5
; CHECK-NEXT:    mov r0, r1
; CHECK-NEXT:    bl expf
; CHECK-NEXT:    vmov s17, r0
; CHECK-NEXT:    mov r0, r4
; CHECK-NEXT:    bl expf
; CHECK-NEXT:    vmov s16, r0
; CHECK-NEXT:    vmov q0, q4
; CHECK-NEXT:    vpop {d8, d9}
; CHECK-NEXT:    pop {r4, r5, r7, pc}
entry:
  %0 = call fast <4 x float> @llvm.exp.v4f32(<4 x float> %src)
  ret <4 x float> %0
}

; exp on <8 x half>. The expected code widens each lane with vcvtb/vcvtt,
; calls expf eight times and narrows each result into q5 (s20-s23).
define arm_aapcs_vfpcc <8 x half> @exp_float16_t(<8 x half> %src) {
; CHECK-LABEL: exp_float16_t:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .save {r7, lr}
; CHECK-NEXT:    push {r7, lr}
; CHECK-NEXT:    .vsave {d8, d9, d10, d11}
; CHECK-NEXT:    vpush {d8, d9, d10, d11}
; CHECK-NEXT:    vmov q4, q0
; CHECK-NEXT:    vcvtb.f32.f16 s0, s16
; CHECK-NEXT:    vmov r0, s0
; CHECK-NEXT:    bl expf
; CHECK-NEXT:    vcvtt.f32.f16 s0, s16
; CHECK-NEXT:    vmov s16, r0
; CHECK-NEXT:    vmov r1, s0
; CHECK-NEXT:    mov r0, r1
; CHECK-NEXT:    bl expf
; CHECK-NEXT:    vmov s0, r0
; CHECK-NEXT:    vcvtb.f16.f32 s20, s16
; CHECK-NEXT:    vcvtt.f16.f32 s20, s0
; CHECK-NEXT:    vcvtb.f32.f16 s0, s17
; CHECK-NEXT:    vmov r0, s0
; CHECK-NEXT:    bl expf
; CHECK-NEXT:    vmov s0, r0
; CHECK-NEXT:    vcvtb.f16.f32 s21, s0
; CHECK-NEXT:    vcvtt.f32.f16 s0, s17
; CHECK-NEXT:    vmov r0, s0
; CHECK-NEXT:    bl expf
; CHECK-NEXT:    vmov s0, r0
; CHECK-NEXT:    vcvtt.f16.f32 s21, s0
; CHECK-NEXT:    vcvtb.f32.f16 s0, s18
; CHECK-NEXT:    vmov r0, s0
; CHECK-NEXT:    bl expf
; CHECK-NEXT:    vmov s0, r0
; CHECK-NEXT:    vcvtb.f16.f32 s22, s0
; CHECK-NEXT:    vcvtt.f32.f16 s0, s18
; CHECK-NEXT:    vmov r0, s0
; CHECK-NEXT:    bl expf
; CHECK-NEXT:    vmov s0, r0
; CHECK-NEXT:    vcvtt.f16.f32 s22, s0
; CHECK-NEXT:    vcvtb.f32.f16 s0, s19
; CHECK-NEXT:    vmov r0, s0
; CHECK-NEXT:    bl expf
; CHECK-NEXT:    vmov s0, r0
; CHECK-NEXT:    vcvtb.f16.f32 s23, s0
; CHECK-NEXT:    vcvtt.f32.f16 s0, s19
; CHECK-NEXT:    vmov r0, s0
; CHECK-NEXT:    bl expf
; CHECK-NEXT:    vmov s0, r0
; CHECK-NEXT:    vcvtt.f16.f32 s23, s0
; CHECK-NEXT:    vmov q0, q5
; CHECK-NEXT:    vpop {d8, d9, d10, d11}
; CHECK-NEXT:    pop {r7, pc}
entry:
  %0 = call fast <8 x half> @llvm.exp.v8f16(<8 x half> %src)
  ret <8 x half> %0
}

; exp on <2 x double>. The expected code makes two calls to exp, one per lane.
define arm_aapcs_vfpcc <2 x double> @exp_float64_t(<2 x double> %src) {
; CHECK-LABEL: exp_float64_t:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .save {r7, lr}
; CHECK-NEXT:    push {r7, lr}
; CHECK-NEXT:    .vsave {d8, d9}
; CHECK-NEXT:    vpush {d8, d9}
; CHECK-NEXT:    vmov q4, q0
; CHECK-NEXT:    vmov r0, r1, d9
; CHECK-NEXT:    bl exp
; CHECK-NEXT:    vmov r2, r3, d8
; CHECK-NEXT:    vmov d9, r0, r1
; CHECK-NEXT:    mov r0, r2
; CHECK-NEXT:    mov r1, r3
; CHECK-NEXT:    bl exp
; CHECK-NEXT:    vmov d8, r0, r1
; CHECK-NEXT:    vmov q0, q4
; CHECK-NEXT:    vpop {d8, d9}
; CHECK-NEXT:    pop {r7, pc}
entry:
  %0 = call fast <2 x double> @llvm.exp.v2f64(<2 x double> %src)
  ret <2 x double> %0
}

; exp2 on <4 x float>. The expected code makes four calls to exp2f, one per
; lane.
define arm_aapcs_vfpcc <4 x float> @exp2_float32_t(<4 x float> %src) {
; CHECK-LABEL: exp2_float32_t:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .save {r4, r5, r7, lr}
; CHECK-NEXT:    push {r4, r5, r7, lr}
; CHECK-NEXT:    .vsave {d8, d9}
; CHECK-NEXT:    vpush {d8, d9}
; CHECK-NEXT:    vmov q4, q0
; CHECK-NEXT:    vmov r0, r4, d9
; CHECK-NEXT:    bl exp2f
; CHECK-NEXT:    mov r5, r0
; CHECK-NEXT:    mov r0, r4
; CHECK-NEXT:    bl exp2f
; CHECK-NEXT:    vmov r4, r1, d8
; CHECK-NEXT:    vmov s19, r0
; CHECK-NEXT:    vmov s18, r5
; CHECK-NEXT:    mov r0, r1
; CHECK-NEXT:    bl exp2f
; CHECK-NEXT:    vmov s17, r0
; CHECK-NEXT:    mov r0, r4
; CHECK-NEXT:    bl exp2f
; CHECK-NEXT:    vmov s16, r0
; CHECK-NEXT:    vmov q0, q4
; CHECK-NEXT:    vpop {d8, d9}
; CHECK-NEXT:    pop {r4, r5, r7, pc}
entry:
  %0 = call fast <4 x float> @llvm.exp2.v4f32(<4 x float> %src)
  ret <4 x float> %0
}

; exp2 on <8 x half>. The expected code widens each lane with vcvtb/vcvtt,
; calls exp2f eight times and narrows each result into q5 (s20-s23).
define arm_aapcs_vfpcc <8 x half> @exp2_float16_t(<8 x half> %src) {
; CHECK-LABEL: exp2_float16_t:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .save {r7, lr}
; CHECK-NEXT:    push {r7, lr}
; CHECK-NEXT:    .vsave {d8, d9, d10, d11}
; CHECK-NEXT:    vpush {d8, d9, d10, d11}
; CHECK-NEXT:    vmov q4, q0
; CHECK-NEXT:    vcvtb.f32.f16 s0, s16
; CHECK-NEXT:    vmov r0, s0
; CHECK-NEXT:    bl exp2f
; CHECK-NEXT:    vcvtt.f32.f16 s0, s16
; CHECK-NEXT:    vmov s16, r0
; CHECK-NEXT:    vmov r1, s0
; CHECK-NEXT:    mov r0, r1
; CHECK-NEXT:    bl exp2f
; CHECK-NEXT:    vmov s0, r0
; CHECK-NEXT:    vcvtb.f16.f32 s20, s16
; CHECK-NEXT:    vcvtt.f16.f32 s20, s0
; CHECK-NEXT:    vcvtb.f32.f16 s0, s17
; CHECK-NEXT:    vmov r0, s0
; CHECK-NEXT:    bl exp2f
; CHECK-NEXT:    vmov s0, r0
; CHECK-NEXT:    vcvtb.f16.f32 s21, s0
; CHECK-NEXT:    vcvtt.f32.f16 s0, s17
; CHECK-NEXT:    vmov r0, s0
; CHECK-NEXT:    bl exp2f
; CHECK-NEXT:    vmov s0, r0
; CHECK-NEXT:    vcvtt.f16.f32 s21, s0
; CHECK-NEXT:    vcvtb.f32.f16 s0, s18
; CHECK-NEXT:    vmov r0, s0
; CHECK-NEXT:    bl exp2f
; CHECK-NEXT:    vmov s0, r0
; CHECK-NEXT:    vcvtb.f16.f32 s22, s0
; CHECK-NEXT:    vcvtt.f32.f16 s0, s18
; CHECK-NEXT:    vmov r0, s0
; CHECK-NEXT:    bl exp2f
; CHECK-NEXT:    vmov s0, r0
; CHECK-NEXT:    vcvtt.f16.f32 s22, s0
; CHECK-NEXT:    vcvtb.f32.f16 s0, s19
; CHECK-NEXT:    vmov r0, s0
; CHECK-NEXT:    bl exp2f
; CHECK-NEXT:    vmov s0, r0
; CHECK-NEXT:    vcvtb.f16.f32 s23, s0
; CHECK-NEXT:    vcvtt.f32.f16 s0, s19
; CHECK-NEXT:    vmov r0, s0
; CHECK-NEXT:    bl exp2f
; CHECK-NEXT:    vmov s0, r0
; CHECK-NEXT:    vcvtt.f16.f32 s23, s0
; CHECK-NEXT:    vmov q0, q5
; CHECK-NEXT:    vpop {d8, d9, d10, d11}
; CHECK-NEXT:    pop {r7, pc}
entry:
  %0 = call fast <8 x half> @llvm.exp2.v8f16(<8 x half> %src)
  ret <8 x half> %0
}

; exp2 on <2 x double>. The expected code makes two calls to exp2, one per
; lane.
define arm_aapcs_vfpcc <2 x double> @exp2_float64_t(<2 x double> %src) {
; CHECK-LABEL: exp2_float64_t:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .save {r7, lr}
; CHECK-NEXT:    push {r7, lr}
; CHECK-NEXT:    .vsave {d8, d9}
; CHECK-NEXT:    vpush {d8, d9}
; CHECK-NEXT:    vmov q4, q0
; CHECK-NEXT:    vmov r0, r1, d9
; CHECK-NEXT:    bl exp2
; CHECK-NEXT:    vmov r2, r3, d8
; CHECK-NEXT:    vmov d9, r0, r1
; CHECK-NEXT:    mov r0, r2
; CHECK-NEXT:    mov r1, r3
; CHECK-NEXT:    bl exp2
; CHECK-NEXT:    vmov d8, r0, r1
; CHECK-NEXT:    vmov q0, q4
; CHECK-NEXT:    vpop {d8, d9}
; CHECK-NEXT:    pop {r7, pc}
entry:
  %0 = call fast <2 x double> @llvm.exp2.v2f64(<2 x double> %src)
  ret <2 x double> %0
}

; log on <4 x float>. The expected code makes four calls to logf, one per lane.
define arm_aapcs_vfpcc <4 x float> @log_float32_t(<4 x float> %src) {
; CHECK-LABEL: log_float32_t:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .save {r4, r5, r7, lr}
; CHECK-NEXT:    push {r4, r5, r7, lr}
; CHECK-NEXT:    .vsave {d8, d9}
; CHECK-NEXT:    vpush {d8, d9}
; CHECK-NEXT:    vmov q4, q0
; CHECK-NEXT:    vmov r0, r4, d9
; CHECK-NEXT:    bl logf
; CHECK-NEXT:    mov r5, r0
; CHECK-NEXT:    mov r0, r4
; CHECK-NEXT:    bl logf
; CHECK-NEXT:    vmov r4, r1, d8
; CHECK-NEXT:    vmov s19, r0
; CHECK-NEXT:    vmov s18, r5
; CHECK-NEXT:    mov r0, r1
; CHECK-NEXT:    bl logf
; CHECK-NEXT:    vmov s17, r0
; CHECK-NEXT:    mov r0, r4
; CHECK-NEXT:    bl logf
; CHECK-NEXT:    vmov s16, r0
; CHECK-NEXT:    vmov q0, q4
; CHECK-NEXT:    vpop {d8, d9}
; CHECK-NEXT:    pop {r4, r5, r7, pc}
entry:
  %0 = call fast <4 x float> @llvm.log.v4f32(<4 x float> %src)
  ret <4 x float> %0
}

; log on <8 x half>. The expected code widens each lane with vcvtb/vcvtt,
; calls logf eight times and narrows each result into q5 (s20-s23).
define arm_aapcs_vfpcc <8 x half> @log_float16_t(<8 x half> %src) {
; CHECK-LABEL: log_float16_t:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .save {r7, lr}
; CHECK-NEXT:    push {r7, lr}
; CHECK-NEXT:    .vsave {d8, d9, d10, d11}
; CHECK-NEXT:    vpush {d8, d9, d10, d11}
; CHECK-NEXT:    vmov q4, q0
; CHECK-NEXT:    vcvtb.f32.f16 s0, s16
; CHECK-NEXT:    vmov r0, s0
; CHECK-NEXT:    bl logf
; CHECK-NEXT:    vcvtt.f32.f16 s0, s16
; CHECK-NEXT:    vmov s16, r0
; CHECK-NEXT:    vmov r1, s0
; CHECK-NEXT:    mov r0, r1
; CHECK-NEXT:    bl logf
; CHECK-NEXT:    vmov s0, r0
; CHECK-NEXT:    vcvtb.f16.f32 s20, s16
; CHECK-NEXT:    vcvtt.f16.f32 s20, s0
; CHECK-NEXT:    vcvtb.f32.f16 s0, s17
; CHECK-NEXT:    vmov r0, s0
; CHECK-NEXT:    bl logf
; CHECK-NEXT:    vmov s0, r0
; CHECK-NEXT:    vcvtb.f16.f32 s21, s0
; CHECK-NEXT:    vcvtt.f32.f16 s0, s17
; CHECK-NEXT:    vmov r0, s0
; CHECK-NEXT:    bl logf
; CHECK-NEXT:    vmov s0, r0
; CHECK-NEXT:    vcvtt.f16.f32 s21, s0
; CHECK-NEXT:    vcvtb.f32.f16 s0, s18
; CHECK-NEXT:    vmov r0, s0
; CHECK-NEXT:    bl logf
; CHECK-NEXT:    vmov s0, r0
; CHECK-NEXT:    vcvtb.f16.f32 s22, s0
; CHECK-NEXT:    vcvtt.f32.f16 s0, s18
; CHECK-NEXT:    vmov r0, s0
; CHECK-NEXT:    bl logf
; CHECK-NEXT:    vmov s0, r0
; CHECK-NEXT:    vcvtt.f16.f32 s22, s0
; CHECK-NEXT:    vcvtb.f32.f16 s0, s19
; CHECK-NEXT:    vmov r0, s0
; CHECK-NEXT:    bl logf
; CHECK-NEXT:    vmov s0, r0
; CHECK-NEXT:    vcvtb.f16.f32 s23, s0
; CHECK-NEXT:    vcvtt.f32.f16 s0, s19
; CHECK-NEXT:    vmov r0, s0
; CHECK-NEXT:    bl logf
; CHECK-NEXT:    vmov s0, r0
; CHECK-NEXT:    vcvtt.f16.f32 s23, s0
; CHECK-NEXT:    vmov q0, q5
; CHECK-NEXT:    vpop {d8, d9, d10, d11}
; CHECK-NEXT:    pop {r7, pc}
entry:
  %0 = call fast <8 x half> @llvm.log.v8f16(<8 x half> %src)
  ret <8 x half> %0
}

; log on <2 x double>. The expected code makes two calls to log, one per lane.
define arm_aapcs_vfpcc <2 x double> @log_float64_t(<2 x double> %src) {
; CHECK-LABEL: log_float64_t:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .save {r7, lr}
; CHECK-NEXT:    push {r7, lr}
; CHECK-NEXT:    .vsave {d8, d9}
; CHECK-NEXT:    vpush {d8, d9}
; CHECK-NEXT:    vmov q4, q0
; CHECK-NEXT:    vmov r0, r1, d9
; CHECK-NEXT:    bl log
; CHECK-NEXT:    vmov r2, r3, d8
; CHECK-NEXT:    vmov d9, r0, r1
; CHECK-NEXT:    mov r0, r2
; CHECK-NEXT:    mov r1, r3
; CHECK-NEXT:    bl log
; CHECK-NEXT:    vmov d8, r0, r1
; CHECK-NEXT:    vmov q0, q4
; CHECK-NEXT:    vpop {d8, d9}
; CHECK-NEXT:    pop {r7, pc}
entry:
  %0 = call fast <2 x double> @llvm.log.v2f64(<2 x double> %src)
  ret <2 x double> %0
}

; log2 on <4 x float>. The expected code makes four calls to log2f, one per
; lane.
define arm_aapcs_vfpcc <4 x float> @log2_float32_t(<4 x float> %src) {
; CHECK-LABEL: log2_float32_t:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .save {r4, r5, r7, lr}
; CHECK-NEXT:    push {r4, r5, r7, lr}
; CHECK-NEXT:    .vsave {d8, d9}
; CHECK-NEXT:    vpush {d8, d9}
; CHECK-NEXT:    vmov q4, q0
; CHECK-NEXT:    vmov r0, r4, d9
; CHECK-NEXT:    bl log2f
; CHECK-NEXT:    mov r5, r0
; CHECK-NEXT:    mov r0, r4
; CHECK-NEXT:    bl log2f
; CHECK-NEXT:    vmov r4, r1, d8
; CHECK-NEXT:    vmov s19, r0
; CHECK-NEXT:    vmov s18, r5
; CHECK-NEXT:    mov r0, r1
; CHECK-NEXT:    bl log2f
; CHECK-NEXT:    vmov s17, r0
; CHECK-NEXT:    mov r0, r4
; CHECK-NEXT:    bl log2f
; CHECK-NEXT:    vmov s16, r0
; CHECK-NEXT:    vmov q0, q4
; CHECK-NEXT:    vpop {d8, d9}
; CHECK-NEXT:    pop {r4, r5, r7, pc}
entry:
  %0 = call fast <4 x float> @llvm.log2.v4f32(<4 x float> %src)
  ret <4 x float> %0
}

; log2 on <8 x half>. The expected code widens each lane with vcvtb/vcvtt,
; calls log2f eight times and narrows each result into q5 (s20-s23).
define arm_aapcs_vfpcc <8 x half> @log2_float16_t(<8 x half> %src) {
; CHECK-LABEL: log2_float16_t:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .save {r7, lr}
; CHECK-NEXT:    push {r7, lr}
; CHECK-NEXT:    .vsave {d8, d9, d10, d11}
; CHECK-NEXT:    vpush {d8, d9, d10, d11}
; CHECK-NEXT:    vmov q4, q0
; CHECK-NEXT:    vcvtb.f32.f16 s0, s16
; CHECK-NEXT:    vmov r0, s0
; CHECK-NEXT:    bl log2f
; CHECK-NEXT:    vcvtt.f32.f16 s0, s16
; CHECK-NEXT:    vmov s16, r0
; CHECK-NEXT:    vmov r1, s0
; CHECK-NEXT:    mov r0, r1
; CHECK-NEXT:    bl log2f
; CHECK-NEXT:    vmov s0, r0
; CHECK-NEXT:    vcvtb.f16.f32 s20, s16
; CHECK-NEXT:    vcvtt.f16.f32 s20, s0
; CHECK-NEXT:    vcvtb.f32.f16 s0, s17
; CHECK-NEXT:    vmov r0, s0
; CHECK-NEXT:    bl log2f
; CHECK-NEXT:    vmov s0, r0
; CHECK-NEXT:    vcvtb.f16.f32 s21, s0
; CHECK-NEXT:    vcvtt.f32.f16 s0, s17
; CHECK-NEXT:    vmov r0, s0
; CHECK-NEXT:    bl log2f
; CHECK-NEXT:    vmov s0, r0
; CHECK-NEXT:    vcvtt.f16.f32 s21, s0
; CHECK-NEXT:    vcvtb.f32.f16 s0, s18
; CHECK-NEXT:    vmov r0, s0
; CHECK-NEXT:    bl log2f
; CHECK-NEXT:    vmov s0, r0
; CHECK-NEXT:    vcvtb.f16.f32 s22, s0
; CHECK-NEXT:    vcvtt.f32.f16 s0, s18
; CHECK-NEXT:    vmov r0, s0
; CHECK-NEXT:    bl log2f
; CHECK-NEXT:    vmov s0, r0
; CHECK-NEXT:    vcvtt.f16.f32 s22, s0
; CHECK-NEXT:    vcvtb.f32.f16 s0, s19
; CHECK-NEXT:    vmov r0, s0
; CHECK-NEXT:    bl log2f
; CHECK-NEXT:    vmov s0, r0
; CHECK-NEXT:    vcvtb.f16.f32 s23, s0
; CHECK-NEXT:    vcvtt.f32.f16 s0, s19
; CHECK-NEXT:    vmov r0, s0
; CHECK-NEXT:    bl log2f
; CHECK-NEXT:    vmov s0, r0
; CHECK-NEXT:    vcvtt.f16.f32 s23, s0
; CHECK-NEXT:    vmov q0, q5
; CHECK-NEXT:    vpop {d8, d9, d10, d11}
; CHECK-NEXT:    pop {r7, pc}
entry:
  %0 = call fast <8 x half> @llvm.log2.v8f16(<8 x half> %src)
  ret <8 x half> %0
}

; log2 on <2 x double>. The expected code makes two calls to log2, one per
; lane.
define arm_aapcs_vfpcc <2 x double> @log2_float64_t(<2 x double> %src) {
; CHECK-LABEL: log2_float64_t:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .save {r7, lr}
; CHECK-NEXT:    push {r7, lr}
; CHECK-NEXT:    .vsave {d8, d9}
; CHECK-NEXT:    vpush {d8, d9}
; CHECK-NEXT:    vmov q4, q0
; CHECK-NEXT:    vmov r0, r1, d9
; CHECK-NEXT:    bl log2
; CHECK-NEXT:    vmov r2, r3, d8
; CHECK-NEXT:    vmov d9, r0, r1
; CHECK-NEXT:    mov r0, r2
; CHECK-NEXT:    mov r1, r3
; CHECK-NEXT:    bl log2
; CHECK-NEXT:    vmov d8, r0, r1
; CHECK-NEXT:    vmov q0, q4
; CHECK-NEXT:    vpop {d8, d9}
; CHECK-NEXT:    pop {r7, pc}
entry:
  %0 = call fast <2 x double> @llvm.log2.v2f64(<2 x double> %src)
  ret <2 x double> %0
}

; log10 on <4 x float>. The expected code makes four calls to log10f, one per
; lane.
define arm_aapcs_vfpcc <4 x float> @log10_float32_t(<4 x float> %src) {
; CHECK-LABEL: log10_float32_t:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .save {r4, r5, r7, lr}
; CHECK-NEXT:    push {r4, r5, r7, lr}
; CHECK-NEXT:    .vsave {d8, d9}
; CHECK-NEXT:    vpush {d8, d9}
; CHECK-NEXT:    vmov q4, q0
; CHECK-NEXT:    vmov r0, r4, d9
; CHECK-NEXT:    bl log10f
; CHECK-NEXT:    mov r5, r0
; CHECK-NEXT:    mov r0, r4
; CHECK-NEXT:    bl log10f
; CHECK-NEXT:    vmov r4, r1, d8
; CHECK-NEXT:    vmov s19, r0
; CHECK-NEXT:    vmov s18, r5
; CHECK-NEXT:    mov r0, r1
; CHECK-NEXT:    bl log10f
; CHECK-NEXT:    vmov s17, r0
; CHECK-NEXT:    mov r0, r4
; CHECK-NEXT:    bl log10f
; CHECK-NEXT:    vmov s16, r0
; CHECK-NEXT:    vmov q0, q4
; CHECK-NEXT:    vpop {d8, d9}
; CHECK-NEXT:    pop {r4, r5, r7, pc}
entry:
  %0 = call fast <4 x float> @llvm.log10.v4f32(<4 x float> %src)
  ret <4 x float> %0
}

; log10 on <8 x half>. The expected code widens each lane with vcvtb/vcvtt,
; calls log10f eight times and narrows each result into q5 (s20-s23).
define arm_aapcs_vfpcc <8 x half> @log10_float16_t(<8 x half> %src) {
; CHECK-LABEL: log10_float16_t:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .save {r7, lr}
; CHECK-NEXT:    push {r7, lr}
; CHECK-NEXT:    .vsave {d8, d9, d10, d11}
; CHECK-NEXT:    vpush {d8, d9, d10, d11}
; CHECK-NEXT:    vmov q4, q0
; CHECK-NEXT:    vcvtb.f32.f16 s0, s16
; CHECK-NEXT:    vmov r0, s0
; CHECK-NEXT:    bl log10f
; CHECK-NEXT:    vcvtt.f32.f16 s0, s16
; CHECK-NEXT:    vmov s16, r0
; CHECK-NEXT:    vmov r1, s0
; CHECK-NEXT:    mov r0, r1
; CHECK-NEXT:    bl log10f
; CHECK-NEXT:    vmov s0, r0
; CHECK-NEXT:    vcvtb.f16.f32 s20, s16
; CHECK-NEXT:    vcvtt.f16.f32 s20, s0
; CHECK-NEXT:    vcvtb.f32.f16 s0, s17
; CHECK-NEXT:    vmov r0, s0
; CHECK-NEXT:    bl log10f
; CHECK-NEXT:    vmov s0, r0
; CHECK-NEXT:    vcvtb.f16.f32 s21, s0
; CHECK-NEXT:    vcvtt.f32.f16 s0, s17
; CHECK-NEXT:    vmov r0, s0
; CHECK-NEXT:    bl log10f
; CHECK-NEXT:    vmov s0, r0
; CHECK-NEXT:    vcvtt.f16.f32 s21, s0
; CHECK-NEXT:    vcvtb.f32.f16 s0, s18
; CHECK-NEXT:    vmov r0, s0
; CHECK-NEXT:    bl log10f
; CHECK-NEXT:    vmov s0, r0
; CHECK-NEXT:    vcvtb.f16.f32 s22, s0
; CHECK-NEXT:    vcvtt.f32.f16 s0, s18
; CHECK-NEXT:    vmov r0, s0
; CHECK-NEXT:    bl log10f
; CHECK-NEXT:    vmov s0, r0
; CHECK-NEXT:    vcvtt.f16.f32 s22, s0
; CHECK-NEXT:    vcvtb.f32.f16 s0, s19
; CHECK-NEXT:    vmov r0, s0
; CHECK-NEXT:    bl log10f
; CHECK-NEXT:    vmov s0, r0
; CHECK-NEXT:    vcvtb.f16.f32 s23, s0
; CHECK-NEXT:    vcvtt.f32.f16 s0, s19
; CHECK-NEXT:    vmov r0, s0
; CHECK-NEXT:    bl log10f
; CHECK-NEXT:    vmov s0, r0
; CHECK-NEXT:    vcvtt.f16.f32 s23, s0
; CHECK-NEXT:    vmov q0, q5
; CHECK-NEXT:    vpop {d8, d9, d10, d11}
; CHECK-NEXT:    pop {r7, pc}
entry:
  %0 = call fast <8 x half> @llvm.log10.v8f16(<8 x half> %src)
  ret <8 x half> %0
}

; log10 on <2 x double>. The expected code makes two calls to log10, one per
; lane.
define arm_aapcs_vfpcc <2 x double> @log10_float64_t(<2 x double> %src) {
; CHECK-LABEL: log10_float64_t:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .save {r7, lr}
; CHECK-NEXT:    push {r7, lr}
; CHECK-NEXT:    .vsave {d8, d9}
; CHECK-NEXT:    vpush {d8, d9}
; CHECK-NEXT:    vmov q4, q0
; CHECK-NEXT:    vmov r0, r1, d9
; CHECK-NEXT:    bl log10
; CHECK-NEXT:    vmov r2, r3, d8
; CHECK-NEXT:    vmov d9, r0, r1
; CHECK-NEXT:    mov r0, r2
; CHECK-NEXT:    mov r1, r3
; CHECK-NEXT:    bl log10
; CHECK-NEXT:    vmov d8, r0, r1
; CHECK-NEXT:    vmov q0, q4
; CHECK-NEXT:    vpop {d8, d9}
; CHECK-NEXT:    pop {r7, pc}
entry:
  %0 = call fast <2 x double> @llvm.log10.v2f64(<2 x double> %src)
  ret <2 x double> %0
}

; pow on two <4 x float> operands. The expected code keeps %src1 in q5 and
; %src2 in q4 and makes four calls to powf with the lane pair in r0 and r1.
define arm_aapcs_vfpcc <4 x float> @pow_float32_t(<4 x float> %src1, <4 x float> %src2) {
; CHECK-LABEL: pow_float32_t:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .save {r4, r5, r6, lr}
; CHECK-NEXT:    push {r4, r5, r6, lr}
; CHECK-NEXT:    .vsave {d8, d9, d10, d11}
; CHECK-NEXT:    vpush {d8, d9, d10, d11}
; CHECK-NEXT:    vmov q4, q1
; CHECK-NEXT:    vmov q5, q0
; CHECK-NEXT:    vmov r0, r4, d11
; CHECK-NEXT:    vmov r1, r5, d9
; CHECK-NEXT:    bl powf
; CHECK-NEXT:    mov r6, r0
; CHECK-NEXT:    mov r0, r4
; CHECK-NEXT:    mov r1, r5
; CHECK-NEXT:    bl powf
; CHECK-NEXT:    vmov r4, r2, d10
; CHECK-NEXT:    vmov r5, r1, d8
; CHECK-NEXT:    vmov s19, r0
; CHECK-NEXT:    vmov s18, r6
; CHECK-NEXT:    mov r0, r2
; CHECK-NEXT:    bl powf
; CHECK-NEXT:    vmov s17, r0
; CHECK-NEXT:    mov r0, r4
; CHECK-NEXT:    mov r1, r5
; CHECK-NEXT:    bl powf
; CHECK-NEXT:    vmov s16, r0
; CHECK-NEXT:    vmov q0, q4
; CHECK-NEXT:    vpop {d8, d9, d10, d11}
; CHECK-NEXT:    pop {r4, r5, r6, pc}
entry:
  %0 = call fast <4 x float> @llvm.pow.v4f32(<4 x float> %src1, <4 x float> %src2)
  ret <4 x float> %0
}

; pow on two <8 x half> operands. The expected code widens each lane pair with
; vcvtb/vcvtt, calls powf eight times and narrows each result into q6
; (s24-s27).
define arm_aapcs_vfpcc <8 x half> @pow_float16_t(<8 x half> %src1, <8 x half> %src2) {
; CHECK-LABEL: pow_float16_t:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .save {r7, lr}
; CHECK-NEXT:    push {r7, lr}
; CHECK-NEXT:    .vsave {d8, d9, d10, d11, d12, d13}
; CHECK-NEXT:    vpush {d8, d9, d10, d11, d12, d13}
; CHECK-NEXT:    vmov q5, q0
; CHECK-NEXT:    vmov q4, q1
; CHECK-NEXT:    vcvtb.f32.f16 s0, s20
; CHECK-NEXT:    vmov r0, s0
; CHECK-NEXT:    vcvtb.f32.f16 s0, s16
; CHECK-NEXT:    vmov r1, s0
; CHECK-NEXT:    bl powf
; CHECK-NEXT:    vcvtt.f32.f16 s0, s20
; CHECK-NEXT:    vmov r2, s0
; CHECK-NEXT:    vcvtt.f32.f16 s0, s16
; CHECK-NEXT:    vmov r1, s0
; CHECK-NEXT:    vmov s16, r0
; CHECK-NEXT:    mov r0, r2
; CHECK-NEXT:    bl powf
; CHECK-NEXT:    vmov s0, r0
; CHECK-NEXT:    vcvtb.f16.f32 s24, s16
; CHECK-NEXT:    vcvtt.f16.f32 s24, s0
; CHECK-NEXT:    vcvtb.f32.f16 s0, s21
; CHECK-NEXT:    vmov r0, s0
; CHECK-NEXT:    vcvtb.f32.f16 s0, s17
; CHECK-NEXT:    vmov r1, s0
; CHECK-NEXT:    bl powf
; CHECK-NEXT:    vmov s0, r0
; CHECK-NEXT:    vcvtb.f16.f32 s25, s0
; CHECK-NEXT:    vcvtt.f32.f16 s0, s21
; CHECK-NEXT:    vmov r0, s0
; CHECK-NEXT:    vcvtt.f32.f16 s0, s17
; CHECK-NEXT:    vmov r1, s0
; CHECK-NEXT:    bl powf
; CHECK-NEXT:    vmov s0, r0
; CHECK-NEXT:    vcvtt.f16.f32 s25, s0
; CHECK-NEXT:    vcvtb.f32.f16 s0, s22
; CHECK-NEXT:    vmov r0, s0
; CHECK-NEXT:    vcvtb.f32.f16 s0, s18
; CHECK-NEXT:    vmov r1, s0
; CHECK-NEXT:    bl powf
; CHECK-NEXT:    vmov s0, r0
; CHECK-NEXT:    vcvtb.f16.f32 s26, s0
; CHECK-NEXT:    vcvtt.f32.f16 s0, s22
; CHECK-NEXT:    vmov r0, s0
; CHECK-NEXT:    vcvtt.f32.f16 s0, s18
; CHECK-NEXT:    vmov r1, s0
; CHECK-NEXT:    bl powf
; CHECK-NEXT:    vmov s0, r0
; CHECK-NEXT:    vcvtt.f16.f32 s26, s0
; CHECK-NEXT:    vcvtb.f32.f16 s0, s23
; CHECK-NEXT:    vmov r0, s0
; CHECK-NEXT:    vcvtb.f32.f16 s0, s19
; CHECK-NEXT:    vmov r1, s0
; CHECK-NEXT:    bl powf
; CHECK-NEXT:    vmov s0, r0
; CHECK-NEXT:    vcvtb.f16.f32 s27, s0
; CHECK-NEXT:    vcvtt.f32.f16 s0, s23
; CHECK-NEXT:    vmov r0, s0
; CHECK-NEXT:    vcvtt.f32.f16 s0, s19
; CHECK-NEXT:    vmov r1, s0
; CHECK-NEXT:    bl powf
; CHECK-NEXT:    vmov s0, r0
; CHECK-NEXT:    vcvtt.f16.f32 s27, s0
; CHECK-NEXT:    vmov q0, q6
; CHECK-NEXT:    vpop {d8, d9, d10, d11, d12, d13}
; CHECK-NEXT:    pop {r7, pc}
entry:
  %0 = call fast <8 x half> @llvm.pow.v8f16(<8 x half> %src1, <8 x half> %src2)
  ret <8 x half> %0
}

; pow on two <2 x double> operands. The expected code makes two calls to pow,
; with the %src1 lane in r0/r1 and the %src2 lane in r2/r3.
define arm_aapcs_vfpcc <2 x double> @pow_float64_t(<2 x double> %src1, <2 x double> %src2) {
; CHECK-LABEL: pow_float64_t:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .save {r7, lr}
; CHECK-NEXT:    push {r7, lr}
; CHECK-NEXT:    .vsave {d8, d9, d10, d11}
; CHECK-NEXT:    vpush {d8, d9, d10, d11}
; CHECK-NEXT:    vmov q4, q1
; CHECK-NEXT:    vmov q5, q0
; CHECK-NEXT:    vmov r0, r1, d11
; CHECK-NEXT:    vmov r2, r3, d9
; CHECK-NEXT:    bl pow
; CHECK-NEXT:    vmov lr, r12, d10
; CHECK-NEXT:    vmov r2, r3, d8
; CHECK-NEXT:    vmov d9, r0, r1
; CHECK-NEXT:    mov r0, lr
; CHECK-NEXT:    mov r1, r12
; CHECK-NEXT:    bl pow
; CHECK-NEXT:    vmov d8, r0, r1
; CHECK-NEXT:    vmov q0, q4
; CHECK-NEXT:    vpop {d8, d9, d10, d11}
; CHECK-NEXT:    pop {r7, pc}
entry:
  %0 = call fast <2 x double> @llvm.pow.v2f64(<2 x double> %src1, <2 x double> %src2)
  ret <2 x double> %0
}

define arm_aapcs_vfpcc <4 x float> @copysign_float32_t(<4 x float> %src1, <4 x float> %src2) {
; FULLFP16-LABEL: copysign_float32_t:
; FULLFP16:       @ %bb.0: @ %entry
; FULLFP16-NEXT:    .save {r4, r5, r7, lr}
; FULLFP16-NEXT:    push {r4, r5, r7, lr}
; FULLFP16-NEXT:    vmov r12, r1, d2
; FULLFP16-NEXT:    vmov r2, lr, d3
; FULLFP16-NEXT:    vmov r3, r0, d0
; FULLFP16-NEXT:    vmov r4, r5, d1
; FULLFP16-NEXT:    lsrs r1, r1, #31
; FULLFP16-NEXT:    bfi r0, r1, #31, #1
; FULLFP16-NEXT:    lsrs r1, r2, #31
; FULLFP16-NEXT:    bfi r4, r1, #31, #1
; FULLFP16-NEXT:    lsr.w r1, lr, #31
; FULLFP16-NEXT:    bfi r5, r1, #31, #1
; FULLFP16-NEXT:    lsr.w r1, r12, #31
; FULLFP16-NEXT:    bfi r3, r1, #31, #1
; FULLFP16-NEXT:    vmov s2, r4
; FULLFP16-NEXT:    vmov s3, r5
; FULLFP16-NEXT:    vmov s1, r0
; FULLFP16-NEXT:    vmov s0, r3
; FULLFP16-NEXT:
pop {r4, r5, r7, pc} 1115; 1116; MVEFP-LABEL: copysign_float32_t: 1117; MVEFP: @ %bb.0: @ %entry 1118; MVEFP-NEXT: vmov.i32 q2, #0x80000000 1119; MVEFP-NEXT: vbic.i32 q0, #0x80000000 1120; MVEFP-NEXT: vand q1, q1, q2 1121; MVEFP-NEXT: vorr q0, q0, q1 1122; MVEFP-NEXT: bx lr 1123entry: 1124 %0 = call fast <4 x float> @llvm.copysign.v4f32(<4 x float> %src1, <4 x float> %src2) 1125 ret <4 x float> %0 1126} 1127 1128define arm_aapcs_vfpcc <8 x half> @copysign_float16_t(<8 x half> %src1, <8 x half> %src2) { 1129; FULLFP16-LABEL: copysign_float16_t: 1130; FULLFP16: @ %bb.0: @ %entry 1131; FULLFP16-NEXT: .pad #32 1132; FULLFP16-NEXT: sub sp, #32 1133; FULLFP16-NEXT: vmovx.f16 s8, s4 1134; FULLFP16-NEXT: vstr.16 s8, [sp, #24] 1135; FULLFP16-NEXT: vstr.16 s4, [sp, #28] 1136; FULLFP16-NEXT: vmovx.f16 s4, s5 1137; FULLFP16-NEXT: vstr.16 s4, [sp, #16] 1138; FULLFP16-NEXT: vmovx.f16 s4, s6 1139; FULLFP16-NEXT: vstr.16 s5, [sp, #20] 1140; FULLFP16-NEXT: vstr.16 s4, [sp, #8] 1141; FULLFP16-NEXT: vmovx.f16 s4, s7 1142; FULLFP16-NEXT: vstr.16 s6, [sp, #12] 1143; FULLFP16-NEXT: vstr.16 s4, [sp] 1144; FULLFP16-NEXT: vstr.16 s7, [sp, #4] 1145; FULLFP16-NEXT: ldrb.w r0, [sp, #25] 1146; FULLFP16-NEXT: vmovx.f16 s4, s0 1147; FULLFP16-NEXT: vabs.f16 s4, s4 1148; FULLFP16-NEXT: vneg.f16 s6, s4 1149; FULLFP16-NEXT: lsls r0, r0, #24 1150; FULLFP16-NEXT: it pl 1151; FULLFP16-NEXT: vmovpl.f32 s6, s4 1152; FULLFP16-NEXT: ldrb.w r0, [sp, #29] 1153; FULLFP16-NEXT: vabs.f16 s4, s0 1154; FULLFP16-NEXT: vneg.f16 s0, s4 1155; FULLFP16-NEXT: lsls r0, r0, #24 1156; FULLFP16-NEXT: it pl 1157; FULLFP16-NEXT: vmovpl.f32 s0, s4 1158; FULLFP16-NEXT: ldrb.w r0, [sp, #17] 1159; FULLFP16-NEXT: vmovx.f16 s4, s1 1160; FULLFP16-NEXT: vabs.f16 s4, s4 1161; FULLFP16-NEXT: vins.f16 s0, s6 1162; FULLFP16-NEXT: vneg.f16 s6, s4 1163; FULLFP16-NEXT: lsls r0, r0, #24 1164; FULLFP16-NEXT: it pl 1165; FULLFP16-NEXT: vmovpl.f32 s6, s4 1166; FULLFP16-NEXT: ldrb.w r0, [sp, #21] 1167; FULLFP16-NEXT: vabs.f16 s4, s1 1168; 
FULLFP16-NEXT: vneg.f16 s1, s4 1169; FULLFP16-NEXT: lsls r0, r0, #24 1170; FULLFP16-NEXT: it pl 1171; FULLFP16-NEXT: vmovpl.f32 s1, s4 1172; FULLFP16-NEXT: ldrb.w r0, [sp, #9] 1173; FULLFP16-NEXT: vmovx.f16 s4, s2 1174; FULLFP16-NEXT: vabs.f16 s4, s4 1175; FULLFP16-NEXT: vins.f16 s1, s6 1176; FULLFP16-NEXT: vneg.f16 s6, s4 1177; FULLFP16-NEXT: lsls r0, r0, #24 1178; FULLFP16-NEXT: it pl 1179; FULLFP16-NEXT: vmovpl.f32 s6, s4 1180; FULLFP16-NEXT: ldrb.w r0, [sp, #13] 1181; FULLFP16-NEXT: vabs.f16 s4, s2 1182; FULLFP16-NEXT: vneg.f16 s2, s4 1183; FULLFP16-NEXT: lsls r0, r0, #24 1184; FULLFP16-NEXT: it pl 1185; FULLFP16-NEXT: vmovpl.f32 s2, s4 1186; FULLFP16-NEXT: ldrb.w r0, [sp, #1] 1187; FULLFP16-NEXT: vmovx.f16 s4, s3 1188; FULLFP16-NEXT: vabs.f16 s4, s4 1189; FULLFP16-NEXT: vins.f16 s2, s6 1190; FULLFP16-NEXT: vneg.f16 s6, s4 1191; FULLFP16-NEXT: lsls r0, r0, #24 1192; FULLFP16-NEXT: it pl 1193; FULLFP16-NEXT: vmovpl.f32 s6, s4 1194; FULLFP16-NEXT: ldrb.w r0, [sp, #5] 1195; FULLFP16-NEXT: vabs.f16 s4, s3 1196; FULLFP16-NEXT: vneg.f16 s3, s4 1197; FULLFP16-NEXT: lsls r0, r0, #24 1198; FULLFP16-NEXT: it pl 1199; FULLFP16-NEXT: vmovpl.f32 s3, s4 1200; FULLFP16-NEXT: vins.f16 s3, s6 1201; FULLFP16-NEXT: add sp, #32 1202; FULLFP16-NEXT: bx lr 1203; 1204; MVEFP-LABEL: copysign_float16_t: 1205; MVEFP: @ %bb.0: @ %entry 1206; MVEFP-NEXT: vmov.i16 q2, #0x8000 1207; MVEFP-NEXT: vbic.i16 q0, #0x8000 1208; MVEFP-NEXT: vand q1, q1, q2 1209; MVEFP-NEXT: vorr q0, q0, q1 1210; MVEFP-NEXT: bx lr 1211entry: 1212 %0 = call fast <8 x half> @llvm.copysign.v8f16(<8 x half> %src1, <8 x half> %src2) 1213 ret <8 x half> %0 1214} 1215 1216define arm_aapcs_vfpcc <2 x double> @copysign_float64_t(<2 x double> %src1, <2 x double> %src2) { 1217; CHECK-LABEL: copysign_float64_t: 1218; CHECK: @ %bb.0: @ %entry 1219; CHECK-NEXT: .save {r7, lr} 1220; CHECK-NEXT: push {r7, lr} 1221; CHECK-NEXT: vmov r0, r1, d3 1222; CHECK-NEXT: vmov r0, lr, d2 1223; CHECK-NEXT: vmov r0, r3, d1 1224; CHECK-NEXT: vmov 
r12, r2, d0 1225; CHECK-NEXT: lsrs r1, r1, #31 1226; CHECK-NEXT: bfi r3, r1, #31, #1 1227; CHECK-NEXT: lsr.w r1, lr, #31 1228; CHECK-NEXT: bfi r2, r1, #31, #1 1229; CHECK-NEXT: vmov d1, r0, r3 1230; CHECK-NEXT: vmov d0, r12, r2 1231; CHECK-NEXT: pop {r7, pc} 1232entry: 1233 %0 = call fast <2 x double> @llvm.copysign.v2f64(<2 x double> %src1, <2 x double> %src2) 1234 ret <2 x double> %0 1235} 1236 1237declare <4 x float> @llvm.sqrt.v4f32(<4 x float>) 1238declare <4 x float> @llvm.cos.v4f32(<4 x float>) 1239declare <4 x float> @llvm.sin.v4f32(<4 x float>) 1240declare <4 x float> @llvm.exp.v4f32(<4 x float>) 1241declare <4 x float> @llvm.exp2.v4f32(<4 x float>) 1242declare <4 x float> @llvm.log.v4f32(<4 x float>) 1243declare <4 x float> @llvm.log2.v4f32(<4 x float>) 1244declare <4 x float> @llvm.log10.v4f32(<4 x float>) 1245declare <4 x float> @llvm.pow.v4f32(<4 x float>, <4 x float>) 1246declare <4 x float> @llvm.copysign.v4f32(<4 x float>, <4 x float>) 1247declare <8 x half> @llvm.sqrt.v8f16(<8 x half>) 1248declare <8 x half> @llvm.cos.v8f16(<8 x half>) 1249declare <8 x half> @llvm.sin.v8f16(<8 x half>) 1250declare <8 x half> @llvm.exp.v8f16(<8 x half>) 1251declare <8 x half> @llvm.exp2.v8f16(<8 x half>) 1252declare <8 x half> @llvm.log.v8f16(<8 x half>) 1253declare <8 x half> @llvm.log2.v8f16(<8 x half>) 1254declare <8 x half> @llvm.log10.v8f16(<8 x half>) 1255declare <8 x half> @llvm.pow.v8f16(<8 x half>, <8 x half>) 1256declare <8 x half> @llvm.copysign.v8f16(<8 x half>, <8 x half>) 1257declare <2 x double> @llvm.sqrt.v2f64(<2 x double>) 1258declare <2 x double> @llvm.cos.v2f64(<2 x double>) 1259declare <2 x double> @llvm.sin.v2f64(<2 x double>) 1260declare <2 x double> @llvm.exp.v2f64(<2 x double>) 1261declare <2 x double> @llvm.exp2.v2f64(<2 x double>) 1262declare <2 x double> @llvm.log.v2f64(<2 x double>) 1263declare <2 x double> @llvm.log2.v2f64(<2 x double>) 1264declare <2 x double> @llvm.log10.v2f64(<2 x double>) 1265declare <2 x double> @llvm.pow.v2f64(<2 
x double>, <2 x double>) 1266declare <2 x double> @llvm.copysign.v2f64(<2 x double>, <2 x double>) 1267