; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-unknown | FileCheck %s --check-prefix=X86-X87
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=X86-SSE
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=X64

; Ideally this would compile to 5 multiplies.

; Default (speed) lowering: llvm.powi with constant exponent 15 is expanded
; inline into a repeated-squaring multiply chain (no libcall).
define double @pow_wrapper(double %a) nounwind readonly ssp noredzone {
; X86-X87-LABEL: pow_wrapper:
; X86-X87:         # %bb.0:
; X86-X87-NEXT:    fldl {{[0-9]+}}(%esp)
; X86-X87-NEXT:    fld %st(0)
; X86-X87-NEXT:    fmul %st(1), %st
; X86-X87-NEXT:    fmul %st, %st(1)
; X86-X87-NEXT:    fmul %st, %st(0)
; X86-X87-NEXT:    fmul %st, %st(1)
; X86-X87-NEXT:    fmul %st, %st(0)
; X86-X87-NEXT:    fmulp %st, %st(1)
; X86-X87-NEXT:    retl
;
; X86-SSE-LABEL: pow_wrapper:
; X86-SSE:         # %bb.0:
; X86-SSE-NEXT:    pushl %ebp
; X86-SSE-NEXT:    movl %esp, %ebp
; X86-SSE-NEXT:    andl $-8, %esp
; X86-SSE-NEXT:    subl $8, %esp
; X86-SSE-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; X86-SSE-NEXT:    movapd %xmm0, %xmm1
; X86-SSE-NEXT:    mulsd %xmm0, %xmm1
; X86-SSE-NEXT:    mulsd %xmm1, %xmm0
; X86-SSE-NEXT:    mulsd %xmm1, %xmm1
; X86-SSE-NEXT:    mulsd %xmm1, %xmm0
; X86-SSE-NEXT:    mulsd %xmm1, %xmm1
; X86-SSE-NEXT:    mulsd %xmm0, %xmm1
; X86-SSE-NEXT:    movsd %xmm1, (%esp)
; X86-SSE-NEXT:    fldl (%esp)
; X86-SSE-NEXT:    movl %ebp, %esp
; X86-SSE-NEXT:    popl %ebp
; X86-SSE-NEXT:    retl
;
; X64-LABEL: pow_wrapper:
; X64:         # %bb.0:
; X64-NEXT:    movapd %xmm0, %xmm1
; X64-NEXT:    mulsd %xmm0, %xmm1
; X64-NEXT:    mulsd %xmm1, %xmm0
; X64-NEXT:    mulsd %xmm1, %xmm1
; X64-NEXT:    mulsd %xmm1, %xmm0
; X64-NEXT:    mulsd %xmm1, %xmm1
; X64-NEXT:    mulsd %xmm1, %xmm0
; X64-NEXT:    retq
  %ret = tail call double @llvm.powi.f64.i32(double %a, i32 15) nounwind ; <double> [#uses=1]
  ret double %ret
}

; With optsize, the inline multiply expansion is suppressed and the powi
; lowers to a __powidf2 libcall (a tail call on x86-64).
define double @pow_wrapper_optsize(double %a) optsize {
; X86-X87-LABEL: pow_wrapper_optsize:
; X86-X87:         # %bb.0:
; X86-X87-NEXT:    subl $12, %esp
; X86-X87-NEXT:    .cfi_def_cfa_offset 16
; X86-X87-NEXT:    fldl {{[0-9]+}}(%esp)
; X86-X87-NEXT:    fstpl (%esp)
; X86-X87-NEXT:    movl $15, {{[0-9]+}}(%esp)
; X86-X87-NEXT:    calll __powidf2
; X86-X87-NEXT:    addl $12, %esp
; X86-X87-NEXT:    .cfi_def_cfa_offset 4
; X86-X87-NEXT:    retl
;
; X86-SSE-LABEL: pow_wrapper_optsize:
; X86-SSE:         # %bb.0:
; X86-SSE-NEXT:    subl $12, %esp
; X86-SSE-NEXT:    .cfi_def_cfa_offset 16
; X86-SSE-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; X86-SSE-NEXT:    movsd %xmm0, (%esp)
; X86-SSE-NEXT:    movl $15, {{[0-9]+}}(%esp)
; X86-SSE-NEXT:    calll __powidf2
; X86-SSE-NEXT:    addl $12, %esp
; X86-SSE-NEXT:    .cfi_def_cfa_offset 4
; X86-SSE-NEXT:    retl
;
; X64-LABEL: pow_wrapper_optsize:
; X64:         # %bb.0:
; X64-NEXT:    movl $15, %edi
; X64-NEXT:    jmp __powidf2@PLT # TAILCALL
  %ret = tail call double @llvm.powi.f64.i32(double %a, i32 15) nounwind ; <double> [#uses=1]
  ret double %ret
}

; Profile-guided size optimization (!prof !14 marks the function cold via the
; module's ProfileSummary) should behave the same as optsize: libcall, not
; the inline multiply chain.
define double @pow_wrapper_pgso(double %a) !prof !14 {
; X86-X87-LABEL: pow_wrapper_pgso:
; X86-X87:         # %bb.0:
; X86-X87-NEXT:    subl $12, %esp
; X86-X87-NEXT:    .cfi_def_cfa_offset 16
; X86-X87-NEXT:    fldl {{[0-9]+}}(%esp)
; X86-X87-NEXT:    fstpl (%esp)
; X86-X87-NEXT:    movl $15, {{[0-9]+}}(%esp)
; X86-X87-NEXT:    calll __powidf2
; X86-X87-NEXT:    addl $12, %esp
; X86-X87-NEXT:    .cfi_def_cfa_offset 4
; X86-X87-NEXT:    retl
;
; X86-SSE-LABEL: pow_wrapper_pgso:
; X86-SSE:         # %bb.0:
; X86-SSE-NEXT:    subl $12, %esp
; X86-SSE-NEXT:    .cfi_def_cfa_offset 16
; X86-SSE-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; X86-SSE-NEXT:    movsd %xmm0, (%esp)
; X86-SSE-NEXT:    movl $15, {{[0-9]+}}(%esp)
; X86-SSE-NEXT:    calll __powidf2
; X86-SSE-NEXT:    addl $12, %esp
; X86-SSE-NEXT:    .cfi_def_cfa_offset 4
; X86-SSE-NEXT:    retl
;
; X64-LABEL: pow_wrapper_pgso:
; X64:         # %bb.0:
; X64-NEXT:    movl $15, %edi
; X64-NEXT:    jmp __powidf2@PLT # TAILCALL
  %ret = tail call double @llvm.powi.f64.i32(double %a, i32 15) nounwind ; <double> [#uses=1]
  ret double %ret
}

; With minsize the x86-64 lowering additionally shrinks the immediate
; materialization (push $15 / pop %rdi instead of movl $15, %edi).
define double @pow_wrapper_minsize(double %a) minsize {
; X86-X87-LABEL: pow_wrapper_minsize:
; X86-X87:         # %bb.0:
; X86-X87-NEXT:    subl $12, %esp
; X86-X87-NEXT:    .cfi_def_cfa_offset 16
; X86-X87-NEXT:    fldl {{[0-9]+}}(%esp)
; X86-X87-NEXT:    fstpl (%esp)
; X86-X87-NEXT:    movl $15, {{[0-9]+}}(%esp)
; X86-X87-NEXT:    calll __powidf2
; X86-X87-NEXT:    addl $12, %esp
; X86-X87-NEXT:    .cfi_def_cfa_offset 4
; X86-X87-NEXT:    retl
;
; X86-SSE-LABEL: pow_wrapper_minsize:
; X86-SSE:         # %bb.0:
; X86-SSE-NEXT:    subl $12, %esp
; X86-SSE-NEXT:    .cfi_def_cfa_offset 16
; X86-SSE-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; X86-SSE-NEXT:    movsd %xmm0, (%esp)
; X86-SSE-NEXT:    movl $15, {{[0-9]+}}(%esp)
; X86-SSE-NEXT:    calll __powidf2
; X86-SSE-NEXT:    addl $12, %esp
; X86-SSE-NEXT:    .cfi_def_cfa_offset 4
; X86-SSE-NEXT:    retl
;
; X64-LABEL: pow_wrapper_minsize:
; X64:         # %bb.0:
; X64-NEXT:    pushq $15
; X64-NEXT:    .cfi_adjust_cfa_offset 8
; X64-NEXT:    popq %rdi
; X64-NEXT:    .cfi_adjust_cfa_offset -8
; X64-NEXT:    jmp __powidf2@PLT # TAILCALL
  %ret = tail call double @llvm.powi.f64.i32(double %a, i32 15) nounwind ; <double> [#uses=1]
  ret double %ret
}

; Vector powi is scalarized: each lane becomes a separate __powisf2 libcall,
; with the exponent kept live in a callee-saved register across the calls.
define <2 x float> @powi_v2f32(<2 x float> %a) minsize {
; X86-X87-LABEL: powi_v2f32:
; X86-X87:         # %bb.0:
; X86-X87-NEXT:    pushl %esi
; X86-X87-NEXT:    .cfi_def_cfa_offset 8
; X86-X87-NEXT:    subl $16, %esp
; X86-X87-NEXT:    .cfi_def_cfa_offset 24
; X86-X87-NEXT:    .cfi_offset %esi, -8
; X86-X87-NEXT:    flds {{[0-9]+}}(%esp)
; X86-X87-NEXT:    fstps {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X86-X87-NEXT:    flds {{[0-9]+}}(%esp)
; X86-X87-NEXT:    pushl $15
; X86-X87-NEXT:    .cfi_adjust_cfa_offset 4
; X86-X87-NEXT:    popl %esi
; X86-X87-NEXT:    .cfi_adjust_cfa_offset -4
; X86-X87-NEXT:    movl %esi, {{[0-9]+}}(%esp)
; X86-X87-NEXT:    fstps (%esp)
; X86-X87-NEXT:    calll __powisf2
; X86-X87-NEXT:    fstps {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X86-X87-NEXT:    movl %esi, {{[0-9]+}}(%esp)
; X86-X87-NEXT:    flds {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
; X86-X87-NEXT:    fstps (%esp)
; X86-X87-NEXT:    calll __powisf2
; X86-X87-NEXT:    flds {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
; X86-X87-NEXT:    fxch %st(1)
; X86-X87-NEXT:    addl $16, %esp
; X86-X87-NEXT:    .cfi_def_cfa_offset 8
; X86-X87-NEXT:    popl %esi
; X86-X87-NEXT:    .cfi_def_cfa_offset 4
; X86-X87-NEXT:    retl
;
; X86-SSE-LABEL: powi_v2f32:
; X86-SSE:         # %bb.0:
; X86-SSE-NEXT:    pushl %esi
; X86-SSE-NEXT:    .cfi_def_cfa_offset 8
; X86-SSE-NEXT:    subl $32, %esp
; X86-SSE-NEXT:    .cfi_def_cfa_offset 40
; X86-SSE-NEXT:    .cfi_offset %esi, -8
; X86-SSE-NEXT:    movups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
; X86-SSE-NEXT:    pushl $15
; X86-SSE-NEXT:    .cfi_adjust_cfa_offset 4
; X86-SSE-NEXT:    popl %esi
; X86-SSE-NEXT:    .cfi_adjust_cfa_offset -4
; X86-SSE-NEXT:    movl %esi, {{[0-9]+}}(%esp)
; X86-SSE-NEXT:    movss %xmm0, (%esp)
; X86-SSE-NEXT:    calll __powisf2
; X86-SSE-NEXT:    movl %esi, {{[0-9]+}}(%esp)
; X86-SSE-NEXT:    movups {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
; X86-SSE-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
; X86-SSE-NEXT:    movss %xmm0, (%esp)
; X86-SSE-NEXT:    fstps {{[0-9]+}}(%esp)
; X86-SSE-NEXT:    calll __powisf2
; X86-SSE-NEXT:    fstps {{[0-9]+}}(%esp)
; X86-SSE-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-SSE-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X86-SSE-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; X86-SSE-NEXT:    addl $32, %esp
; X86-SSE-NEXT:    .cfi_def_cfa_offset 8
; X86-SSE-NEXT:    popl %esi
; X86-SSE-NEXT:    .cfi_def_cfa_offset 4
; X86-SSE-NEXT:    retl
;
; X64-LABEL: powi_v2f32:
; X64:         # %bb.0:
; X64-NEXT:    pushq %rbx
; X64-NEXT:    .cfi_def_cfa_offset 16
; X64-NEXT:    subq $32, %rsp
; X64-NEXT:    .cfi_def_cfa_offset 48
; X64-NEXT:    .cfi_offset %rbx, -16
; X64-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
; X64-NEXT:    pushq $15
; X64-NEXT:    .cfi_adjust_cfa_offset 8
; X64-NEXT:    popq %rbx
; X64-NEXT:    .cfi_adjust_cfa_offset -8
; X64-NEXT:    movl %ebx, %edi
; X64-NEXT:    callq __powisf2@PLT
; X64-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; X64-NEXT:    movaps (%rsp), %xmm0 # 16-byte Reload
; X64-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
; X64-NEXT:    movl %ebx, %edi
; X64-NEXT:    callq __powisf2@PLT
; X64-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
; X64-NEXT:    unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; X64-NEXT:    movaps %xmm1, %xmm0
; X64-NEXT:    addq $32, %rsp
; X64-NEXT:    .cfi_def_cfa_offset 16
; X64-NEXT:    popq %rbx
; X64-NEXT:    .cfi_def_cfa_offset 8
; X64-NEXT:    retq
  %ret = tail call < 2 x float> @llvm.powi.v2f32.i32(<2 x float> %a, i32 15) nounwind ;
  ret <2 x float> %ret
}

declare double @llvm.powi.f64.i32(double, i32) nounwind readonly
declare < 2 x float> @llvm.powi.v2f32.i32(<2 x float>, i32) nounwind readonly

; Module profile summary used by the PGSO test above; !14 gives
; pow_wrapper_pgso an entry count of 0, i.e. a cold function.
!llvm.module.flags = !{!0}
!0 = !{i32 1, !"ProfileSummary", !1}
!1 = !{!2, !3, !4, !5, !6, !7, !8, !9}
!2 = !{!"ProfileFormat", !"InstrProf"}
!3 = !{!"TotalCount", i64 10000}
!4 = !{!"MaxCount", i64 10}
!5 = !{!"MaxInternalCount", i64 1}
!6 = !{!"MaxFunctionCount", i64 1000}
!7 = !{!"NumCounts", i64 3}
!8 = !{!"NumFunctions", i64 3}
!9 = !{!"DetailedSummary", !10}
!10 = !{!11, !12, !13}
!11 = !{i32 10000, i64 100, i32 1}
!12 = !{i32 999000, i64 100, i32 1}
!13 = !{i32 999999, i64 1, i32 2}
!14 = !{!"function_entry_count", i64 0}