; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+sse2 | FileCheck %s --check-prefix=SSE2
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+sse2 -mcpu=x86-64 | FileCheck %s --check-prefix=SSE2-SCHEDULE
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+sse3 | FileCheck %s --check-prefix=SSE3
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+sse3 -mcpu=nocona | FileCheck %s --check-prefix=SSE3
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx | FileCheck %s --check-prefix=AVX
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx -mcpu=sandybridge | FileCheck %s --check-prefix=AVX

define void @_Z1fe(x86_fp80 %z) local_unnamed_addr #0 {
; SSE2-LABEL: _Z1fe:
; SSE2:       ## %bb.0: ## %entry
; SSE2-NEXT:    pushq %rbp
; SSE2-NEXT:    .cfi_def_cfa_offset 16
; SSE2-NEXT:    .cfi_offset %rbp, -16
; SSE2-NEXT:    movq %rsp, %rbp
; SSE2-NEXT:    .cfi_def_cfa_register %rbp
; SSE2-NEXT:    fldt 16(%rbp)
; SSE2-NEXT:    fnstcw -4(%rbp)
; SSE2-NEXT:    movzwl -4(%rbp), %eax
; SSE2-NEXT:    orl $3072, %eax ## imm = 0xC00
; SSE2-NEXT:    movw %ax, -8(%rbp)
; SSE2-NEXT:    fldcw -8(%rbp)
; SSE2-NEXT:    fistl -12(%rbp)
; SSE2-NEXT:    fldcw -4(%rbp)
; SSE2-NEXT:    cvtsi2sdl -12(%rbp), %xmm0
; SSE2-NEXT:    movsd %xmm0, -64(%rbp)
; SSE2-NEXT:    movsd %xmm0, -32(%rbp)
; SSE2-NEXT:    fsubl -32(%rbp)
; SSE2-NEXT:    flds {{\.?LCPI[0-9]+_[0-9]+}}(%rip)
; SSE2-NEXT:    fmul %st, %st(1)
; SSE2-NEXT:    fnstcw -2(%rbp)
; SSE2-NEXT:    movzwl -2(%rbp), %eax
; SSE2-NEXT:    orl $3072, %eax ## imm = 0xC00
; SSE2-NEXT:    movw %ax, -6(%rbp)
; SSE2-NEXT:    fldcw -6(%rbp)
; SSE2-NEXT:    fxch %st(1)
; SSE2-NEXT:    fistl -16(%rbp)
; SSE2-NEXT:    fldcw -2(%rbp)
; SSE2-NEXT:    xorps %xmm0, %xmm0
; SSE2-NEXT:    cvtsi2sdl -16(%rbp), %xmm0
; SSE2-NEXT:    movsd %xmm0, -56(%rbp)
; SSE2-NEXT:    movsd %xmm0, -24(%rbp)
; SSE2-NEXT:    fsubl -24(%rbp)
; SSE2-NEXT:    fmulp %st, %st(1)
; SSE2-NEXT:    fstpl -48(%rbp)
; SSE2-NEXT:    popq %rbp
; SSE2-NEXT:    retq
;
; SSE2-SCHEDULE-LABEL: _Z1fe:
; SSE2-SCHEDULE:       ## %bb.0: ## %entry
; SSE2-SCHEDULE-NEXT:    pushq %rbp
; SSE2-SCHEDULE-NEXT:    .cfi_def_cfa_offset 16
; SSE2-SCHEDULE-NEXT:    .cfi_offset %rbp, -16
; SSE2-SCHEDULE-NEXT:    movq %rsp, %rbp
; SSE2-SCHEDULE-NEXT:    .cfi_def_cfa_register %rbp
; SSE2-SCHEDULE-NEXT:    fldt 16(%rbp)
; SSE2-SCHEDULE-NEXT:    fnstcw -4(%rbp)
; SSE2-SCHEDULE-NEXT:    movzwl -4(%rbp), %eax
; SSE2-SCHEDULE-NEXT:    orl $3072, %eax ## imm = 0xC00
; SSE2-SCHEDULE-NEXT:    movw %ax, -8(%rbp)
; SSE2-SCHEDULE-NEXT:    fldcw -8(%rbp)
; SSE2-SCHEDULE-NEXT:    fistl -12(%rbp)
; SSE2-SCHEDULE-NEXT:    fldcw -4(%rbp)
; SSE2-SCHEDULE-NEXT:    cvtsi2sdl -12(%rbp), %xmm0
; SSE2-SCHEDULE-NEXT:    movsd %xmm0, -64(%rbp)
; SSE2-SCHEDULE-NEXT:    movsd %xmm0, -32(%rbp)
; SSE2-SCHEDULE-NEXT:    fsubl -32(%rbp)
; SSE2-SCHEDULE-NEXT:    flds {{\.?LCPI[0-9]+_[0-9]+}}(%rip)
; SSE2-SCHEDULE-NEXT:    fmul %st, %st(1)
; SSE2-SCHEDULE-NEXT:    fnstcw -2(%rbp)
; SSE2-SCHEDULE-NEXT:    movzwl -2(%rbp), %eax
; SSE2-SCHEDULE-NEXT:    orl $3072, %eax ## imm = 0xC00
; SSE2-SCHEDULE-NEXT:    movw %ax, -6(%rbp)
; SSE2-SCHEDULE-NEXT:    fldcw -6(%rbp)
; SSE2-SCHEDULE-NEXT:    fxch %st(1)
; SSE2-SCHEDULE-NEXT:    fistl -16(%rbp)
; SSE2-SCHEDULE-NEXT:    fldcw -2(%rbp)
; SSE2-SCHEDULE-NEXT:    xorps %xmm0, %xmm0
; SSE2-SCHEDULE-NEXT:    cvtsi2sdl -16(%rbp), %xmm0
; SSE2-SCHEDULE-NEXT:    movsd %xmm0, -56(%rbp)
; SSE2-SCHEDULE-NEXT:    movsd %xmm0, -24(%rbp)
; SSE2-SCHEDULE-NEXT:    fsubl -24(%rbp)
; SSE2-SCHEDULE-NEXT:    fmulp %st, %st(1)
; SSE2-SCHEDULE-NEXT:    fstpl -48(%rbp)
; SSE2-SCHEDULE-NEXT:    popq %rbp
; SSE2-SCHEDULE-NEXT:    retq
;
; SSE3-LABEL: _Z1fe:
; SSE3:       ## %bb.0: ## %entry
; SSE3-NEXT:    pushq %rbp
; SSE3-NEXT:    .cfi_def_cfa_offset 16
; SSE3-NEXT:    .cfi_offset %rbp, -16
; SSE3-NEXT:    movq %rsp, %rbp
; SSE3-NEXT:    .cfi_def_cfa_register %rbp
; SSE3-NEXT:    fldt 16(%rbp)
; SSE3-NEXT:    fld %st(0)
; SSE3-NEXT:    fisttpl -4(%rbp)
; SSE3-NEXT:    cvtsi2sdl -4(%rbp), %xmm0
; SSE3-NEXT:    movsd %xmm0, -48(%rbp)
; SSE3-NEXT:    movsd %xmm0, -24(%rbp)
; SSE3-NEXT:    fsubl -24(%rbp)
; SSE3-NEXT:    flds {{\.?LCPI[0-9]+_[0-9]+}}(%rip)
; SSE3-NEXT:    fmul %st, %st(1)
; SSE3-NEXT:    fld %st(1)
; SSE3-NEXT:    fisttpl -8(%rbp)
; SSE3-NEXT:    xorps %xmm0, %xmm0
; SSE3-NEXT:    cvtsi2sdl -8(%rbp), %xmm0
; SSE3-NEXT:    movsd %xmm0, -40(%rbp)
; SSE3-NEXT:    movsd %xmm0, -16(%rbp)
; SSE3-NEXT:    fxch %st(1)
; SSE3-NEXT:    fsubl -16(%rbp)
; SSE3-NEXT:    fmulp %st, %st(1)
; SSE3-NEXT:    fstpl -32(%rbp)
; SSE3-NEXT:    popq %rbp
; SSE3-NEXT:    retq
;
; AVX-LABEL: _Z1fe:
; AVX:       ## %bb.0: ## %entry
; AVX-NEXT:    pushq %rbp
; AVX-NEXT:    .cfi_def_cfa_offset 16
; AVX-NEXT:    .cfi_offset %rbp, -16
; AVX-NEXT:    movq %rsp, %rbp
; AVX-NEXT:    .cfi_def_cfa_register %rbp
; AVX-NEXT:    fldt 16(%rbp)
; AVX-NEXT:    fld %st(0)
; AVX-NEXT:    fisttpl -4(%rbp)
; AVX-NEXT:    vcvtsi2sdl -4(%rbp), %xmm0, %xmm0
; AVX-NEXT:    vmovsd %xmm0, -48(%rbp)
; AVX-NEXT:    vmovsd %xmm0, -24(%rbp)
; AVX-NEXT:    fsubl -24(%rbp)
; AVX-NEXT:    flds {{\.?LCPI[0-9]+_[0-9]+}}(%rip)
; AVX-NEXT:    fmul %st, %st(1)
; AVX-NEXT:    fld %st(1)
; AVX-NEXT:    fisttpl -8(%rbp)
; AVX-NEXT:    vcvtsi2sdl -8(%rbp), %xmm1, %xmm0
; AVX-NEXT:    vmovsd %xmm0, -40(%rbp)
; AVX-NEXT:    vmovsd %xmm0, -16(%rbp)
; AVX-NEXT:    fxch %st(1)
; AVX-NEXT:    fsubl -16(%rbp)
; AVX-NEXT:    fmulp %st, %st(1)
; AVX-NEXT:    fstpl -32(%rbp)
; AVX-NEXT:    popq %rbp
; AVX-NEXT:    retq
entry:
  %tx = alloca [3 x double], align 16
  %conv = fptosi x86_fp80 %z to i32
  %conv1 = sitofp i32 %conv to double
  store double %conv1, ptr %tx, align 16
  %conv4 = fpext double %conv1 to x86_fp80
  %sub = fsub x86_fp80 %z, %conv4
  %mul = fmul x86_fp80 %sub, 0xK40178000000000000000
  %conv.1 = fptosi x86_fp80 %mul to i32
  %conv1.1 = sitofp i32 %conv.1 to double
  %arrayidx.1 = getelementptr inbounds [3 x double], ptr %tx, i64 0, i64 1
  store double %conv1.1, ptr %arrayidx.1, align 8
  %conv4.1 = fpext double %conv1.1 to x86_fp80
  %sub.1 = fsub x86_fp80 %mul, %conv4.1
  %mul.1 = fmul x86_fp80 %sub.1, 0xK40178000000000000000
  %conv5 = fptrunc x86_fp80 %mul.1 to double
  %arrayidx6 = getelementptr inbounds [3 x double], ptr %tx, i64 0, i64 2
  store double %conv5, ptr %arrayidx6, align 16
  ret void
}

attributes #0 = { noinline uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
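
; @_Z1fe demangles to f(long double), and 0xK40178000000000000000 is 2^24
; (16777216.0), so the function above splits an x86_fp80 value into three
; double "digits" of 24 bits each. The lowering difference under test is the
; fptosi: SSE3/AVX targets can truncate with fisttpl directly, while plain
; SSE2 has to spill the x87 control word, force round-toward-zero (orl $3072
; sets the RC bits), fistl, and then restore the original control word.
; A roughly equivalent C source is sketched below; it is an assumption for
; illustration, not part of the test:
;
;   void f(long double z) {
;     double tx[3];
;     for (int i = 0; i < 2; ++i) {
;       tx[i] = (double)(int)z;           /* fptosi + sitofp */
;       z = (z - tx[i]) * 16777216.0L;    /* scale remainder by 2^24 */
;     }
;     tx[2] = (double)z;                  /* fptrunc */
;   }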