xref: /llvm-project/llvm/test/CodeGen/X86/fp128-i128.ll (revision 8fa1e5771bbd080c8a2a11c0579a3082cedbf94a)
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -O2 -verify-machineinstrs -mtriple=x86_64-linux-android -mattr=+mmx -enable-legalize-types-checking | FileCheck %s --check-prefixes=CHECK,SSE
; RUN: llc < %s -O2 -verify-machineinstrs -mtriple=x86_64-linux-gnu     -mattr=+mmx -enable-legalize-types-checking | FileCheck %s --check-prefixes=CHECK,SSE
; RUN: llc < %s -O2 -verify-machineinstrs -mtriple=x86_64-linux-android -mattr=+mmx,avx2 -enable-legalize-types-checking | FileCheck %s --check-prefixes=CHECK,AVX
; RUN: llc < %s -O2 -verify-machineinstrs -mtriple=x86_64-linux-gnu     -mattr=+mmx,avx2 -enable-legalize-types-checking | FileCheck %s --check-prefixes=CHECK,AVX
; RUN: llc < %s -O2 -verify-machineinstrs -mtriple=x86_64-linux-android -mattr=+mmx,avx512vl -enable-legalize-types-checking | FileCheck %s --check-prefixes=CHECK,AVX
; RUN: llc < %s -O2 -verify-machineinstrs -mtriple=x86_64-linux-gnu     -mattr=+mmx,avx512vl -enable-legalize-types-checking | FileCheck %s --check-prefixes=CHECK,AVX

; These tests were generated from simplified libm C code.
; When compiled for the x86_64-linux-android target,
; long double is mapped to f128 type that should be passed
; in SSE registers. When the f128 type calling convention
; problem was fixed, old llvm code failed to handle f128 values
; in several f128/i128 type operations. These unit tests hopefully
; will catch regression in any future change in this area.
; To modify or enhance these test cases, please consult libm
; code pattern and compile with -target x86_64-linux-android
; to generate IL. If the __float128 keyword is not accepted by
; clang, just define it to "long double".
;

; typedef long double __float128;
; union IEEEl2bits {
;   __float128 e;
;   struct {
;     unsigned long manl :64;
;     unsigned long manh :48;
;     unsigned int exp :15;
;     unsigned int sign :1;
;   } bits;
;   struct {
;     unsigned long manl :64;
;     unsigned long manh :48;
;     unsigned int expsign :16;
;   } xbits;
; };

; C code:
; void foo(__float128 x);
; void TestUnionLD1(__float128 s, unsigned long n) {
;      union IEEEl2bits u;
;      __float128 w;
;      u.e = s;
;      u.bits.manh = n;
;      w = u.e;
;      foo(w);
; }
define dso_local void @TestUnionLD1(fp128 %s, i64 %n) #0 {
; SSE-LABEL: TestUnionLD1:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    movaps %xmm0, -{{[0-9]+}}(%rsp)
; SSE-NEXT:    movzwl -{{[0-9]+}}(%rsp), %eax
; SSE-NEXT:    shlq $48, %rax
; SSE-NEXT:    movq -{{[0-9]+}}(%rsp), %rcx
; SSE-NEXT:    movabsq $281474976710655, %rdx # imm = 0xFFFFFFFFFFFF
; SSE-NEXT:    andq %rdi, %rdx
; SSE-NEXT:    orq %rax, %rdx
; SSE-NEXT:    movq %rcx, -{{[0-9]+}}(%rsp)
; SSE-NEXT:    movq %rdx, -{{[0-9]+}}(%rsp)
; SSE-NEXT:    movaps -{{[0-9]+}}(%rsp), %xmm0
; SSE-NEXT:    jmp foo # TAILCALL
;
; AVX-LABEL: TestUnionLD1:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vmovaps %xmm0, -{{[0-9]+}}(%rsp)
; AVX-NEXT:    movzwl -{{[0-9]+}}(%rsp), %eax
; AVX-NEXT:    shlq $48, %rax
; AVX-NEXT:    movq -{{[0-9]+}}(%rsp), %rcx
; AVX-NEXT:    movabsq $281474976710655, %rdx # imm = 0xFFFFFFFFFFFF
; AVX-NEXT:    andq %rdi, %rdx
; AVX-NEXT:    orq %rax, %rdx
; AVX-NEXT:    movq %rcx, -{{[0-9]+}}(%rsp)
; AVX-NEXT:    movq %rdx, -{{[0-9]+}}(%rsp)
; AVX-NEXT:    vmovaps -{{[0-9]+}}(%rsp), %xmm0
; AVX-NEXT:    jmp foo # TAILCALL
entry:
  %0 = bitcast fp128 %s to i128
  %1 = zext i64 %n to i128
  %bf.value = shl nuw i128 %1, 64
  %bf.shl = and i128 %bf.value, 5192296858534809181786422619668480
  %bf.clear = and i128 %0, -5192296858534809181786422619668481
  %bf.set = or i128 %bf.shl, %bf.clear
  %2 = bitcast i128 %bf.set to fp128
  tail call void @foo(fp128 %2) #2
  ret void
}

; C code:
; __float128 TestUnionLD2(__float128 s) {
;      union IEEEl2bits u;
;      __float128 w;
;      u.e = s;
;      u.bits.manl = 0;
;      w = u.e;
;      return w;
; }
define fp128 @TestUnionLD2(fp128 %s) #0 {
; SSE-LABEL: TestUnionLD2:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    movaps %xmm0, -{{[0-9]+}}(%rsp)
; SSE-NEXT:    movq -{{[0-9]+}}(%rsp), %rax
; SSE-NEXT:    movq %rax, -{{[0-9]+}}(%rsp)
; SSE-NEXT:    movq $0, -{{[0-9]+}}(%rsp)
; SSE-NEXT:    movaps -{{[0-9]+}}(%rsp), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: TestUnionLD2:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vmovaps %xmm0, -{{[0-9]+}}(%rsp)
; AVX-NEXT:    movq -{{[0-9]+}}(%rsp), %rax
; AVX-NEXT:    movq %rax, -{{[0-9]+}}(%rsp)
; AVX-NEXT:    movq $0, -{{[0-9]+}}(%rsp)
; AVX-NEXT:    vmovaps -{{[0-9]+}}(%rsp), %xmm0
; AVX-NEXT:    retq
entry:
  %0 = bitcast fp128 %s to i128
  %bf.clear = and i128 %0, -18446744073709551616
  %1 = bitcast i128 %bf.clear to fp128
  ret fp128 %1
}

; C code:
; __float128 TestI128_1(__float128 x)
; {
;  union IEEEl2bits z;
;  z.e = x;
;  z.bits.sign = 0;
;  return (z.e < 0.1L) ? 1.0L : 2.0L;
; }
define fp128 @TestI128_1(fp128 %x) #0 {
; SSE-LABEL: TestI128_1:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    pushq %rax
; SSE-NEXT:    andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE-NEXT:    movaps {{.*#+}} xmm1 = [1.00000000000000000000000000000000005E-1]
; SSE-NEXT:    callq __lttf2@PLT
; SSE-NEXT:    xorl %ecx, %ecx
; SSE-NEXT:    testl %eax, %eax
; SSE-NEXT:    sets %cl
; SSE-NEXT:    shll $4, %ecx
; SSE-NEXT:    movaps {{.*#+}} xmm0 = [?]
; SSE-NEXT:    popq %rax
; SSE-NEXT:    retq
;
; AVX-LABEL: TestI128_1:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    pushq %rax
; AVX-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX-NEXT:    vmovaps {{.*#+}} xmm1 = [1.00000000000000000000000000000000005E-1]
; AVX-NEXT:    callq __lttf2@PLT
; AVX-NEXT:    xorl %ecx, %ecx
; AVX-NEXT:    testl %eax, %eax
; AVX-NEXT:    sets %cl
; AVX-NEXT:    shll $4, %ecx
; AVX-NEXT:    vmovaps {{.*#+}} xmm0 = [?]
; AVX-NEXT:    popq %rax
; AVX-NEXT:    retq
entry:
  %0 = bitcast fp128 %x to i128
  %bf.clear = and i128 %0, 170141183460469231731687303715884105727
  %1 = bitcast i128 %bf.clear to fp128
  %cmp = fcmp olt fp128 %1, 0xL999999999999999A3FFB999999999999
  %cond = select i1 %cmp, fp128 0xL00000000000000003FFF000000000000, fp128 0xL00000000000000004000000000000000
  ret fp128 %cond
}

; C code:
; __float128 TestI128_2(__float128 x, __float128 y)
; {
;  unsigned short hx;
;  union IEEEl2bits ge_u;
;  ge_u.e = x;
;  hx = ge_u.xbits.expsign;
;  return (hx & 0x8000) == 0 ? x : y;
; }
define fp128 @TestI128_2(fp128 %x, fp128 %y) #0 {
; SSE-LABEL: TestI128_2:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    movaps %xmm0, -{{[0-9]+}}(%rsp)
; SSE-NEXT:    cmpq $0, -{{[0-9]+}}(%rsp)
; SSE-NEXT:    jns .LBB3_2
; SSE-NEXT:  # %bb.1: # %entry
; SSE-NEXT:    movaps %xmm1, %xmm0
; SSE-NEXT:  .LBB3_2: # %entry
; SSE-NEXT:    retq
;
; AVX-LABEL: TestI128_2:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vmovaps %xmm0, -{{[0-9]+}}(%rsp)
; AVX-NEXT:    cmpq $0, -{{[0-9]+}}(%rsp)
; AVX-NEXT:    jns .LBB3_2
; AVX-NEXT:  # %bb.1: # %entry
; AVX-NEXT:    vmovaps %xmm1, %xmm0
; AVX-NEXT:  .LBB3_2: # %entry
; AVX-NEXT:    retq
entry:
  %0 = bitcast fp128 %x to i128
  %cmp = icmp sgt i128 %0, -1
  %cond = select i1 %cmp, fp128 %x, fp128 %y
  ret fp128 %cond
}

; C code:
; __float128 TestI128_3(__float128 x, int *ex)
; {
;  union IEEEl2bits u;
;  u.e = x;
;  if (u.bits.exp == 0) {
;    u.e *= 0x1.0p514;
;    u.bits.exp = 0x3ffe;
;  }
;  return (u.e);
; }
define fp128 @TestI128_3(fp128 %x, ptr nocapture readnone %ex) #0 {
; SSE-LABEL: TestI128_3:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    subq $56, %rsp
; SSE-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
; SSE-NEXT:    movq {{[0-9]+}}(%rsp), %rax
; SSE-NEXT:    movabsq $9223090561878065152, %rcx # imm = 0x7FFF000000000000
; SSE-NEXT:    testq %rcx, %rax
; SSE-NEXT:    je .LBB4_2
; SSE-NEXT:  # %bb.1:
; SSE-NEXT:    movq {{[0-9]+}}(%rsp), %rcx
; SSE-NEXT:    jmp .LBB4_3
; SSE-NEXT:  .LBB4_2: # %if.then
; SSE-NEXT:    movaps {{.*#+}} xmm1 = [5.36312317197703883982960999928233845E+154]
; SSE-NEXT:    callq __multf3@PLT
; SSE-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
; SSE-NEXT:    movq {{[0-9]+}}(%rsp), %rcx
; SSE-NEXT:    movabsq $-9223090561878065153, %rdx # imm = 0x8000FFFFFFFFFFFF
; SSE-NEXT:    andq {{[0-9]+}}(%rsp), %rdx
; SSE-NEXT:    movabsq $4611123068473966592, %rax # imm = 0x3FFE000000000000
; SSE-NEXT:    orq %rdx, %rax
; SSE-NEXT:  .LBB4_3: # %if.end
; SSE-NEXT:    movq %rcx, (%rsp)
; SSE-NEXT:    movq %rax, {{[0-9]+}}(%rsp)
; SSE-NEXT:    movaps (%rsp), %xmm0
; SSE-NEXT:    addq $56, %rsp
; SSE-NEXT:    retq
;
; AVX-LABEL: TestI128_3:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    subq $56, %rsp
; AVX-NEXT:    vmovaps %xmm0, {{[0-9]+}}(%rsp)
; AVX-NEXT:    movq {{[0-9]+}}(%rsp), %rax
; AVX-NEXT:    movabsq $9223090561878065152, %rcx # imm = 0x7FFF000000000000
; AVX-NEXT:    testq %rcx, %rax
; AVX-NEXT:    je .LBB4_2
; AVX-NEXT:  # %bb.1:
; AVX-NEXT:    movq {{[0-9]+}}(%rsp), %rcx
; AVX-NEXT:    jmp .LBB4_3
; AVX-NEXT:  .LBB4_2: # %if.then
; AVX-NEXT:    vmovaps {{.*#+}} xmm1 = [5.36312317197703883982960999928233845E+154]
; AVX-NEXT:    callq __multf3@PLT
; AVX-NEXT:    vmovaps %xmm0, {{[0-9]+}}(%rsp)
; AVX-NEXT:    movq {{[0-9]+}}(%rsp), %rcx
; AVX-NEXT:    movabsq $-9223090561878065153, %rdx # imm = 0x8000FFFFFFFFFFFF
; AVX-NEXT:    andq {{[0-9]+}}(%rsp), %rdx
; AVX-NEXT:    movabsq $4611123068473966592, %rax # imm = 0x3FFE000000000000
; AVX-NEXT:    orq %rdx, %rax
; AVX-NEXT:  .LBB4_3: # %if.end
; AVX-NEXT:    movq %rcx, (%rsp)
; AVX-NEXT:    movq %rax, {{[0-9]+}}(%rsp)
; AVX-NEXT:    vmovaps (%rsp), %xmm0
; AVX-NEXT:    addq $56, %rsp
; AVX-NEXT:    retq
entry:
  %0 = bitcast fp128 %x to i128
  %bf.cast = and i128 %0, 170135991163610696904058773219554885632
  %cmp = icmp eq i128 %bf.cast, 0
  br i1 %cmp, label %if.then, label %if.end

if.then:                                          ; preds = %entry
  %mul = fmul fp128 %x, 0xL00000000000000004201000000000000
  %1 = bitcast fp128 %mul to i128
  %bf.clear4 = and i128 %1, -170135991163610696904058773219554885633
  %bf.set = or i128 %bf.clear4, 85060207136517546210586590865283612672
  br label %if.end

if.end:                                           ; preds = %if.then, %entry
  %u.sroa.0.0 = phi i128 [ %bf.set, %if.then ], [ %0, %entry ]
  %2 = bitcast i128 %u.sroa.0.0 to fp128
  ret fp128 %2
}

; C code:
; __float128 TestI128_4(__float128 x)
; {
;  union IEEEl2bits u;
;  __float128 df;
;  u.e = x;
;  u.xbits.manl = 0;
;  df = u.e;
;  return x + df;
; }
define fp128 @TestI128_4(fp128 %x) #0 {
; SSE-LABEL: TestI128_4:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    movaps %xmm0, %xmm1
; SSE-NEXT:    movaps %xmm0, -{{[0-9]+}}(%rsp)
; SSE-NEXT:    movq -{{[0-9]+}}(%rsp), %rax
; SSE-NEXT:    movq %rax, -{{[0-9]+}}(%rsp)
; SSE-NEXT:    movq $0, -{{[0-9]+}}(%rsp)
; SSE-NEXT:    movaps -{{[0-9]+}}(%rsp), %xmm0
; SSE-NEXT:    jmp __addtf3@PLT # TAILCALL
;
; AVX-LABEL: TestI128_4:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vmovaps %xmm0, %xmm1
; AVX-NEXT:    vmovaps %xmm0, -{{[0-9]+}}(%rsp)
; AVX-NEXT:    movq -{{[0-9]+}}(%rsp), %rax
; AVX-NEXT:    movq %rax, -{{[0-9]+}}(%rsp)
; AVX-NEXT:    movq $0, -{{[0-9]+}}(%rsp)
; AVX-NEXT:    vmovaps -{{[0-9]+}}(%rsp), %xmm0
; AVX-NEXT:    jmp __addtf3@PLT # TAILCALL
entry:
  %0 = bitcast fp128 %x to i128
  %bf.clear = and i128 %0, -18446744073709551616
  %1 = bitcast i128 %bf.clear to fp128
  %add = fadd fp128 %1, %x
  ret fp128 %add
}

@v128 = common dso_local global i128 0, align 16
@v128_2 = common dso_local global i128 0, align 16

; C code:
; unsigned __int128 v128, v128_2;
; void TestShift128_2() {
;   v128 = ((v128 << 96) | v128_2);
; }
define dso_local void @TestShift128_2() #2 {
; CHECK-LABEL: TestShift128_2:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movq v128(%rip), %rax
; CHECK-NEXT:    shlq $32, %rax
; CHECK-NEXT:    movq v128_2(%rip), %rcx
; CHECK-NEXT:    orq v128_2+8(%rip), %rax
; CHECK-NEXT:    movq %rcx, v128(%rip)
; CHECK-NEXT:    movq %rax, v128+8(%rip)
; CHECK-NEXT:    retq
entry:
  %0 = load i128, ptr @v128, align 16
  %shl = shl i128 %0, 96
  %1 = load i128, ptr @v128_2, align 16
  %or = or i128 %shl, %1
  store i128 %or, ptr @v128, align 16
  ret void
}

; Same IR body as TestI128_4, emitted under the libm symbol name acosl.
define fp128 @acosl(fp128 %x) #0 {
; SSE-LABEL: acosl:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    movaps %xmm0, %xmm1
; SSE-NEXT:    movaps %xmm0, -{{[0-9]+}}(%rsp)
; SSE-NEXT:    movq -{{[0-9]+}}(%rsp), %rax
; SSE-NEXT:    movq %rax, -{{[0-9]+}}(%rsp)
; SSE-NEXT:    movq $0, -{{[0-9]+}}(%rsp)
; SSE-NEXT:    movaps -{{[0-9]+}}(%rsp), %xmm0
; SSE-NEXT:    jmp __addtf3@PLT # TAILCALL
;
; AVX-LABEL: acosl:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vmovaps %xmm0, %xmm1
; AVX-NEXT:    vmovaps %xmm0, -{{[0-9]+}}(%rsp)
; AVX-NEXT:    movq -{{[0-9]+}}(%rsp), %rax
; AVX-NEXT:    movq %rax, -{{[0-9]+}}(%rsp)
; AVX-NEXT:    movq $0, -{{[0-9]+}}(%rsp)
; AVX-NEXT:    vmovaps -{{[0-9]+}}(%rsp), %xmm0
; AVX-NEXT:    jmp __addtf3@PLT # TAILCALL
entry:
  %0 = bitcast fp128 %x to i128
  %bf.clear = and i128 %0, -18446744073709551616
  %1 = bitcast i128 %bf.clear to fp128
  %add = fadd fp128 %1, %x
  ret fp128 %add
}

; Compare i128 values and check i128 constants.
define fp128 @TestComp(fp128 %x, fp128 %y) #0 {
; SSE-LABEL: TestComp:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    movaps %xmm0, -{{[0-9]+}}(%rsp)
; SSE-NEXT:    cmpq $0, -{{[0-9]+}}(%rsp)
; SSE-NEXT:    jns .LBB8_2
; SSE-NEXT:  # %bb.1: # %entry
; SSE-NEXT:    movaps %xmm1, %xmm0
; SSE-NEXT:  .LBB8_2: # %entry
; SSE-NEXT:    retq
;
; AVX-LABEL: TestComp:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vmovaps %xmm0, -{{[0-9]+}}(%rsp)
; AVX-NEXT:    cmpq $0, -{{[0-9]+}}(%rsp)
; AVX-NEXT:    jns .LBB8_2
; AVX-NEXT:  # %bb.1: # %entry
; AVX-NEXT:    vmovaps %xmm1, %xmm0
; AVX-NEXT:  .LBB8_2: # %entry
; AVX-NEXT:    retq
entry:
  %0 = bitcast fp128 %x to i128
  %cmp = icmp sgt i128 %0, -1
  %cond = select i1 %cmp, fp128 %x, fp128 %y
  ret fp128 %cond
}

declare dso_local void @foo(fp128) #1

; Test logical operations on fp128 values.
define fp128 @TestFABS_LD(fp128 %x) #0 {
; SSE-LABEL: TestFABS_LD:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: TestFABS_LD:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX-NEXT:    retq
entry:
  %call = tail call fp128 @fabsl(fp128 %x) #2
  ret fp128 %call
}

declare fp128 @fabsl(fp128) #1

declare fp128 @copysignl(fp128, fp128) #1

; Test more complicated logical operations generated from copysignl.
define dso_local void @TestCopySign(ptr noalias nocapture sret({ fp128, fp128 }) %agg.result, ptr byval({ fp128, fp128 }) nocapture readonly align 16 %z) #0 {
; SSE-LABEL: TestCopySign:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    pushq %rbp
; SSE-NEXT:    pushq %rbx
; SSE-NEXT:    subq $40, %rsp
; SSE-NEXT:    movq %rdi, %rbx
; SSE-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm0
; SSE-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm1
; SSE-NEXT:    movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; SSE-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
; SSE-NEXT:    callq __gttf2@PLT
; SSE-NEXT:    movl %eax, %ebp
; SSE-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
; SSE-NEXT:    movaps %xmm0, %xmm1
; SSE-NEXT:    callq __subtf3@PLT
; SSE-NEXT:    testl %ebp, %ebp
; SSE-NEXT:    jle .LBB10_1
; SSE-NEXT:  # %bb.2: # %if.then
; SSE-NEXT:    movaps %xmm0, %xmm1
; SSE-NEXT:    andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE-NEXT:    movaps (%rsp), %xmm0 # 16-byte Reload
; SSE-NEXT:    jmp .LBB10_3
; SSE-NEXT:  .LBB10_1:
; SSE-NEXT:    movaps (%rsp), %xmm1 # 16-byte Reload
; SSE-NEXT:  .LBB10_3: # %cleanup
; SSE-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm2 # 16-byte Reload
; SSE-NEXT:    andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2
; SSE-NEXT:    andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE-NEXT:    orps %xmm2, %xmm0
; SSE-NEXT:    movaps %xmm1, (%rbx)
; SSE-NEXT:    movaps %xmm0, 16(%rbx)
; SSE-NEXT:    movq %rbx, %rax
; SSE-NEXT:    addq $40, %rsp
; SSE-NEXT:    popq %rbx
; SSE-NEXT:    popq %rbp
; SSE-NEXT:    retq
;
; AVX-LABEL: TestCopySign:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    pushq %rbp
; AVX-NEXT:    pushq %rbx
; AVX-NEXT:    subq $40, %rsp
; AVX-NEXT:    movq %rdi, %rbx
; AVX-NEXT:    vmovaps {{[0-9]+}}(%rsp), %xmm0
; AVX-NEXT:    vmovaps {{[0-9]+}}(%rsp), %xmm1
; AVX-NEXT:    vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; AVX-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
; AVX-NEXT:    callq __gttf2@PLT
; AVX-NEXT:    movl %eax, %ebp
; AVX-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
; AVX-NEXT:    vmovaps %xmm0, %xmm1
; AVX-NEXT:    callq __subtf3@PLT
; AVX-NEXT:    testl %ebp, %ebp
; AVX-NEXT:    jle .LBB10_1
; AVX-NEXT:  # %bb.2: # %if.then
; AVX-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm2
; AVX-NEXT:    vmovaps (%rsp), %xmm0 # 16-byte Reload
; AVX-NEXT:    jmp .LBB10_3
; AVX-NEXT:  .LBB10_1:
; AVX-NEXT:    vmovaps (%rsp), %xmm2 # 16-byte Reload
; AVX-NEXT:  .LBB10_3: # %cleanup
; AVX-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
; AVX-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX-NEXT:    vorps %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vmovaps %xmm2, (%rbx)
; AVX-NEXT:    vmovaps %xmm0, 16(%rbx)
; AVX-NEXT:    movq %rbx, %rax
; AVX-NEXT:    addq $40, %rsp
; AVX-NEXT:    popq %rbx
; AVX-NEXT:    popq %rbp
; AVX-NEXT:    retq
entry:
  %z.realp = getelementptr inbounds { fp128, fp128 }, ptr %z, i64 0, i32 0
  %z.real = load fp128, ptr %z.realp, align 16
  %z.imagp = getelementptr inbounds { fp128, fp128 }, ptr %z, i64 0, i32 1
  %z.imag4 = load fp128, ptr %z.imagp, align 16
  %cmp = fcmp ogt fp128 %z.real, %z.imag4
  %sub = fsub fp128 %z.imag4, %z.imag4
  br i1 %cmp, label %if.then, label %cleanup

if.then:                                          ; preds = %entry
  %call = tail call fp128 @fabsl(fp128 %sub) #2
  br label %cleanup

cleanup:                                          ; preds = %entry, %if.then
  %z.real.sink = phi fp128 [ %z.real, %if.then ], [ %sub, %entry ]
  %call.sink = phi fp128 [ %call, %if.then ], [ %z.real, %entry ]
  %call5 = tail call fp128 @copysignl(fp128 %z.real.sink, fp128 %z.imag4) #2
  %0 = getelementptr inbounds { fp128, fp128 }, ptr %agg.result, i64 0, i32 0
  %1 = getelementptr inbounds { fp128, fp128 }, ptr %agg.result, i64 0, i32 1
  store fp128 %call.sink, ptr %0, align 16
  store fp128 %call5, ptr %1, align 16
  ret void
}


attributes #0 = { nounwind "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+ssse3,+sse3,+popcnt,+sse,+sse2,+sse4.1,+sse4.2" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #1 = { "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+ssse3,+sse3,+popcnt,+sse,+sse2,+sse4.1,+sse4.2" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #2 = { nounwind readnone }
