; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -O2 -verify-machineinstrs -mtriple=x86_64-linux-android -mattr=+mmx -enable-legalize-types-checking | FileCheck %s --check-prefixes=CHECK,SSE
; RUN: llc < %s -O2 -verify-machineinstrs -mtriple=x86_64-linux-gnu -mattr=+mmx -enable-legalize-types-checking | FileCheck %s --check-prefixes=CHECK,SSE
; RUN: llc < %s -O2 -verify-machineinstrs -mtriple=x86_64-linux-android -mattr=+mmx,avx2 -enable-legalize-types-checking | FileCheck %s --check-prefixes=CHECK,AVX
; RUN: llc < %s -O2 -verify-machineinstrs -mtriple=x86_64-linux-gnu -mattr=+mmx,avx2 -enable-legalize-types-checking | FileCheck %s --check-prefixes=CHECK,AVX
; RUN: llc < %s -O2 -verify-machineinstrs -mtriple=x86_64-linux-android -mattr=+mmx,avx512vl -enable-legalize-types-checking | FileCheck %s --check-prefixes=CHECK,AVX
; RUN: llc < %s -O2 -verify-machineinstrs -mtriple=x86_64-linux-gnu -mattr=+mmx,avx512vl -enable-legalize-types-checking | FileCheck %s --check-prefixes=CHECK,AVX

; These tests were generated from simplified libm C code.
; When compiled for the x86_64-linux-android target,
; long double is mapped to the f128 type, which should be passed
; in SSE registers. When the f128 type calling-convention
; problem was fixed, old llvm code failed to handle f128 values
; in several f128/i128 type operations. These unit tests hopefully
; will catch regressions in any future change in this area.
; To modify or enhance these test cases, please consult the libm
; code patterns and compile with -target x86_64-linux-android
; to generate the IR. If the __float128 keyword is not accepted by
; clang, just define it to "long double".
;
; typedef long double __float128;
; union IEEEl2bits {
;   __float128 e;
;   struct {
;     unsigned long manl :64;
;     unsigned long manh :48;
;     unsigned int exp :15;
;     unsigned int sign :1;
;   } bits;
;   struct {
;     unsigned long manl :64;
;     unsigned long manh :48;
;     unsigned int expsign :16;
;   } xbits;
; };

; C code:
;  void foo(__float128 x);
;  void TestUnionLD1(__float128 s, unsigned long n) {
;    union IEEEl2bits u;
;    __float128 w;
;    u.e = s;
;    u.bits.manh = n;
;    w = u.e;
;    foo(w);
;  }
; Rewrites the 48-bit manh field (bits 64..111) of %s with the low bits of
; %n, then tail-calls @foo with the rebuilt f128 value.
define dso_local void @TestUnionLD1(fp128 %s, i64 %n) #0 {
; SSE-LABEL: TestUnionLD1:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    movaps %xmm0, -{{[0-9]+}}(%rsp)
; SSE-NEXT:    movzwl -{{[0-9]+}}(%rsp), %eax
; SSE-NEXT:    shlq $48, %rax
; SSE-NEXT:    movq -{{[0-9]+}}(%rsp), %rcx
; SSE-NEXT:    movabsq $281474976710655, %rdx # imm = 0xFFFFFFFFFFFF
; SSE-NEXT:    andq %rdi, %rdx
; SSE-NEXT:    orq %rax, %rdx
; SSE-NEXT:    movq %rcx, -{{[0-9]+}}(%rsp)
; SSE-NEXT:    movq %rdx, -{{[0-9]+}}(%rsp)
; SSE-NEXT:    movaps -{{[0-9]+}}(%rsp), %xmm0
; SSE-NEXT:    jmp foo # TAILCALL
;
; AVX-LABEL: TestUnionLD1:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vmovaps %xmm0, -{{[0-9]+}}(%rsp)
; AVX-NEXT:    movzwl -{{[0-9]+}}(%rsp), %eax
; AVX-NEXT:    shlq $48, %rax
; AVX-NEXT:    movq -{{[0-9]+}}(%rsp), %rcx
; AVX-NEXT:    movabsq $281474976710655, %rdx # imm = 0xFFFFFFFFFFFF
; AVX-NEXT:    andq %rdi, %rdx
; AVX-NEXT:    orq %rax, %rdx
; AVX-NEXT:    movq %rcx, -{{[0-9]+}}(%rsp)
; AVX-NEXT:    movq %rdx, -{{[0-9]+}}(%rsp)
; AVX-NEXT:    vmovaps -{{[0-9]+}}(%rsp), %xmm0
; AVX-NEXT:    jmp foo # TAILCALL
entry:
  %0 = bitcast fp128 %s to i128
  ; Zero-extend %n and shift it into the manh position (bit 64 upward).
  %1 = zext i64 %n to i128
  %bf.value = shl nuw i128 %1, 64
  ; 5192296858534809181786422619668480 == 0xFFFFFFFFFFFF << 64 (manh mask);
  ; the second constant is its bitwise complement.
  %bf.shl = and i128 %bf.value, 5192296858534809181786422619668480
  %bf.clear = and i128 %0, -5192296858534809181786422619668481
  %bf.set = or i128 %bf.shl, %bf.clear
  %2 = bitcast i128 %bf.set to fp128
  tail call void @foo(fp128 %2) #2
  ret void
}

; C code:
;  __float128 TestUnionLD2(__float128 s) {
;    union IEEEl2bits u;
;    __float128 w;
;    u.e = s;
;    u.bits.manl = 0;
;    w = u.e;
;    return w;
;  }
define fp128 @TestUnionLD2(fp128 %s) #0 {
; SSE-LABEL: TestUnionLD2:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    movaps %xmm0, -{{[0-9]+}}(%rsp)
; SSE-NEXT:    movq -{{[0-9]+}}(%rsp), %rax
; SSE-NEXT:    movq %rax, -{{[0-9]+}}(%rsp)
; SSE-NEXT:    movq $0, -{{[0-9]+}}(%rsp)
; SSE-NEXT:    movaps -{{[0-9]+}}(%rsp), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: TestUnionLD2:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vmovaps %xmm0, -{{[0-9]+}}(%rsp)
; AVX-NEXT:    movq -{{[0-9]+}}(%rsp), %rax
; AVX-NEXT:    movq %rax, -{{[0-9]+}}(%rsp)
; AVX-NEXT:    movq $0, -{{[0-9]+}}(%rsp)
; AVX-NEXT:    vmovaps -{{[0-9]+}}(%rsp), %xmm0
; AVX-NEXT:    retq
entry:
  %0 = bitcast fp128 %s to i128
  ; -18446744073709551616 == ~(2^64 - 1): clears the low 64 mantissa bits (manl).
  %bf.clear = and i128 %0, -18446744073709551616
  %1 = bitcast i128 %bf.clear to fp128
  ret fp128 %1
}

; C code:
;  __float128 TestI128_1(__float128 x)
;  {
;    union IEEEl2bits z;
;    z.e = x;
;    z.bits.sign = 0;
;    return (z.e < 0.1L) ? 1.0L : 2.0L;
;  }
; NOTE(review): the "[?]" in the two constant-pool comments below looks like a
; truncated autogenerated pattern; regenerate with update_llc_test_checks.py
; to confirm the expected constant operand.
define fp128 @TestI128_1(fp128 %x) #0 {
; SSE-LABEL: TestI128_1:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    pushq %rax
; SSE-NEXT:    andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE-NEXT:    movaps {{.*#+}} xmm1 = [1.00000000000000000000000000000000005E-1]
; SSE-NEXT:    callq __lttf2@PLT
; SSE-NEXT:    xorl %ecx, %ecx
; SSE-NEXT:    testl %eax, %eax
; SSE-NEXT:    sets %cl
; SSE-NEXT:    shll $4, %ecx
; SSE-NEXT:    movaps {{.*#+}} xmm0 = [?]
; SSE-NEXT:    popq %rax
; SSE-NEXT:    retq
;
; AVX-LABEL: TestI128_1:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    pushq %rax
; AVX-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX-NEXT:    vmovaps {{.*#+}} xmm1 = [1.00000000000000000000000000000000005E-1]
; AVX-NEXT:    callq __lttf2@PLT
; AVX-NEXT:    xorl %ecx, %ecx
; AVX-NEXT:    testl %eax, %eax
; AVX-NEXT:    sets %cl
; AVX-NEXT:    shll $4, %ecx
; AVX-NEXT:    vmovaps {{.*#+}} xmm0 = [?]
; AVX-NEXT:    popq %rax
; AVX-NEXT:    retq
entry:
  %0 = bitcast fp128 %x to i128
  ; 170141183460469231731687303715884105727 == 2^127 - 1: clears the sign bit.
  %bf.clear = and i128 %0, 170141183460469231731687303715884105727
  %1 = bitcast i128 %bf.clear to fp128
  %cmp = fcmp olt fp128 %1, 0xL999999999999999A3FFB999999999999
  %cond = select i1 %cmp, fp128 0xL00000000000000003FFF000000000000, fp128 0xL00000000000000004000000000000000
  ret fp128 %cond
}

; C code:
;  __float128 TestI128_2(__float128 x, __float128 y)
;  {
;    unsigned short hx;
;    union IEEEl2bits ge_u;
;    ge_u.e = x;
;    hx = ge_u.xbits.expsign;
;    return (hx & 0x8000) == 0 ? x : y;
;  }
; The expsign sign-bit test folds to a signed i128 compare against -1.
define fp128 @TestI128_2(fp128 %x, fp128 %y) #0 {
; SSE-LABEL: TestI128_2:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    movaps %xmm0, -{{[0-9]+}}(%rsp)
; SSE-NEXT:    cmpq $0, -{{[0-9]+}}(%rsp)
; SSE-NEXT:    jns .LBB3_2
; SSE-NEXT:  # %bb.1: # %entry
; SSE-NEXT:    movaps %xmm1, %xmm0
; SSE-NEXT:  .LBB3_2: # %entry
; SSE-NEXT:    retq
;
; AVX-LABEL: TestI128_2:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vmovaps %xmm0, -{{[0-9]+}}(%rsp)
; AVX-NEXT:    cmpq $0, -{{[0-9]+}}(%rsp)
; AVX-NEXT:    jns .LBB3_2
; AVX-NEXT:  # %bb.1: # %entry
; AVX-NEXT:    vmovaps %xmm1, %xmm0
; AVX-NEXT:  .LBB3_2: # %entry
; AVX-NEXT:    retq
entry:
  %0 = bitcast fp128 %x to i128
  %cmp = icmp sgt i128 %0, -1
  %cond = select i1 %cmp, fp128 %x, fp128 %y
  ret fp128 %cond
}

; C code:
;  __float128 TestI128_3(__float128 x, int *ex)
;  {
;    union IEEEl2bits u;
;    u.e = x;
;    if (u.bits.exp == 0) {
;      u.e *= 0x1.0p514;
;      u.bits.exp = 0x3ffe;
;    }
;    return (u.e);
;  }
define fp128 @TestI128_3(fp128 %x, ptr nocapture readnone %ex) #0 {
; SSE-LABEL: TestI128_3:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    subq $56, %rsp
; SSE-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
; SSE-NEXT:    movq {{[0-9]+}}(%rsp), %rax
; SSE-NEXT:    movabsq $9223090561878065152, %rcx # imm = 0x7FFF000000000000
; SSE-NEXT:    testq %rcx, %rax
; SSE-NEXT:    je .LBB4_2
; SSE-NEXT:  # %bb.1:
; SSE-NEXT:    movq {{[0-9]+}}(%rsp), %rcx
; SSE-NEXT:    jmp .LBB4_3
; SSE-NEXT:  .LBB4_2: # %if.then
; SSE-NEXT:    movaps {{.*#+}} xmm1 = [5.36312317197703883982960999928233845E+154]
; SSE-NEXT:    callq __multf3@PLT
; SSE-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
; SSE-NEXT:    movq {{[0-9]+}}(%rsp), %rcx
; SSE-NEXT:    movabsq $-9223090561878065153, %rdx # imm = 0x8000FFFFFFFFFFFF
; SSE-NEXT:    andq {{[0-9]+}}(%rsp), %rdx
; SSE-NEXT:    movabsq $4611123068473966592, %rax # imm = 0x3FFE000000000000
; SSE-NEXT:    orq %rdx, %rax
; SSE-NEXT:  .LBB4_3: # %if.end
; SSE-NEXT:    movq %rcx, (%rsp)
; SSE-NEXT:    movq %rax, {{[0-9]+}}(%rsp)
; SSE-NEXT:    movaps (%rsp), %xmm0
; SSE-NEXT:    addq $56, %rsp
; SSE-NEXT:    retq
;
; AVX-LABEL: TestI128_3:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    subq $56, %rsp
; AVX-NEXT:    vmovaps %xmm0, {{[0-9]+}}(%rsp)
; AVX-NEXT:    movq {{[0-9]+}}(%rsp), %rax
; AVX-NEXT:    movabsq $9223090561878065152, %rcx # imm = 0x7FFF000000000000
; AVX-NEXT:    testq %rcx, %rax
; AVX-NEXT:    je .LBB4_2
; AVX-NEXT:  # %bb.1:
; AVX-NEXT:    movq {{[0-9]+}}(%rsp), %rcx
; AVX-NEXT:    jmp .LBB4_3
; AVX-NEXT:  .LBB4_2: # %if.then
; AVX-NEXT:    vmovaps {{.*#+}} xmm1 = [5.36312317197703883982960999928233845E+154]
; AVX-NEXT:    callq __multf3@PLT
; AVX-NEXT:    vmovaps %xmm0, {{[0-9]+}}(%rsp)
; AVX-NEXT:    movq {{[0-9]+}}(%rsp), %rcx
; AVX-NEXT:    movabsq $-9223090561878065153, %rdx # imm = 0x8000FFFFFFFFFFFF
; AVX-NEXT:    andq {{[0-9]+}}(%rsp), %rdx
; AVX-NEXT:    movabsq $4611123068473966592, %rax # imm = 0x3FFE000000000000
; AVX-NEXT:    orq %rdx, %rax
; AVX-NEXT:  .LBB4_3: # %if.end
; AVX-NEXT:    movq %rcx, (%rsp)
; AVX-NEXT:    movq %rax, {{[0-9]+}}(%rsp)
; AVX-NEXT:    vmovaps (%rsp), %xmm0
; AVX-NEXT:    addq $56, %rsp
; AVX-NEXT:    retq
entry:
  %0 = bitcast fp128 %x to i128
  ; 170135991163610696904058773219554885632 == 0x7FFF << 112 (exp field mask).
  %bf.cast = and i128 %0, 170135991163610696904058773219554885632
  %cmp = icmp eq i128 %bf.cast, 0
  br i1 %cmp, label %if.then, label %if.end

if.then:                                          ; preds = %entry
  ; 0xL...4201000000000000 is the f128 encoding of 0x1.0p514.
  %mul = fmul fp128 %x, 0xL00000000000000004201000000000000
  %1 = bitcast fp128 %mul to i128
  %bf.clear4 = and i128 %1, -170135991163610696904058773219554885633
  ; 85060207136517546210586590865283612672 == 0x3FFE << 112 (new exp value).
  %bf.set = or i128 %bf.clear4, 85060207136517546210586590865283612672
  br label %if.end

if.end:                                           ; preds = %if.then, %entry
  %u.sroa.0.0 = phi i128 [ %bf.set, %if.then ], [ %0, %entry ]
  %2 = bitcast i128 %u.sroa.0.0 to fp128
  ret fp128 %2
}

; C code:
;  __float128 TestI128_4(__float128 x)
;  {
;    union IEEEl2bits u;
;    __float128 df;
;    u.e = x;
;    u.xbits.manl = 0;
;    df = u.e;
;    return x + df;
;  }
define fp128 @TestI128_4(fp128 %x) #0 {
; SSE-LABEL: TestI128_4:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    movaps %xmm0, %xmm1
; SSE-NEXT:    movaps %xmm0, -{{[0-9]+}}(%rsp)
; SSE-NEXT:    movq -{{[0-9]+}}(%rsp), %rax
; SSE-NEXT:    movq %rax, -{{[0-9]+}}(%rsp)
; SSE-NEXT:    movq $0, -{{[0-9]+}}(%rsp)
; SSE-NEXT:    movaps -{{[0-9]+}}(%rsp), %xmm0
; SSE-NEXT:    jmp __addtf3@PLT # TAILCALL
;
; AVX-LABEL: TestI128_4:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vmovaps %xmm0, %xmm1
; AVX-NEXT:    vmovaps %xmm0, -{{[0-9]+}}(%rsp)
; AVX-NEXT:    movq -{{[0-9]+}}(%rsp), %rax
; AVX-NEXT:    movq %rax, -{{[0-9]+}}(%rsp)
; AVX-NEXT:    movq $0, -{{[0-9]+}}(%rsp)
; AVX-NEXT:    vmovaps -{{[0-9]+}}(%rsp), %xmm0
; AVX-NEXT:    jmp __addtf3@PLT # TAILCALL
entry:
  %0 = bitcast fp128 %x to i128
  ; Clear the low 64 mantissa bits, then add the truncated value back to %x.
  %bf.clear = and i128 %0, -18446744073709551616
  %1 = bitcast i128 %bf.clear to fp128
  %add = fadd fp128 %1, %x
  ret fp128 %add
}

@v128 = common dso_local global i128 0, align 16
@v128_2 = common dso_local global i128 0, align 16

; C code:
;  unsigned __int128 v128, v128_2;
;  void TestShift128_2() {
;    v128 = ((v128 << 96) | v128_2);
;  }
define dso_local void @TestShift128_2() #2 {
; CHECK-LABEL: TestShift128_2:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movq v128(%rip), %rax
; CHECK-NEXT:    shlq $32, %rax
; CHECK-NEXT:    movq v128_2(%rip), %rcx
; CHECK-NEXT:    orq v128_2+8(%rip), %rax
; CHECK-NEXT:    movq %rcx, v128(%rip)
; CHECK-NEXT:    movq %rax, v128+8(%rip)
; CHECK-NEXT:    retq
entry:
  %0 = load i128, ptr @v128, align 16
  %shl = shl i128 %0, 96
  %1 = load i128, ptr @v128_2, align 16
  %or = or i128 %shl, %1
  store i128 %or, ptr @v128, align 16
  ret void
}

; Same body as TestI128_4 but using a libm entry-point name (acosl); checks
; that the lowering is identical for such a function.
define fp128 @acosl(fp128 %x) #0 {
; SSE-LABEL: acosl:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    movaps %xmm0, %xmm1
; SSE-NEXT:    movaps %xmm0, -{{[0-9]+}}(%rsp)
; SSE-NEXT:    movq -{{[0-9]+}}(%rsp), %rax
; SSE-NEXT:    movq %rax, -{{[0-9]+}}(%rsp)
; SSE-NEXT:    movq $0, -{{[0-9]+}}(%rsp)
; SSE-NEXT:    movaps -{{[0-9]+}}(%rsp), %xmm0
; SSE-NEXT:    jmp __addtf3@PLT # TAILCALL
;
; AVX-LABEL: acosl:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vmovaps %xmm0, %xmm1
; AVX-NEXT:    vmovaps %xmm0, -{{[0-9]+}}(%rsp)
; AVX-NEXT:    movq -{{[0-9]+}}(%rsp), %rax
; AVX-NEXT:    movq %rax, -{{[0-9]+}}(%rsp)
; AVX-NEXT:    movq $0, -{{[0-9]+}}(%rsp)
; AVX-NEXT:    vmovaps -{{[0-9]+}}(%rsp), %xmm0
; AVX-NEXT:    jmp __addtf3@PLT # TAILCALL
entry:
  %0 = bitcast fp128 %x to i128
  %bf.clear = and i128 %0, -18446744073709551616
  %1 = bitcast i128 %bf.clear to fp128
  %add = fadd fp128 %1, %x
  ret fp128 %add
}

; Compare i128 values and check i128 constants.
define fp128 @TestComp(fp128 %x, fp128 %y) #0 {
; SSE-LABEL: TestComp:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    movaps %xmm0, -{{[0-9]+}}(%rsp)
; SSE-NEXT:    cmpq $0, -{{[0-9]+}}(%rsp)
; SSE-NEXT:    jns .LBB8_2
; SSE-NEXT:  # %bb.1: # %entry
; SSE-NEXT:    movaps %xmm1, %xmm0
; SSE-NEXT:  .LBB8_2: # %entry
; SSE-NEXT:    retq
;
; AVX-LABEL: TestComp:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vmovaps %xmm0, -{{[0-9]+}}(%rsp)
; AVX-NEXT:    cmpq $0, -{{[0-9]+}}(%rsp)
; AVX-NEXT:    jns .LBB8_2
; AVX-NEXT:  # %bb.1: # %entry
; AVX-NEXT:    vmovaps %xmm1, %xmm0
; AVX-NEXT:  .LBB8_2: # %entry
; AVX-NEXT:    retq
entry:
  %0 = bitcast fp128 %x to i128
  %cmp = icmp sgt i128 %0, -1
  %cond = select i1 %cmp, fp128 %x, fp128 %y
  ret fp128 %cond
}

declare dso_local void @foo(fp128) #1

; Test logical operations on fp128 values.
; fabsl on fp128 lowers to a single andps with a constant-pool sign mask.
define fp128 @TestFABS_LD(fp128 %x) #0 {
; SSE-LABEL: TestFABS_LD:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: TestFABS_LD:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX-NEXT:    retq
entry:
  %call = tail call fp128 @fabsl(fp128 %x) #2
  ret fp128 %call
}

declare fp128 @fabsl(fp128) #1

declare fp128 @copysignl(fp128, fp128) #1

; Test more complicated logical operations generated from copysignl.
; Returns a { fp128, fp128 } pair via sret; the copysignl call lowers to the
; andps/andps/orps sequence checked under the cleanup label below.
define dso_local void @TestCopySign(ptr noalias nocapture sret({ fp128, fp128 }) %agg.result, ptr byval({ fp128, fp128 }) nocapture readonly align 16 %z) #0 {
; SSE-LABEL: TestCopySign:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    pushq %rbp
; SSE-NEXT:    pushq %rbx
; SSE-NEXT:    subq $40, %rsp
; SSE-NEXT:    movq %rdi, %rbx
; SSE-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm0
; SSE-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm1
; SSE-NEXT:    movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; SSE-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
; SSE-NEXT:    callq __gttf2@PLT
; SSE-NEXT:    movl %eax, %ebp
; SSE-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
; SSE-NEXT:    movaps %xmm0, %xmm1
; SSE-NEXT:    callq __subtf3@PLT
; SSE-NEXT:    testl %ebp, %ebp
; SSE-NEXT:    jle .LBB10_1
; SSE-NEXT:  # %bb.2: # %if.then
; SSE-NEXT:    movaps %xmm0, %xmm1
; SSE-NEXT:    andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE-NEXT:    movaps (%rsp), %xmm0 # 16-byte Reload
; SSE-NEXT:    jmp .LBB10_3
; SSE-NEXT:  .LBB10_1:
; SSE-NEXT:    movaps (%rsp), %xmm1 # 16-byte Reload
; SSE-NEXT:  .LBB10_3: # %cleanup
; SSE-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm2 # 16-byte Reload
; SSE-NEXT:    andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2
; SSE-NEXT:    andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE-NEXT:    orps %xmm2, %xmm0
; SSE-NEXT:    movaps %xmm1, (%rbx)
; SSE-NEXT:    movaps %xmm0, 16(%rbx)
; SSE-NEXT:    movq %rbx, %rax
; SSE-NEXT:    addq $40, %rsp
; SSE-NEXT:    popq %rbx
; SSE-NEXT:    popq %rbp
; SSE-NEXT:    retq
;
; AVX-LABEL: TestCopySign:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    pushq %rbp
; AVX-NEXT:    pushq %rbx
; AVX-NEXT:    subq $40, %rsp
; AVX-NEXT:    movq %rdi, %rbx
; AVX-NEXT:    vmovaps {{[0-9]+}}(%rsp), %xmm0
; AVX-NEXT:    vmovaps {{[0-9]+}}(%rsp), %xmm1
; AVX-NEXT:    vmovaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; AVX-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
; AVX-NEXT:    callq __gttf2@PLT
; AVX-NEXT:    movl %eax, %ebp
; AVX-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
; AVX-NEXT:    vmovaps %xmm0, %xmm1
; AVX-NEXT:    callq __subtf3@PLT
; AVX-NEXT:    testl %ebp, %ebp
; AVX-NEXT:    jle .LBB10_1
; AVX-NEXT:  # %bb.2: # %if.then
; AVX-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm2
; AVX-NEXT:    vmovaps (%rsp), %xmm0 # 16-byte Reload
; AVX-NEXT:    jmp .LBB10_3
; AVX-NEXT:  .LBB10_1:
; AVX-NEXT:    vmovaps (%rsp), %xmm2 # 16-byte Reload
; AVX-NEXT:  .LBB10_3: # %cleanup
; AVX-NEXT:    vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
; AVX-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX-NEXT:    vorps %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vmovaps %xmm2, (%rbx)
; AVX-NEXT:    vmovaps %xmm0, 16(%rbx)
; AVX-NEXT:    movq %rbx, %rax
; AVX-NEXT:    addq $40, %rsp
; AVX-NEXT:    popq %rbx
; AVX-NEXT:    popq %rbp
; AVX-NEXT:    retq
entry:
  %z.realp = getelementptr inbounds { fp128, fp128 }, ptr %z, i64 0, i32 0
  %z.real = load fp128, ptr %z.realp, align 16
  %z.imagp = getelementptr inbounds { fp128, fp128 }, ptr %z, i64 0, i32 1
  %z.imag4 = load fp128, ptr %z.imagp, align 16
  %cmp = fcmp ogt fp128 %z.real, %z.imag4
  %sub = fsub fp128 %z.imag4, %z.imag4
  br i1 %cmp, label %if.then, label %cleanup

if.then:                                          ; preds = %entry
  %call = tail call fp128 @fabsl(fp128 %sub) #2
  br label %cleanup

cleanup:                                          ; preds = %entry, %if.then
  %z.real.sink = phi fp128 [ %z.real, %if.then ], [ %sub, %entry ]
  %call.sink = phi fp128 [ %call, %if.then ], [ %z.real, %entry ]
  %call5 = tail call fp128 @copysignl(fp128 %z.real.sink, fp128 %z.imag4) #2
  %0 = getelementptr inbounds { fp128, fp128 }, ptr %agg.result, i64 0, i32 0
  %1 = getelementptr inbounds { fp128, fp128 }, ptr %agg.result, i64 0, i32 1
  store fp128 %call.sink, ptr %0, align 16
  store fp128 %call5, ptr %1, align 16
  ret void
}


attributes #0 = { nounwind "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+ssse3,+sse3,+popcnt,+sse,+sse2,+sse4.1,+sse4.2" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #1 = { "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+ssse3,+sse3,+popcnt,+sse,+sse2,+sse4.1,+sse4.2" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #2 = { nounwind readnone }