; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=X86,X86-SSE
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=X86,X86-AVX,X86-AVX1OR2,X86-AVX1
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=X86,X86-AVX,X86-AVX1OR2,X86-AVX2
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512vl | FileCheck %s --check-prefixes=X86,X86-AVX,X86-AVX512,X86-AVX512VL
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512fp16 | FileCheck %s --check-prefixes=X86,X86-AVX,X86-AVX512,X86-AVX512FP16
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512dq,+avx512vl | FileCheck %s --check-prefixes=X86,X86-AVX,X86-AVX512,X86-AVX512VLDQ
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=X64,X64-SSE
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=X64,X64-AVX,X64-AVX1OR2,X64-AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=X64,X64-AVX,X64-AVX1OR2,X64-AVX2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl | FileCheck %s --check-prefixes=X64,X64-AVX,X64-AVX512,X64-AVX512VL
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512fp16 | FileCheck %s --check-prefixes=X64,X64-AVX,X64-AVX512,X64-AVX512FP16
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512dq,+avx512vl | FileCheck %s --check-prefixes=X64,X64-AVX,X64-AVX512,X64-AVX512VLDQ

;
; 128-bit Vectors
;
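; Copysign has no dedicated x86 instruction, so it is lowered to bitwise logic
; on the IEEE-754 representation. Per element:
;   bits(result) = (bits(%a0) & MAG_MASK) | (bits(%a1) & SIGN_MASK)
; SIGN_MASK has only the sign bit set and disassembles as -0.0; MAG_MASK is
; its complement, whose all-exponent/mantissa-bits pattern prints as NaN.
; SSE/AVX need two ands (or an and/andn pair) plus an or; AVX512 folds the
; whole bitwise select into one vpternlog with a broadcast mask operand.
; Note (derived, not asserted by the checks): imm8 228 (0xE4) encodes the
; truth table "C ? A : B", so with MAG_MASK broadcast as operand C this
; computes dst = (mask & dst) | (~mask & src1), which is exactly copysign.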
define <2 x double> @fcopysign_v2f64(<2 x double> %a0, <2 x double> %a1) nounwind {
; X86-SSE-LABEL: fcopysign_v2f64:
; X86-SSE:       # %bb.0:
; X86-SSE-NEXT:    andps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
; X86-SSE-NEXT:    andps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-SSE-NEXT:    orps %xmm1, %xmm0
; X86-SSE-NEXT:    retl
;
; X86-AVX1OR2-LABEL: fcopysign_v2f64:
; X86-AVX1OR2:       # %bb.0:
; X86-AVX1OR2-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1, %xmm1
; X86-AVX1OR2-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X86-AVX1OR2-NEXT:    vorps %xmm1, %xmm0, %xmm0
; X86-AVX1OR2-NEXT:    retl
;
; X86-AVX512-LABEL: fcopysign_v2f64:
; X86-AVX512:       # %bb.0:
; X86-AVX512-NEXT:    vpternlogq $228, {{\.?LCPI[0-9]+_[0-9]+}}{1to2}, %xmm1, %xmm0
; X86-AVX512-NEXT:    retl
;
; X64-SSE-LABEL: fcopysign_v2f64:
; X64-SSE:       # %bb.0:
; X64-SSE-NEXT:    andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; X64-SSE-NEXT:    andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; X64-SSE-NEXT:    orps %xmm1, %xmm0
; X64-SSE-NEXT:    retq
;
; X64-AVX1OR2-LABEL: fcopysign_v2f64:
; X64-AVX1OR2:       # %bb.0:
; X64-AVX1OR2-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; X64-AVX1OR2-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-AVX1OR2-NEXT:    vorps %xmm1, %xmm0, %xmm0
; X64-AVX1OR2-NEXT:    retq
;
; X64-AVX512-LABEL: fcopysign_v2f64:
; X64-AVX512:       # %bb.0:
; X64-AVX512-NEXT:    vpternlogq $228, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm1, %xmm0
; X64-AVX512-NEXT:    retq
  %t = call <2 x double> @llvm.copysign.v2f64(<2 x double> %a0, <2 x double> %a1)
  ret <2 x double> %t
}
declare <2 x double> @llvm.copysign.v2f64(<2 x double>, <2 x double>)

define <4 x float> @fcopysign_v4f32(<4 x float> %a0, <4 x float> %a1) nounwind {
; X86-SSE-LABEL: fcopysign_v4f32:
; X86-SSE:       # %bb.0:
; X86-SSE-NEXT:    andps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
; X86-SSE-NEXT:    andps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-SSE-NEXT:    orps %xmm1, %xmm0
; X86-SSE-NEXT:    retl
;
; X86-AVX1-LABEL: fcopysign_v4f32:
; X86-AVX1:       # %bb.0:
; X86-AVX1-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1, %xmm1
; X86-AVX1-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X86-AVX1-NEXT:    vorps %xmm1, %xmm0, %xmm0
; X86-AVX1-NEXT:    retl
;
; X86-AVX2-LABEL: fcopysign_v4f32:
; X86-AVX2:       # %bb.0:
; X86-AVX2-NEXT:    vbroadcastss {{.*#+}} xmm2 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0]
; X86-AVX2-NEXT:    vandps %xmm2, %xmm1, %xmm1
; X86-AVX2-NEXT:    vbroadcastss {{.*#+}} xmm2 = [NaN,NaN,NaN,NaN]
; X86-AVX2-NEXT:    vandps %xmm2, %xmm0, %xmm0
; X86-AVX2-NEXT:    vorps %xmm1, %xmm0, %xmm0
; X86-AVX2-NEXT:    retl
;
; X86-AVX512-LABEL: fcopysign_v4f32:
; X86-AVX512:       # %bb.0:
; X86-AVX512-NEXT:    vpternlogd $228, {{\.?LCPI[0-9]+_[0-9]+}}{1to4}, %xmm1, %xmm0
; X86-AVX512-NEXT:    retl
;
; X64-SSE-LABEL: fcopysign_v4f32:
; X64-SSE:       # %bb.0:
; X64-SSE-NEXT:    andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; X64-SSE-NEXT:    andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; X64-SSE-NEXT:    orps %xmm1, %xmm0
; X64-SSE-NEXT:    retq
;
; X64-AVX1-LABEL: fcopysign_v4f32:
; X64-AVX1:       # %bb.0:
; X64-AVX1-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; X64-AVX1-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-AVX1-NEXT:    vorps %xmm1, %xmm0, %xmm0
; X64-AVX1-NEXT:    retq
;
; X64-AVX2-LABEL: fcopysign_v4f32:
; X64-AVX2:       # %bb.0:
; X64-AVX2-NEXT:    vbroadcastss {{.*#+}} xmm2 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0]
; X64-AVX2-NEXT:    vandps %xmm2, %xmm1, %xmm1
; X64-AVX2-NEXT:    vbroadcastss {{.*#+}} xmm2 = [NaN,NaN,NaN,NaN]
; X64-AVX2-NEXT:    vandps %xmm2, %xmm0, %xmm0
; X64-AVX2-NEXT:    vorps %xmm1, %xmm0, %xmm0
; X64-AVX2-NEXT:    retq
;
; X64-AVX512-LABEL: fcopysign_v4f32:
; X64-AVX512:       # %bb.0:
; X64-AVX512-NEXT:    vpternlogd $228, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm1, %xmm0
; X64-AVX512-NEXT:    retq
  %t = call <4 x float> @llvm.copysign.v4f32(<4 x float> %a0, <4 x float> %a1)
  ret <4 x float> %t
}
declare <4 x float> @llvm.copysign.v4f32(<4 x float>, <4 x float>)
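; Half vectors are taken through pointers, presumably so the test does not
; depend on each subtarget's <8 x half> argument ABI (an inference, not
; something the checks verify). No f16 logic ops are needed: the same
; byte-wise masking works. AVX2 broadcasts 16-bit masks with vpbroadcastw,
; while AVX512 broadcasts the dword 2147450879 (0x7FFF7FFF, two packed f16
; magnitude masks) and uses vpternlogd $202 (0xCA, "A ? B : C"); with the
; mask in the destination register this selects magnitude bits from %a0 and
; sign bits from %a1.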
define <8 x half> @fcopysign_v8f16(ptr %p0, ptr %p1) nounwind {
; X86-SSE-LABEL: fcopysign_v8f16:
; X86-SSE:       # %bb.0:
; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-SSE-NEXT:    movaps (%ecx), %xmm0
; X86-SSE-NEXT:    movaps (%eax), %xmm1
; X86-SSE-NEXT:    andps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
; X86-SSE-NEXT:    andps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-SSE-NEXT:    orps %xmm1, %xmm0
; X86-SSE-NEXT:    retl
;
; X86-AVX1-LABEL: fcopysign_v8f16:
; X86-AVX1:       # %bb.0:
; X86-AVX1-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-AVX1-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-AVX1-NEXT:    vmovaps (%ecx), %xmm0
; X86-AVX1-NEXT:    vmovaps (%eax), %xmm1
; X86-AVX1-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1, %xmm1
; X86-AVX1-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X86-AVX1-NEXT:    vorps %xmm1, %xmm0, %xmm0
; X86-AVX1-NEXT:    retl
;
; X86-AVX2-LABEL: fcopysign_v8f16:
; X86-AVX2:       # %bb.0:
; X86-AVX2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-AVX2-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-AVX2-NEXT:    vpbroadcastw {{.*#+}} xmm0 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0]
; X86-AVX2-NEXT:    vpand (%ecx), %xmm0, %xmm0
; X86-AVX2-NEXT:    vpbroadcastw {{.*#+}} xmm1 = [NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN]
; X86-AVX2-NEXT:    vpand (%eax), %xmm1, %xmm1
; X86-AVX2-NEXT:    vpor %xmm0, %xmm1, %xmm0
; X86-AVX2-NEXT:    retl
;
; X86-AVX512-LABEL: fcopysign_v8f16:
; X86-AVX512:       # %bb.0:
; X86-AVX512-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-AVX512-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-AVX512-NEXT:    vmovdqa (%ecx), %xmm1
; X86-AVX512-NEXT:    vpbroadcastd {{.*#+}} xmm0 = [2147450879,2147450879,2147450879,2147450879]
; X86-AVX512-NEXT:    vpternlogd $202, (%eax), %xmm1, %xmm0
; X86-AVX512-NEXT:    retl
;
; X64-SSE-LABEL: fcopysign_v8f16:
; X64-SSE:       # %bb.0:
; X64-SSE-NEXT:    movaps (%rdi), %xmm0
; X64-SSE-NEXT:    movaps (%rsi), %xmm1
; X64-SSE-NEXT:    andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; X64-SSE-NEXT:    andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; X64-SSE-NEXT:    orps %xmm1, %xmm0
; X64-SSE-NEXT:    retq
;
; X64-AVX1-LABEL: fcopysign_v8f16:
; X64-AVX1:       # %bb.0:
; X64-AVX1-NEXT:    vmovaps (%rdi), %xmm0
; X64-AVX1-NEXT:    vmovaps (%rsi), %xmm1
; X64-AVX1-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; X64-AVX1-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-AVX1-NEXT:    vorps %xmm1, %xmm0, %xmm0
; X64-AVX1-NEXT:    retq
;
; X64-AVX2-LABEL: fcopysign_v8f16:
; X64-AVX2:       # %bb.0:
; X64-AVX2-NEXT:    vpbroadcastw {{.*#+}} xmm0 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0]
; X64-AVX2-NEXT:    vpand (%rsi), %xmm0, %xmm0
; X64-AVX2-NEXT:    vpbroadcastw {{.*#+}} xmm1 = [NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN]
; X64-AVX2-NEXT:    vpand (%rdi), %xmm1, %xmm1
; X64-AVX2-NEXT:    vpor %xmm0, %xmm1, %xmm0
; X64-AVX2-NEXT:    retq
;
; X64-AVX512-LABEL: fcopysign_v8f16:
; X64-AVX512:       # %bb.0:
; X64-AVX512-NEXT:    vmovdqa (%rdi), %xmm1
; X64-AVX512-NEXT:    vpbroadcastd {{.*#+}} xmm0 = [2147450879,2147450879,2147450879,2147450879]
; X64-AVX512-NEXT:    vpternlogd $202, (%rsi), %xmm1, %xmm0
; X64-AVX512-NEXT:    retq
  %a0 = load <8 x half>, ptr %p0, align 16
  %a1 = load <8 x half>, ptr %p1, align 16
  %t = call <8 x half> @llvm.copysign.v8f16(<8 x half> %a0, <8 x half> %a1)
  ret <8 x half> %t
}
declare <8 x half> @llvm.copysign.v8f16(<8 x half>, <8 x half>)

;
; 256-bit Vectors
;
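; With only SSE2, <4 x double> is split into <2 x double> halves, and on i686
; part of the operands arrives on the stack: the prologue realigns the frame
; (andl $-16, %esp) so andnps can fold an aligned 8(%ebp) load. AVX1 uses
; full-width ymm constant-pool masks, AVX2 broadcasts them, and AVX512VL
; still needs only a single vpternlogq.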
define <4 x double> @fcopysign_v4f64(<4 x double> %a0, <4 x double> %a1) nounwind {
; X86-SSE-LABEL: fcopysign_v4f64:
; X86-SSE:       # %bb.0:
; X86-SSE-NEXT:    pushl %ebp
; X86-SSE-NEXT:    movl %esp, %ebp
; X86-SSE-NEXT:    andl $-16, %esp
; X86-SSE-NEXT:    subl $16, %esp
; X86-SSE-NEXT:    movaps {{.*#+}} xmm3 = [NaN,NaN]
; X86-SSE-NEXT:    movaps %xmm3, %xmm4
; X86-SSE-NEXT:    andnps %xmm2, %xmm4
; X86-SSE-NEXT:    andps %xmm3, %xmm0
; X86-SSE-NEXT:    orps %xmm4, %xmm0
; X86-SSE-NEXT:    andps %xmm3, %xmm1
; X86-SSE-NEXT:    andnps 8(%ebp), %xmm3
; X86-SSE-NEXT:    orps %xmm3, %xmm1
; X86-SSE-NEXT:    movl %ebp, %esp
; X86-SSE-NEXT:    popl %ebp
; X86-SSE-NEXT:    retl
;
; X86-AVX1-LABEL: fcopysign_v4f64:
; X86-AVX1:       # %bb.0:
; X86-AVX1-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}, %ymm1, %ymm1
; X86-AVX1-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0
; X86-AVX1-NEXT:    vorps %ymm1, %ymm0, %ymm0
; X86-AVX1-NEXT:    retl
;
; X86-AVX2-LABEL: fcopysign_v4f64:
; X86-AVX2:       # %bb.0:
; X86-AVX2-NEXT:    vbroadcastsd {{.*#+}} ymm2 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0]
; X86-AVX2-NEXT:    vandps %ymm2, %ymm1, %ymm1
; X86-AVX2-NEXT:    vbroadcastsd {{.*#+}} ymm2 = [NaN,NaN,NaN,NaN]
; X86-AVX2-NEXT:    vandps %ymm2, %ymm0, %ymm0
; X86-AVX2-NEXT:    vorps %ymm1, %ymm0, %ymm0
; X86-AVX2-NEXT:    retl
;
; X86-AVX512-LABEL: fcopysign_v4f64:
; X86-AVX512:       # %bb.0:
; X86-AVX512-NEXT:    vpternlogq $228, {{\.?LCPI[0-9]+_[0-9]+}}{1to4}, %ymm1, %ymm0
; X86-AVX512-NEXT:    retl
;
; X64-SSE-LABEL: fcopysign_v4f64:
; X64-SSE:       # %bb.0:
; X64-SSE-NEXT:    movaps {{.*#+}} xmm4 = [NaN,NaN]
; X64-SSE-NEXT:    movaps %xmm4, %xmm5
; X64-SSE-NEXT:    andnps %xmm2, %xmm5
; X64-SSE-NEXT:    andps %xmm4, %xmm0
; X64-SSE-NEXT:    orps %xmm5, %xmm0
; X64-SSE-NEXT:    andps %xmm4, %xmm1
; X64-SSE-NEXT:    andnps %xmm3, %xmm4
; X64-SSE-NEXT:    orps %xmm4, %xmm1
; X64-SSE-NEXT:    retq
;
; X64-AVX1-LABEL: fcopysign_v4f64:
; X64-AVX1:       # %bb.0:
; X64-AVX1-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1
; X64-AVX1-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; X64-AVX1-NEXT:    vorps %ymm1, %ymm0, %ymm0
; X64-AVX1-NEXT:    retq
;
; X64-AVX2-LABEL: fcopysign_v4f64:
; X64-AVX2:       # %bb.0:
; X64-AVX2-NEXT:    vbroadcastsd {{.*#+}} ymm2 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0]
; X64-AVX2-NEXT:    vandps %ymm2, %ymm1, %ymm1
; X64-AVX2-NEXT:    vbroadcastsd {{.*#+}} ymm2 = [NaN,NaN,NaN,NaN]
; X64-AVX2-NEXT:    vandps %ymm2, %ymm0, %ymm0
; X64-AVX2-NEXT:    vorps %ymm1, %ymm0, %ymm0
; X64-AVX2-NEXT:    retq
;
; X64-AVX512-LABEL: fcopysign_v4f64:
; X64-AVX512:       # %bb.0:
; X64-AVX512-NEXT:    vpternlogq $228, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm1, %ymm0
; X64-AVX512-NEXT:    retq
  %t = call <4 x double> @llvm.copysign.v4f64(<4 x double> %a0, <4 x double> %a1)
  ret <4 x double> %t
}
declare <4 x double> @llvm.copysign.v4f64(<4 x double>, <4 x double>)
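; Same structure as v4f64 with dword elements: the AVX512VL form switches to
; vpternlogd with a {1to8} broadcast.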
define <8 x float> @fcopysign_v8f32(<8 x float> %a0, <8 x float> %a1) nounwind {
; X86-SSE-LABEL: fcopysign_v8f32:
; X86-SSE:       # %bb.0:
; X86-SSE-NEXT:    pushl %ebp
; X86-SSE-NEXT:    movl %esp, %ebp
; X86-SSE-NEXT:    andl $-16, %esp
; X86-SSE-NEXT:    subl $16, %esp
; X86-SSE-NEXT:    movaps {{.*#+}} xmm3 = [NaN,NaN,NaN,NaN]
; X86-SSE-NEXT:    movaps %xmm3, %xmm4
; X86-SSE-NEXT:    andnps %xmm2, %xmm4
; X86-SSE-NEXT:    andps %xmm3, %xmm0
; X86-SSE-NEXT:    orps %xmm4, %xmm0
; X86-SSE-NEXT:    andps %xmm3, %xmm1
; X86-SSE-NEXT:    andnps 8(%ebp), %xmm3
; X86-SSE-NEXT:    orps %xmm3, %xmm1
; X86-SSE-NEXT:    movl %ebp, %esp
; X86-SSE-NEXT:    popl %ebp
; X86-SSE-NEXT:    retl
;
; X86-AVX1-LABEL: fcopysign_v8f32:
; X86-AVX1:       # %bb.0:
; X86-AVX1-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}, %ymm1, %ymm1
; X86-AVX1-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0
; X86-AVX1-NEXT:    vorps %ymm1, %ymm0, %ymm0
; X86-AVX1-NEXT:    retl
;
; X86-AVX2-LABEL: fcopysign_v8f32:
; X86-AVX2:       # %bb.0:
; X86-AVX2-NEXT:    vbroadcastss {{.*#+}} ymm2 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0]
; X86-AVX2-NEXT:    vandps %ymm2, %ymm1, %ymm1
; X86-AVX2-NEXT:    vbroadcastss {{.*#+}} ymm2 = [NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN]
; X86-AVX2-NEXT:    vandps %ymm2, %ymm0, %ymm0
; X86-AVX2-NEXT:    vorps %ymm1, %ymm0, %ymm0
; X86-AVX2-NEXT:    retl
;
; X86-AVX512-LABEL: fcopysign_v8f32:
; X86-AVX512:       # %bb.0:
; X86-AVX512-NEXT:    vpternlogd $228, {{\.?LCPI[0-9]+_[0-9]+}}{1to8}, %ymm1, %ymm0
; X86-AVX512-NEXT:    retl
;
; X64-SSE-LABEL: fcopysign_v8f32:
; X64-SSE:       # %bb.0:
; X64-SSE-NEXT:    movaps {{.*#+}} xmm4 = [NaN,NaN,NaN,NaN]
; X64-SSE-NEXT:    movaps %xmm4, %xmm5
; X64-SSE-NEXT:    andnps %xmm2, %xmm5
; X64-SSE-NEXT:    andps %xmm4, %xmm0
; X64-SSE-NEXT:    orps %xmm5, %xmm0
; X64-SSE-NEXT:    andps %xmm4, %xmm1
; X64-SSE-NEXT:    andnps %xmm3, %xmm4
; X64-SSE-NEXT:    orps %xmm4, %xmm1
; X64-SSE-NEXT:    retq
;
; X64-AVX1-LABEL: fcopysign_v8f32:
; X64-AVX1:       # %bb.0:
; X64-AVX1-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1
; X64-AVX1-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; X64-AVX1-NEXT:    vorps %ymm1, %ymm0, %ymm0
; X64-AVX1-NEXT:    retq
;
; X64-AVX2-LABEL: fcopysign_v8f32:
; X64-AVX2:       # %bb.0:
; X64-AVX2-NEXT:    vbroadcastss {{.*#+}} ymm2 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0]
; X64-AVX2-NEXT:    vandps %ymm2, %ymm1, %ymm1
; X64-AVX2-NEXT:    vbroadcastss {{.*#+}} ymm2 = [NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN]
; X64-AVX2-NEXT:    vandps %ymm2, %ymm0, %ymm0
; X64-AVX2-NEXT:    vorps %ymm1, %ymm0, %ymm0
; X64-AVX2-NEXT:    retq
;
; X64-AVX512-LABEL: fcopysign_v8f32:
; X64-AVX512:       # %bb.0:
; X64-AVX512-NEXT:    vpternlogd $228, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm1, %ymm0
; X64-AVX512-NEXT:    retq
  %t = call <8 x float> @llvm.copysign.v8f32(<8 x float> %a0, <8 x float> %a1)
  ret <8 x float> %t
}
declare <8 x float> @llvm.copysign.v8f32(<8 x float>, <8 x float>)
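; The half loads below are only 16-byte aligned, so the 32-byte AVX1 forms
; must go through vmovups; SSE reuses a single magnitude-mask register across
; both 16-byte halves instead of reloading it.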
define <16 x half> @fcopysign_v16f16(ptr %p0, ptr %p1) nounwind {
; X86-SSE-LABEL: fcopysign_v16f16:
; X86-SSE:       # %bb.0:
; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-SSE-NEXT:    movaps {{.*#+}} xmm1 = [NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN]
; X86-SSE-NEXT:    movaps %xmm1, %xmm2
; X86-SSE-NEXT:    andnps (%ecx), %xmm2
; X86-SSE-NEXT:    movaps (%eax), %xmm0
; X86-SSE-NEXT:    andps %xmm1, %xmm0
; X86-SSE-NEXT:    orps %xmm2, %xmm0
; X86-SSE-NEXT:    movaps %xmm1, %xmm2
; X86-SSE-NEXT:    andnps 16(%ecx), %xmm2
; X86-SSE-NEXT:    andps 16(%eax), %xmm1
; X86-SSE-NEXT:    orps %xmm2, %xmm1
; X86-SSE-NEXT:    retl
;
; X86-AVX1-LABEL: fcopysign_v16f16:
; X86-AVX1:       # %bb.0:
; X86-AVX1-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-AVX1-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-AVX1-NEXT:    vmovups (%ecx), %ymm0
; X86-AVX1-NEXT:    vmovups (%eax), %ymm1
; X86-AVX1-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}, %ymm1, %ymm1
; X86-AVX1-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0
; X86-AVX1-NEXT:    vorps %ymm1, %ymm0, %ymm0
; X86-AVX1-NEXT:    retl
;
; X86-AVX2-LABEL: fcopysign_v16f16:
; X86-AVX2:       # %bb.0:
; X86-AVX2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-AVX2-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-AVX2-NEXT:    vpbroadcastw {{.*#+}} ymm0 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0]
; X86-AVX2-NEXT:    vpand (%ecx), %ymm0, %ymm0
; X86-AVX2-NEXT:    vpbroadcastw {{.*#+}} ymm1 = [NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN]
; X86-AVX2-NEXT:    vpand (%eax), %ymm1, %ymm1
; X86-AVX2-NEXT:    vpor %ymm0, %ymm1, %ymm0
; X86-AVX2-NEXT:    retl
;
; X86-AVX512-LABEL: fcopysign_v16f16:
; X86-AVX512:       # %bb.0:
; X86-AVX512-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-AVX512-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-AVX512-NEXT:    vmovdqu (%ecx), %ymm1
; X86-AVX512-NEXT:    vpbroadcastd {{.*#+}} ymm0 = [2147450879,2147450879,2147450879,2147450879,2147450879,2147450879,2147450879,2147450879]
; X86-AVX512-NEXT:    vpternlogd $202, (%eax), %ymm1, %ymm0
; X86-AVX512-NEXT:    retl
;
; X64-SSE-LABEL: fcopysign_v16f16:
; X64-SSE:       # %bb.0:
; X64-SSE-NEXT:    movaps {{.*#+}} xmm1 = [NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN]
; X64-SSE-NEXT:    movaps %xmm1, %xmm2
; X64-SSE-NEXT:    andnps (%rsi), %xmm2
; X64-SSE-NEXT:    movaps (%rdi), %xmm0
; X64-SSE-NEXT:    andps %xmm1, %xmm0
; X64-SSE-NEXT:    orps %xmm2, %xmm0
; X64-SSE-NEXT:    movaps %xmm1, %xmm2
; X64-SSE-NEXT:    andnps 16(%rsi), %xmm2
; X64-SSE-NEXT:    andps 16(%rdi), %xmm1
; X64-SSE-NEXT:    orps %xmm2, %xmm1
; X64-SSE-NEXT:    retq
;
; X64-AVX1-LABEL: fcopysign_v16f16:
; X64-AVX1:       # %bb.0:
; X64-AVX1-NEXT:    vmovups (%rdi), %ymm0
; X64-AVX1-NEXT:    vmovups (%rsi), %ymm1
; X64-AVX1-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1
; X64-AVX1-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; X64-AVX1-NEXT:    vorps %ymm1, %ymm0, %ymm0
; X64-AVX1-NEXT:    retq
;
; X64-AVX2-LABEL: fcopysign_v16f16:
; X64-AVX2:       # %bb.0:
; X64-AVX2-NEXT:    vpbroadcastw {{.*#+}} ymm0 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0]
; X64-AVX2-NEXT:    vpand (%rsi), %ymm0, %ymm0
; X64-AVX2-NEXT:    vpbroadcastw {{.*#+}} ymm1 = [NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN]
; X64-AVX2-NEXT:    vpand (%rdi), %ymm1, %ymm1
; X64-AVX2-NEXT:    vpor %ymm0, %ymm1, %ymm0
; X64-AVX2-NEXT:    retq
;
; X64-AVX512-LABEL: fcopysign_v16f16:
; X64-AVX512:       # %bb.0:
; X64-AVX512-NEXT:    vmovdqu (%rdi), %ymm1
; X64-AVX512-NEXT:    vpbroadcastd {{.*#+}} ymm0 = [2147450879,2147450879,2147450879,2147450879,2147450879,2147450879,2147450879,2147450879]
; X64-AVX512-NEXT:    vpternlogd $202, (%rsi), %ymm1, %ymm0
; X64-AVX512-NEXT:    retq
  %a0 = load <16 x half>, ptr %p0, align 16
  %a1 = load <16 x half>, ptr %p1, align 16
  %t = call <16 x half> @llvm.copysign.v16f16(<16 x half> %a0, <16 x half> %a1)
  ret <16 x half> %t
}
declare <16 x half> @llvm.copysign.v16f16(<16 x half>, <16 x half>)

;
; 512-bit Vectors
;
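; At 512 bits, AVX512 covers the whole vector with one zmm vpternlog and a
; {1to8}/{1to16} broadcast mask. AVX1/AVX2 split into two ymm halves (on i686
; the second operand is read from the 32-byte-realigned stack), and SSE into
; four xmm pieces, with %a1 arriving in xmm4-xmm7 on x86-64 and entirely on
; the stack on i686.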
define <8 x double> @fcopysign_v8f64(<8 x double> %a0, <8 x double> %a1) nounwind {
; X86-SSE-LABEL: fcopysign_v8f64:
; X86-SSE:       # %bb.0:
; X86-SSE-NEXT:    pushl %ebp
; X86-SSE-NEXT:    movl %esp, %ebp
; X86-SSE-NEXT:    andl $-16, %esp
; X86-SSE-NEXT:    subl $16, %esp
; X86-SSE-NEXT:    movaps {{.*#+}} xmm3 = [NaN,NaN]
; X86-SSE-NEXT:    andps %xmm3, %xmm0
; X86-SSE-NEXT:    movaps %xmm3, %xmm4
; X86-SSE-NEXT:    andnps 24(%ebp), %xmm4
; X86-SSE-NEXT:    orps %xmm4, %xmm0
; X86-SSE-NEXT:    andps %xmm3, %xmm1
; X86-SSE-NEXT:    movaps %xmm3, %xmm4
; X86-SSE-NEXT:    andnps 40(%ebp), %xmm4
; X86-SSE-NEXT:    orps %xmm4, %xmm1
; X86-SSE-NEXT:    andps %xmm3, %xmm2
; X86-SSE-NEXT:    movaps %xmm3, %xmm4
; X86-SSE-NEXT:    andnps 56(%ebp), %xmm4
; X86-SSE-NEXT:    orps %xmm4, %xmm2
; X86-SSE-NEXT:    movaps %xmm3, %xmm4
; X86-SSE-NEXT:    andnps 72(%ebp), %xmm4
; X86-SSE-NEXT:    andps 8(%ebp), %xmm3
; X86-SSE-NEXT:    orps %xmm4, %xmm3
; X86-SSE-NEXT:    movl %ebp, %esp
; X86-SSE-NEXT:    popl %ebp
; X86-SSE-NEXT:    retl
;
; X86-AVX1OR2-LABEL: fcopysign_v8f64:
; X86-AVX1OR2:       # %bb.0:
; X86-AVX1OR2-NEXT:    pushl %ebp
; X86-AVX1OR2-NEXT:    movl %esp, %ebp
; X86-AVX1OR2-NEXT:    andl $-32, %esp
; X86-AVX1OR2-NEXT:    subl $32, %esp
; X86-AVX1OR2-NEXT:    vbroadcastsd {{.*#+}} ymm3 = [NaN,NaN,NaN,NaN]
; X86-AVX1OR2-NEXT:    vandnps %ymm2, %ymm3, %ymm2
; X86-AVX1OR2-NEXT:    vandps %ymm3, %ymm0, %ymm0
; X86-AVX1OR2-NEXT:    vorps %ymm2, %ymm0, %ymm0
; X86-AVX1OR2-NEXT:    vandps %ymm3, %ymm1, %ymm1
; X86-AVX1OR2-NEXT:    vandnps 8(%ebp), %ymm3, %ymm2
; X86-AVX1OR2-NEXT:    vorps %ymm2, %ymm1, %ymm1
; X86-AVX1OR2-NEXT:    movl %ebp, %esp
; X86-AVX1OR2-NEXT:    popl %ebp
; X86-AVX1OR2-NEXT:    retl
;
; X86-AVX512-LABEL: fcopysign_v8f64:
; X86-AVX512:       # %bb.0:
; X86-AVX512-NEXT:    vpternlogq $228, {{\.?LCPI[0-9]+_[0-9]+}}{1to8}, %zmm1, %zmm0
; X86-AVX512-NEXT:    retl
;
; X64-SSE-LABEL: fcopysign_v8f64:
; X64-SSE:       # %bb.0:
; X64-SSE-NEXT:    movaps {{.*#+}} xmm8 = [NaN,NaN]
; X64-SSE-NEXT:    movaps %xmm8, %xmm9
; X64-SSE-NEXT:    andnps %xmm4, %xmm9
; X64-SSE-NEXT:    andps %xmm8, %xmm0
; X64-SSE-NEXT:    orps %xmm9, %xmm0
; X64-SSE-NEXT:    movaps %xmm8, %xmm4
; X64-SSE-NEXT:    andnps %xmm5, %xmm4
; X64-SSE-NEXT:    andps %xmm8, %xmm1
; X64-SSE-NEXT:    orps %xmm4, %xmm1
; X64-SSE-NEXT:    movaps %xmm8, %xmm4
; X64-SSE-NEXT:    andnps %xmm6, %xmm4
; X64-SSE-NEXT:    andps %xmm8, %xmm2
; X64-SSE-NEXT:    orps %xmm4, %xmm2
; X64-SSE-NEXT:    andps %xmm8, %xmm3
; X64-SSE-NEXT:    andnps %xmm7, %xmm8
; X64-SSE-NEXT:    orps %xmm8, %xmm3
; X64-SSE-NEXT:    retq
;
; X64-AVX1OR2-LABEL: fcopysign_v8f64:
; X64-AVX1OR2:       # %bb.0:
; X64-AVX1OR2-NEXT:    vbroadcastsd {{.*#+}} ymm4 = [NaN,NaN,NaN,NaN]
; X64-AVX1OR2-NEXT:    vandnps %ymm2, %ymm4, %ymm2
; X64-AVX1OR2-NEXT:    vandps %ymm4, %ymm0, %ymm0
; X64-AVX1OR2-NEXT:    vorps %ymm2, %ymm0, %ymm0
; X64-AVX1OR2-NEXT:    vandnps %ymm3, %ymm4, %ymm2
; X64-AVX1OR2-NEXT:    vandps %ymm4, %ymm1, %ymm1
; X64-AVX1OR2-NEXT:    vorps %ymm2, %ymm1, %ymm1
; X64-AVX1OR2-NEXT:    retq
;
; X64-AVX512-LABEL: fcopysign_v8f64:
; X64-AVX512:       # %bb.0:
; X64-AVX512-NEXT:    vpternlogq $228, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm1, %zmm0
; X64-AVX512-NEXT:    retq
  %t = call <8 x double> @llvm.copysign.v8f64(<8 x double> %a0, <8 x double> %a1)
  ret <8 x double> %t
}
declare <8 x double> @llvm.copysign.v8f64(<8 x double>, <8 x double>)
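; Float twin of v8f64: identical shape, but with vpternlogd and a {1to16}
; dword broadcast on AVX512.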
define <16 x float> @fcopysign_v16f32(<16 x float> %a0, <16 x float> %a1) nounwind {
; X86-SSE-LABEL: fcopysign_v16f32:
; X86-SSE:       # %bb.0:
; X86-SSE-NEXT:    pushl %ebp
; X86-SSE-NEXT:    movl %esp, %ebp
; X86-SSE-NEXT:    andl $-16, %esp
; X86-SSE-NEXT:    subl $16, %esp
; X86-SSE-NEXT:    movaps {{.*#+}} xmm3 = [NaN,NaN,NaN,NaN]
; X86-SSE-NEXT:    andps %xmm3, %xmm0
; X86-SSE-NEXT:    movaps %xmm3, %xmm4
; X86-SSE-NEXT:    andnps 24(%ebp), %xmm4
; X86-SSE-NEXT:    orps %xmm4, %xmm0
; X86-SSE-NEXT:    andps %xmm3, %xmm1
; X86-SSE-NEXT:    movaps %xmm3, %xmm4
; X86-SSE-NEXT:    andnps 40(%ebp), %xmm4
; X86-SSE-NEXT:    orps %xmm4, %xmm1
; X86-SSE-NEXT:    andps %xmm3, %xmm2
; X86-SSE-NEXT:    movaps %xmm3, %xmm4
; X86-SSE-NEXT:    andnps 56(%ebp), %xmm4
; X86-SSE-NEXT:    orps %xmm4, %xmm2
; X86-SSE-NEXT:    movaps %xmm3, %xmm4
; X86-SSE-NEXT:    andnps 72(%ebp), %xmm4
; X86-SSE-NEXT:    andps 8(%ebp), %xmm3
; X86-SSE-NEXT:    orps %xmm4, %xmm3
; X86-SSE-NEXT:    movl %ebp, %esp
; X86-SSE-NEXT:    popl %ebp
; X86-SSE-NEXT:    retl
;
; X86-AVX1OR2-LABEL: fcopysign_v16f32:
; X86-AVX1OR2:       # %bb.0:
; X86-AVX1OR2-NEXT:    pushl %ebp
; X86-AVX1OR2-NEXT:    movl %esp, %ebp
; X86-AVX1OR2-NEXT:    andl $-32, %esp
; X86-AVX1OR2-NEXT:    subl $32, %esp
; X86-AVX1OR2-NEXT:    vbroadcastss {{.*#+}} ymm3 = [NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN]
; X86-AVX1OR2-NEXT:    vandnps %ymm2, %ymm3, %ymm2
; X86-AVX1OR2-NEXT:    vandps %ymm3, %ymm0, %ymm0
; X86-AVX1OR2-NEXT:    vorps %ymm2, %ymm0, %ymm0
; X86-AVX1OR2-NEXT:    vandps %ymm3, %ymm1, %ymm1
; X86-AVX1OR2-NEXT:    vandnps 8(%ebp), %ymm3, %ymm2
; X86-AVX1OR2-NEXT:    vorps %ymm2, %ymm1, %ymm1
; X86-AVX1OR2-NEXT:    movl %ebp, %esp
; X86-AVX1OR2-NEXT:    popl %ebp
; X86-AVX1OR2-NEXT:    retl
;
; X86-AVX512-LABEL: fcopysign_v16f32:
; X86-AVX512:       # %bb.0:
; X86-AVX512-NEXT:    vpternlogd $228, {{\.?LCPI[0-9]+_[0-9]+}}{1to16}, %zmm1, %zmm0
; X86-AVX512-NEXT:    retl
;
; X64-SSE-LABEL: fcopysign_v16f32:
; X64-SSE:       # %bb.0:
; X64-SSE-NEXT:    movaps {{.*#+}} xmm8 = [NaN,NaN,NaN,NaN]
; X64-SSE-NEXT:    movaps %xmm8, %xmm9
; X64-SSE-NEXT:    andnps %xmm4, %xmm9
; X64-SSE-NEXT:    andps %xmm8, %xmm0
; X64-SSE-NEXT:    orps %xmm9, %xmm0
; X64-SSE-NEXT:    movaps %xmm8, %xmm4
; X64-SSE-NEXT:    andnps %xmm5, %xmm4
; X64-SSE-NEXT:    andps %xmm8, %xmm1
; X64-SSE-NEXT:    orps %xmm4, %xmm1
; X64-SSE-NEXT:    movaps %xmm8, %xmm4
; X64-SSE-NEXT:    andnps %xmm6, %xmm4
; X64-SSE-NEXT:    andps %xmm8, %xmm2
; X64-SSE-NEXT:    orps %xmm4, %xmm2
; X64-SSE-NEXT:    andps %xmm8, %xmm3
; X64-SSE-NEXT:    andnps %xmm7, %xmm8
; X64-SSE-NEXT:    orps %xmm8, %xmm3
; X64-SSE-NEXT:    retq
;
; X64-AVX1OR2-LABEL: fcopysign_v16f32:
; X64-AVX1OR2:       # %bb.0:
; X64-AVX1OR2-NEXT:    vbroadcastss {{.*#+}} ymm4 = [NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN]
; X64-AVX1OR2-NEXT:    vandnps %ymm2, %ymm4, %ymm2
; X64-AVX1OR2-NEXT:    vandps %ymm4, %ymm0, %ymm0
; X64-AVX1OR2-NEXT:    vorps %ymm2, %ymm0, %ymm0
; X64-AVX1OR2-NEXT:    vandnps %ymm3, %ymm4, %ymm2
; X64-AVX1OR2-NEXT:    vandps %ymm4, %ymm1, %ymm1
; X64-AVX1OR2-NEXT:    vorps %ymm2, %ymm1, %ymm1
; X64-AVX1OR2-NEXT:    retq
;
; X64-AVX512-LABEL: fcopysign_v16f32:
; X64-AVX512:       # %bb.0:
; X64-AVX512-NEXT:    vpternlogd $228, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm1, %zmm0
; X64-AVX512-NEXT:    retq
  %t = call <16 x float> @llvm.copysign.v16f32(<16 x float> %a0, <16 x float> %a1)
  ret <16 x float> %t
}
declare <16 x float> @llvm.copysign.v16f32(<16 x float>, <16 x float>)
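; A full zmm of half elements. AVX512 still needs only vmovdqu64 +
; vpbroadcastd + vpternlogd $202; AVX2 walks two 32-byte chunks with
; vpandn/vpand/vpor, AVX1 does the same through float logic ops, and SSE
; walks four 16-byte chunks.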
define <32 x half> @fcopysign_v32f16(ptr %p0, ptr %p1) nounwind {
; X86-SSE-LABEL: fcopysign_v32f16:
; X86-SSE:       # %bb.0:
; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-SSE-NEXT:    movaps {{.*#+}} xmm3 = [NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN]
; X86-SSE-NEXT:    movaps %xmm3, %xmm1
; X86-SSE-NEXT:    andnps (%ecx), %xmm1
; X86-SSE-NEXT:    movaps (%eax), %xmm0
; X86-SSE-NEXT:    andps %xmm3, %xmm0
; X86-SSE-NEXT:    orps %xmm1, %xmm0
; X86-SSE-NEXT:    movaps %xmm3, %xmm2
; X86-SSE-NEXT:    andnps 16(%ecx), %xmm2
; X86-SSE-NEXT:    movaps 16(%eax), %xmm1
; X86-SSE-NEXT:    andps %xmm3, %xmm1
; X86-SSE-NEXT:    orps %xmm2, %xmm1
; X86-SSE-NEXT:    movaps %xmm3, %xmm4
; X86-SSE-NEXT:    andnps 32(%ecx), %xmm4
; X86-SSE-NEXT:    movaps 32(%eax), %xmm2
; X86-SSE-NEXT:    andps %xmm3, %xmm2
; X86-SSE-NEXT:    orps %xmm4, %xmm2
; X86-SSE-NEXT:    movaps %xmm3, %xmm4
; X86-SSE-NEXT:    andnps 48(%ecx), %xmm4
; X86-SSE-NEXT:    andps 48(%eax), %xmm3
; X86-SSE-NEXT:    orps %xmm4, %xmm3
; X86-SSE-NEXT:    retl
;
; X86-AVX1-LABEL: fcopysign_v32f16:
; X86-AVX1:       # %bb.0:
; X86-AVX1-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-AVX1-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-AVX1-NEXT:    vbroadcastss {{.*#+}} ymm1 = [NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN]
; X86-AVX1-NEXT:    vandnps (%ecx), %ymm1, %ymm0
; X86-AVX1-NEXT:    vandps (%eax), %ymm1, %ymm2
; X86-AVX1-NEXT:    vorps %ymm0, %ymm2, %ymm0
; X86-AVX1-NEXT:    vandnps 32(%ecx), %ymm1, %ymm2
; X86-AVX1-NEXT:    vandps 32(%eax), %ymm1, %ymm1
; X86-AVX1-NEXT:    vorps %ymm2, %ymm1, %ymm1
; X86-AVX1-NEXT:    retl
;
; X86-AVX2-LABEL: fcopysign_v32f16:
; X86-AVX2:       # %bb.0:
; X86-AVX2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-AVX2-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-AVX2-NEXT:    vpbroadcastw {{.*#+}} ymm1 = [NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN]
; X86-AVX2-NEXT:    vpandn (%ecx), %ymm1, %ymm0
; X86-AVX2-NEXT:    vpand (%eax), %ymm1, %ymm2
; X86-AVX2-NEXT:    vpor %ymm0, %ymm2, %ymm0
; X86-AVX2-NEXT:    vpandn 32(%ecx), %ymm1, %ymm2
; X86-AVX2-NEXT:    vpand 32(%eax), %ymm1, %ymm1
; X86-AVX2-NEXT:    vpor %ymm2, %ymm1, %ymm1
; X86-AVX2-NEXT:    retl
;
; X86-AVX512-LABEL: fcopysign_v32f16:
; X86-AVX512:       # %bb.0:
; X86-AVX512-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-AVX512-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-AVX512-NEXT:    vmovdqu64 (%ecx), %zmm1
; X86-AVX512-NEXT:    vpbroadcastd {{.*#+}} zmm0 = [2147450879,2147450879,2147450879,2147450879,2147450879,2147450879,2147450879,2147450879,2147450879,2147450879,2147450879,2147450879,2147450879,2147450879,2147450879,2147450879]
; X86-AVX512-NEXT:    vpternlogd $202, (%eax), %zmm1, %zmm0
; X86-AVX512-NEXT:    retl
;
; X64-SSE-LABEL: fcopysign_v32f16:
; X64-SSE:       # %bb.0:
; X64-SSE-NEXT:    movaps {{.*#+}} xmm3 = [NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN]
; X64-SSE-NEXT:    movaps %xmm3, %xmm1
; X64-SSE-NEXT:    andnps (%rsi), %xmm1
; X64-SSE-NEXT:    movaps (%rdi), %xmm0
; X64-SSE-NEXT:    andps %xmm3, %xmm0
; X64-SSE-NEXT:    orps %xmm1, %xmm0
; X64-SSE-NEXT:    movaps %xmm3, %xmm2
; X64-SSE-NEXT:    andnps 16(%rsi), %xmm2
; X64-SSE-NEXT:    movaps 16(%rdi), %xmm1
; X64-SSE-NEXT:    andps %xmm3, %xmm1
; X64-SSE-NEXT:    orps %xmm2, %xmm1
; X64-SSE-NEXT:    movaps %xmm3, %xmm4
; X64-SSE-NEXT:    andnps 32(%rsi), %xmm4
; X64-SSE-NEXT:    movaps 32(%rdi), %xmm2
; X64-SSE-NEXT:    andps %xmm3, %xmm2
; X64-SSE-NEXT:    orps %xmm4, %xmm2
; X64-SSE-NEXT:    movaps %xmm3, %xmm4
; X64-SSE-NEXT:    andnps 48(%rsi), %xmm4
; X64-SSE-NEXT:    andps 48(%rdi), %xmm3
; X64-SSE-NEXT:    orps %xmm4, %xmm3
; X64-SSE-NEXT:    retq
;
; X64-AVX1-LABEL: fcopysign_v32f16:
; X64-AVX1:       # %bb.0:
; X64-AVX1-NEXT:    vbroadcastss {{.*#+}} ymm1 = [NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN]
; X64-AVX1-NEXT:    vandnps (%rsi), %ymm1, %ymm0
; X64-AVX1-NEXT:    vandps (%rdi), %ymm1, %ymm2
; X64-AVX1-NEXT:    vorps %ymm0, %ymm2, %ymm0
; X64-AVX1-NEXT:    vandnps 32(%rsi), %ymm1, %ymm2
; X64-AVX1-NEXT:    vandps 32(%rdi), %ymm1, %ymm1
; X64-AVX1-NEXT:    vorps %ymm2, %ymm1, %ymm1
; X64-AVX1-NEXT:    retq
;
; X64-AVX2-LABEL: fcopysign_v32f16:
; X64-AVX2:       # %bb.0:
; X64-AVX2-NEXT:    vpbroadcastw {{.*#+}} ymm1 = [NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN]
; X64-AVX2-NEXT:    vpandn (%rsi), %ymm1, %ymm0
; X64-AVX2-NEXT:    vpand (%rdi), %ymm1, %ymm2
; X64-AVX2-NEXT:    vpor %ymm0, %ymm2, %ymm0
; X64-AVX2-NEXT:    vpandn 32(%rsi), %ymm1, %ymm2
; X64-AVX2-NEXT:    vpand 32(%rdi), %ymm1, %ymm1
; X64-AVX2-NEXT:    vpor %ymm2, %ymm1, %ymm1
; X64-AVX2-NEXT:    retq
;
; X64-AVX512-LABEL: fcopysign_v32f16:
; X64-AVX512:       # %bb.0:
; X64-AVX512-NEXT:    vmovdqu64 (%rdi), %zmm1
; X64-AVX512-NEXT:    vpbroadcastd {{.*#+}} zmm0 = [2147450879,2147450879,2147450879,2147450879,2147450879,2147450879,2147450879,2147450879,2147450879,2147450879,2147450879,2147450879,2147450879,2147450879,2147450879,2147450879]
; X64-AVX512-NEXT:    vpternlogd $202, (%rsi), %zmm1, %zmm0
; X64-AVX512-NEXT:    retq
  %a0 = load <32 x half>, ptr %p0, align 16
  %a1 = load <32 x half>, ptr %p1, align 16
  %t = call <32 x half> @llvm.copysign.v32f16(<32 x half> %a0, <32 x half> %a1)
  ret <32 x half> %t
}
declare <32 x half> @llvm.copysign.v32f16(<32 x half>, <32 x half>)
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
; X64: {{.*}}
; X64-AVX: {{.*}}
; X64-AVX512FP16: {{.*}}
; X64-AVX512VL: {{.*}}
; X64-AVX512VLDQ: {{.*}}
; X86: {{.*}}
; X86-AVX: {{.*}}
; X86-AVX512FP16: {{.*}}
; X86-AVX512VL: {{.*}}
; X86-AVX512VLDQ: {{.*}}