1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py 2; RUN: llc < %s -mtriple=x86_64-- -mattr=avx | FileCheck %s 3 4; PR38527 - https://bugs.llvm.org/show_bug.cgi?id=38527 5 6; Use an AVX target to show that the potential problem 7; is not limited to 128-bit types/registers. Ie, widening 8; up to 256-bits may also result in bogus libcalls. 9 10; Use fsin as the representative test for various data types. 11 12declare <1 x float> @llvm.sin.v1f32(<1 x float>) 13declare <2 x float> @llvm.sin.v2f32(<2 x float>) 14declare <3 x float> @llvm.sin.v3f32(<3 x float>) 15declare <4 x float> @llvm.sin.v4f32(<4 x float>) 16declare <5 x float> @llvm.sin.v5f32(<5 x float>) 17declare <6 x float> @llvm.sin.v6f32(<6 x float>) 18declare <3 x double> @llvm.sin.v3f64(<3 x double>) 19 20declare <1 x float> @llvm.tan.v1f32(<1 x float>) 21declare <2 x float> @llvm.tan.v2f32(<2 x float>) 22declare <3 x float> @llvm.tan.v3f32(<3 x float>) 23declare <4 x float> @llvm.tan.v4f32(<4 x float>) 24declare <5 x float> @llvm.tan.v5f32(<5 x float>) 25declare <6 x float> @llvm.tan.v6f32(<6 x float>) 26declare <3 x double> @llvm.tan.v3f64(<3 x double>) 27 28declare <1 x float> @llvm.acos.v1f32(<1 x float>) 29declare <2 x float> @llvm.acos.v2f32(<2 x float>) 30declare <3 x float> @llvm.acos.v3f32(<3 x float>) 31declare <4 x float> @llvm.acos.v4f32(<4 x float>) 32declare <5 x float> @llvm.acos.v5f32(<5 x float>) 33declare <6 x float> @llvm.acos.v6f32(<6 x float>) 34declare <3 x double> @llvm.acos.v3f64(<3 x double 35>) 36declare <1 x float> @llvm.asin.v1f32(<1 x float>) 37declare <2 x float> @llvm.asin.v2f32(<2 x float>) 38declare <3 x float> @llvm.asin.v3f32(<3 x float>) 39declare <4 x float> @llvm.asin.v4f32(<4 x float>) 40declare <5 x float> @llvm.asin.v5f32(<5 x float>) 41declare <6 x float> @llvm.asin.v6f32(<6 x float>) 42declare <3 x double> @llvm.asin.v3f64(<3 x double>) 43 44declare <1 x float> @llvm.atan.v1f32(<1 x float>) 45declare <2 x float> @llvm.atan.v2f32(<2 x float>) 46declare <3 x float> @llvm.atan.v3f32(<3 x float>) 47declare <4 x float> @llvm.atan.v4f32(<4 x float>) 48declare <5 x float> @llvm.atan.v5f32(<5 x float>) 49declare <6 x float> @llvm.atan.v6f32(<6 x float>) 50declare <3 x double> @llvm.atan.v3f64(<3 x double>) 51 52declare <1 x float> @llvm.cosh.v1f32(<1 x float>) 53declare <2 x float> @llvm.cosh.v2f32(<2 x float>) 54declare <3 x float> @llvm.cosh.v3f32(<3 x float>) 55declare <4 x float> @llvm.cosh.v4f32(<4 x float>) 56declare <5 x float> @llvm.cosh.v5f32(<5 x float>) 57declare <6 x float> @llvm.cosh.v6f32(<6 x float>) 58declare <3 x double> @llvm.cosh.v3f64(<3 x double>) 59 60declare <1 x float> @llvm.sinh.v1f32(<1 x float>) 61declare <2 x float> @llvm.sinh.v2f32(<2 x float>) 62declare <3 x float> @llvm.sinh.v3f32(<3 x float>) 63declare <4 x float> @llvm.sinh.v4f32(<4 x float>) 64declare <5 x float> @llvm.sinh.v5f32(<5 x float>) 65declare <6 x float> @llvm.sinh.v6f32(<6 x float>) 66declare <3 x double> @llvm.sinh.v3f64(<3 x double>) 67 68declare <1 x float> @llvm.tanh.v1f32(<1 x float>) 69declare <2 x float> @llvm.tanh.v2f32(<2 x float>) 70declare <3 x float> @llvm.tanh.v3f32(<3 x float>) 71declare <4 x float> @llvm.tanh.v4f32(<4 x float>) 72declare <5 x float> @llvm.tanh.v5f32(<5 x float>) 73declare <6 x float> @llvm.tanh.v6f32(<6 x float>) 74declare <3 x double> @llvm.tanh.v3f64(<3 x double>) 75 76; Verify that all of the potential libcall candidates are handled. 77; Some of these have custom lowering, so those cases won't have 78; libcalls. 
79 80declare <2 x float> @llvm.fabs.v2f32(<2 x float>) 81declare <2 x float> @llvm.ceil.v2f32(<2 x float>) 82declare <2 x float> @llvm.cos.v2f32(<2 x float>) 83declare <2 x float> @llvm.exp.v2f32(<2 x float>) 84declare <2 x float> @llvm.exp2.v2f32(<2 x float>) 85declare <2 x float> @llvm.floor.v2f32(<2 x float>) 86declare <2 x float> @llvm.log.v2f32(<2 x float>) 87declare <2 x float> @llvm.log10.v2f32(<2 x float>) 88declare <2 x float> @llvm.log2.v2f32(<2 x float>) 89declare <2 x float> @llvm.nearbyint.v2f32(<2 x float>) 90declare <2 x float> @llvm.rint.v2f32(<2 x float>) 91declare <2 x float> @llvm.round.v2f32(<2 x float>) 92declare <2 x float> @llvm.sqrt.v2f32(<2 x float>) 93declare <2 x float> @llvm.trunc.v2f32(<2 x float>) 94 95define <1 x float> @sin_v1f32(<1 x float> %x) nounwind { 96; CHECK-LABEL: sin_v1f32: 97; CHECK: # %bb.0: 98; CHECK-NEXT: pushq %rax 99; CHECK-NEXT: callq sinf@PLT 100; CHECK-NEXT: popq %rax 101; CHECK-NEXT: retq 102 %r = call <1 x float> @llvm.sin.v1f32(<1 x float> %x) 103 ret <1 x float> %r 104} 105 106define <2 x float> @sin_v2f32(<2 x float> %x) nounwind { 107; CHECK-LABEL: sin_v2f32: 108; CHECK: # %bb.0: 109; CHECK-NEXT: subq $40, %rsp 110; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill 111; CHECK-NEXT: callq sinf@PLT 112; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 113; CHECK-NEXT: vmovshdup (%rsp), %xmm0 # 16-byte Folded Reload 114; CHECK-NEXT: # xmm0 = mem[1,1,3,3] 115; CHECK-NEXT: callq sinf@PLT 116; CHECK-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload 117; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3] 118; CHECK-NEXT: addq $40, %rsp 119; CHECK-NEXT: retq 120 %r = call <2 x float> @llvm.sin.v2f32(<2 x float> %x) 121 ret <2 x float> %r 122} 123 124define <3 x float> @sin_v3f32(<3 x float> %x) nounwind { 125; CHECK-LABEL: sin_v3f32: 126; CHECK: # %bb.0: 127; CHECK-NEXT: subq $40, %rsp 128; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 129; CHECK-NEXT: callq sinf@PLT 130; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill 131; CHECK-NEXT: vmovshdup {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload 132; CHECK-NEXT: # xmm0 = mem[1,1,3,3] 133; CHECK-NEXT: callq sinf@PLT 134; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload 135; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3] 136; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill 137; CHECK-NEXT: vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload 138; CHECK-NEXT: # xmm0 = mem[1,0] 139; CHECK-NEXT: callq sinf@PLT 140; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload 141; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3] 142; CHECK-NEXT: addq $40, %rsp 143; CHECK-NEXT: retq 144 %r = call <3 x float> @llvm.sin.v3f32(<3 x float> %x) 145 ret <3 x float> %r 146} 147 148define <4 x float> @sin_v4f32(<4 x float> %x) nounwind { 149; CHECK-LABEL: sin_v4f32: 150; CHECK: # %bb.0: 151; CHECK-NEXT: subq $40, %rsp 152; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 153; CHECK-NEXT: callq sinf@PLT 154; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill 155; CHECK-NEXT: vmovshdup {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload 156; CHECK-NEXT: # xmm0 = mem[1,1,3,3] 157; CHECK-NEXT: callq sinf@PLT 158; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload 159; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3] 160; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill 161; CHECK-NEXT: vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload 
162; CHECK-NEXT: # xmm0 = mem[1,0] 163; CHECK-NEXT: callq sinf@PLT 164; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload 165; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3] 166; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill 167; CHECK-NEXT: vpermilps $255, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload 168; CHECK-NEXT: # xmm0 = mem[3,3,3,3] 169; CHECK-NEXT: callq sinf@PLT 170; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload 171; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0] 172; CHECK-NEXT: addq $40, %rsp 173; CHECK-NEXT: retq 174 %r = call <4 x float> @llvm.sin.v4f32(<4 x float> %x) 175 ret <4 x float> %r 176} 177 178define <5 x float> @sin_v5f32(<5 x float> %x) nounwind { 179; CHECK-LABEL: sin_v5f32: 180; CHECK: # %bb.0: 181; CHECK-NEXT: subq $72, %rsp 182; CHECK-NEXT: vmovups %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill 183; CHECK-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 184; CHECK-NEXT: vzeroupper 185; CHECK-NEXT: callq sinf@PLT 186; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill 187; CHECK-NEXT: vmovshdup {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload 188; CHECK-NEXT: # xmm0 = mem[1,1,3,3] 189; CHECK-NEXT: callq sinf@PLT 190; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload 191; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3] 192; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill 193; CHECK-NEXT: vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload 194; CHECK-NEXT: # xmm0 = mem[1,0] 195; CHECK-NEXT: callq sinf@PLT 196; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload 197; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3] 198; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill 199; CHECK-NEXT: vpermilps $255, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload 200; CHECK-NEXT: # xmm0 = mem[3,3,3,3] 201; CHECK-NEXT: callq sinf@PLT 202; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload 203; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0] 204; CHECK-NEXT: vmovups %ymm0, (%rsp) # 32-byte Spill 205; CHECK-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload 206; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm0 207; CHECK-NEXT: vzeroupper 208; CHECK-NEXT: callq sinf@PLT 209; CHECK-NEXT: vmovups (%rsp), %ymm1 # 32-byte Reload 210; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 211; CHECK-NEXT: addq $72, %rsp 212; CHECK-NEXT: retq 213 %r = call <5 x float> @llvm.sin.v5f32(<5 x float> %x) 214 ret <5 x float> %r 215} 216 217define <6 x float> @sin_v6f32(<6 x float> %x) nounwind { 218; CHECK-LABEL: sin_v6f32: 219; CHECK: # %bb.0: 220; CHECK-NEXT: subq $72, %rsp 221; CHECK-NEXT: vmovups %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill 222; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm0 223; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill 224; CHECK-NEXT: vzeroupper 225; CHECK-NEXT: callq sinf@PLT 226; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 227; CHECK-NEXT: vmovshdup (%rsp), %xmm0 # 16-byte Folded Reload 228; CHECK-NEXT: # xmm0 = mem[1,1,3,3] 229; CHECK-NEXT: callq sinf@PLT 230; CHECK-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload 231; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3] 232; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 233; CHECK-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload 234; CHECK-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 235; CHECK-NEXT: vzeroupper 236; CHECK-NEXT: callq sinf@PLT 237; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill 238; 
CHECK-NEXT: vmovshdup {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload 239; CHECK-NEXT: # xmm0 = mem[1,1,3,3] 240; CHECK-NEXT: callq sinf@PLT 241; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload 242; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3] 243; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill 244; CHECK-NEXT: vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload 245; CHECK-NEXT: # xmm0 = mem[1,0] 246; CHECK-NEXT: callq sinf@PLT 247; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload 248; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3] 249; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill 250; CHECK-NEXT: vpermilps $255, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload 251; CHECK-NEXT: # xmm0 = mem[3,3,3,3] 252; CHECK-NEXT: callq sinf@PLT 253; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload 254; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0] 255; CHECK-NEXT: vinsertf128 $1, {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 16-byte Folded Reload 256; CHECK-NEXT: addq $72, %rsp 257; CHECK-NEXT: retq 258 %r = call <6 x float> @llvm.sin.v6f32(<6 x float> %x) 259 ret <6 x float> %r 260} 261 262define <3 x double> @sin_v3f64(<3 x double> %x) nounwind { 263; CHECK-LABEL: sin_v3f64: 264; CHECK: # %bb.0: 265; CHECK-NEXT: subq $72, %rsp 266; CHECK-NEXT: vmovups %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill 267; CHECK-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 268; CHECK-NEXT: vzeroupper 269; CHECK-NEXT: callq sin@PLT 270; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill 271; CHECK-NEXT: vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload 272; CHECK-NEXT: # xmm0 = mem[1,0] 273; CHECK-NEXT: callq sin@PLT 274; CHECK-NEXT: vmovapd (%rsp), %xmm1 # 16-byte Reload 275; CHECK-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0] 276; CHECK-NEXT: vmovupd %ymm0, (%rsp) # 32-byte Spill 277; CHECK-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload 278; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm0 279; CHECK-NEXT: vzeroupper 280; CHECK-NEXT: callq sin@PLT 281; CHECK-NEXT: vmovups (%rsp), %ymm1 # 32-byte Reload 282; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 283; CHECK-NEXT: addq $72, %rsp 284; CHECK-NEXT: retq 285 %r = call <3 x double> @llvm.sin.v3f64(<3 x double> %x) 286 ret <3 x double> %r 287} 288 289define <1 x float> @tan_v1f32(<1 x float> %x) nounwind { 290; CHECK-LABEL: tan_v1f32: 291; CHECK: # %bb.0: 292; CHECK-NEXT: pushq %rax 293; CHECK-NEXT: callq tanf@PLT 294; CHECK-NEXT: popq %rax 295; CHECK-NEXT: retq 296 %r = call <1 x float> @llvm.tan.v1f32(<1 x float> %x) 297 ret <1 x float> %r 298} 299 300define <2 x float> @tan_v2f32(<2 x float> %x) nounwind { 301; CHECK-LABEL: tan_v2f32: 302; CHECK: # %bb.0: 303; CHECK-NEXT: subq $40, %rsp 304; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill 305; CHECK-NEXT: callq tanf@PLT 306; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 307; CHECK-NEXT: vmovshdup (%rsp), %xmm0 # 16-byte Folded Reload 308; CHECK-NEXT: # xmm0 = mem[1,1,3,3] 309; CHECK-NEXT: callq tanf@PLT 310; CHECK-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload 311; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3] 312; CHECK-NEXT: addq $40, %rsp 313; CHECK-NEXT: retq 314 %r = call <2 x float> @llvm.tan.v2f32(<2 x float> %x) 315 ret <2 x float> %r 316} 317 318define <3 x float> @tan_v3f32(<3 x float> %x) nounwind { 319; CHECK-LABEL: tan_v3f32: 320; CHECK: # %bb.0: 321; CHECK-NEXT: subq $40, %rsp 322; CHECK-NEXT: vmovaps %xmm0, 
{{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 323; CHECK-NEXT: callq tanf@PLT 324; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill 325; CHECK-NEXT: vmovshdup {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload 326; CHECK-NEXT: # xmm0 = mem[1,1,3,3] 327; CHECK-NEXT: callq tanf@PLT 328; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload 329; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3] 330; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill 331; CHECK-NEXT: vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload 332; CHECK-NEXT: # xmm0 = mem[1,0] 333; CHECK-NEXT: callq tanf@PLT 334; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload 335; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3] 336; CHECK-NEXT: addq $40, %rsp 337; CHECK-NEXT: retq 338 %r = call <3 x float> @llvm.tan.v3f32(<3 x float> %x) 339 ret <3 x float> %r 340} 341 342define <4 x float> @tan_v4f32(<4 x float> %x) nounwind { 343; CHECK-LABEL: tan_v4f32: 344; CHECK: # %bb.0: 345; CHECK-NEXT: subq $40, %rsp 346; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 347; CHECK-NEXT: callq tanf@PLT 348; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill 349; CHECK-NEXT: vmovshdup {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload 350; CHECK-NEXT: # xmm0 = mem[1,1,3,3] 351; CHECK-NEXT: callq tanf@PLT 352; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload 353; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3] 354; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill 355; CHECK-NEXT: vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload 356; CHECK-NEXT: # xmm0 = mem[1,0] 357; CHECK-NEXT: callq tanf@PLT 358; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload 359; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3] 360; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill 361; CHECK-NEXT: vpermilps $255, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload 362; CHECK-NEXT: # xmm0 = mem[3,3,3,3] 363; CHECK-NEXT: callq tanf@PLT 364; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload 365; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0] 366; CHECK-NEXT: addq $40, %rsp 367; CHECK-NEXT: retq 368 %r = call <4 x float> @llvm.tan.v4f32(<4 x float> %x) 369 ret <4 x float> %r 370} 371 372define <5 x float> @tan_v5f32(<5 x float> %x) nounwind { 373; CHECK-LABEL: tan_v5f32: 374; CHECK: # %bb.0: 375; CHECK-NEXT: subq $72, %rsp 376; CHECK-NEXT: vmovups %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill 377; CHECK-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 378; CHECK-NEXT: vzeroupper 379; CHECK-NEXT: callq tanf@PLT 380; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill 381; CHECK-NEXT: vmovshdup {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload 382; CHECK-NEXT: # xmm0 = mem[1,1,3,3] 383; CHECK-NEXT: callq tanf@PLT 384; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload 385; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3] 386; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill 387; CHECK-NEXT: vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload 388; CHECK-NEXT: # xmm0 = mem[1,0] 389; CHECK-NEXT: callq tanf@PLT 390; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload 391; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3] 392; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill 393; CHECK-NEXT: vpermilps $255, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload 394; CHECK-NEXT: # xmm0 = mem[3,3,3,3] 395; CHECK-NEXT: callq tanf@PLT 396; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload 
397; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0] 398; CHECK-NEXT: vmovups %ymm0, (%rsp) # 32-byte Spill 399; CHECK-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload 400; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm0 401; CHECK-NEXT: vzeroupper 402; CHECK-NEXT: callq tanf@PLT 403; CHECK-NEXT: vmovups (%rsp), %ymm1 # 32-byte Reload 404; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 405; CHECK-NEXT: addq $72, %rsp 406; CHECK-NEXT: retq 407 %r = call <5 x float> @llvm.tan.v5f32(<5 x float> %x) 408 ret <5 x float> %r 409} 410 411define <6 x float> @tan_v6f32(<6 x float> %x) nounwind { 412; CHECK-LABEL: tan_v6f32: 413; CHECK: # %bb.0: 414; CHECK-NEXT: subq $72, %rsp 415; CHECK-NEXT: vmovups %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill 416; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm0 417; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill 418; CHECK-NEXT: vzeroupper 419; CHECK-NEXT: callq tanf@PLT 420; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 421; CHECK-NEXT: vmovshdup (%rsp), %xmm0 # 16-byte Folded Reload 422; CHECK-NEXT: # xmm0 = mem[1,1,3,3] 423; CHECK-NEXT: callq tanf@PLT 424; CHECK-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload 425; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3] 426; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 427; CHECK-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload 428; CHECK-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 429; CHECK-NEXT: vzeroupper 430; CHECK-NEXT: callq tanf@PLT 431; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill 432; CHECK-NEXT: vmovshdup {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload 433; CHECK-NEXT: # xmm0 = mem[1,1,3,3] 434; CHECK-NEXT: callq tanf@PLT 435; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload 436; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3] 437; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill 438; CHECK-NEXT: vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload 439; CHECK-NEXT: # xmm0 = mem[1,0] 440; CHECK-NEXT: callq tanf@PLT 441; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload 442; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3] 443; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill 444; CHECK-NEXT: vpermilps $255, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload 445; CHECK-NEXT: # xmm0 = mem[3,3,3,3] 446; CHECK-NEXT: callq tanf@PLT 447; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload 448; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0] 449; CHECK-NEXT: vinsertf128 $1, {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 16-byte Folded Reload 450; CHECK-NEXT: addq $72, %rsp 451; CHECK-NEXT: retq 452 %r = call <6 x float> @llvm.tan.v6f32(<6 x float> %x) 453 ret <6 x float> %r 454} 455 456define <3 x double> @tan_v3f64(<3 x double> %x) nounwind { 457; CHECK-LABEL: tan_v3f64: 458; CHECK: # %bb.0: 459; CHECK-NEXT: subq $72, %rsp 460; CHECK-NEXT: vmovups %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill 461; CHECK-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 462; CHECK-NEXT: vzeroupper 463; CHECK-NEXT: callq tan@PLT 464; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill 465; CHECK-NEXT: vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload 466; CHECK-NEXT: # xmm0 = mem[1,0] 467; CHECK-NEXT: callq tan@PLT 468; CHECK-NEXT: vmovapd (%rsp), %xmm1 # 16-byte Reload 469; CHECK-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0] 470; CHECK-NEXT: vmovupd %ymm0, (%rsp) # 32-byte Spill 471; CHECK-NEXT: vmovups 
{{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload 472; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm0 473; CHECK-NEXT: vzeroupper 474; CHECK-NEXT: callq tan@PLT 475; CHECK-NEXT: vmovups (%rsp), %ymm1 # 32-byte Reload 476; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 477; CHECK-NEXT: addq $72, %rsp 478; CHECK-NEXT: retq 479 %r = call <3 x double> @llvm.tan.v3f64(<3 x double> %x) 480 ret <3 x double> %r 481} 482 483define <1 x float> @acos_v1f32(<1 x float> %x) nounwind { 484; CHECK-LABEL: acos_v1f32: 485; CHECK: # %bb.0: 486; CHECK-NEXT: pushq %rax 487; CHECK-NEXT: callq acosf@PLT 488; CHECK-NEXT: popq %rax 489; CHECK-NEXT: retq 490 %r = call <1 x float> @llvm.acos.v1f32(<1 x float> %x) 491 ret <1 x float> %r 492} 493 494define <2 x float> @acos_v2f32(<2 x float> %x) nounwind { 495; CHECK-LABEL: acos_v2f32: 496; CHECK: # %bb.0: 497; CHECK-NEXT: subq $40, %rsp 498; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill 499; CHECK-NEXT: callq acosf@PLT 500; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 501; CHECK-NEXT: vmovshdup (%rsp), %xmm0 # 16-byte Folded Reload 502; CHECK-NEXT: # xmm0 = mem[1,1,3,3] 503; CHECK-NEXT: callq acosf@PLT 504; CHECK-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload 505; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3] 506; CHECK-NEXT: addq $40, %rsp 507; CHECK-NEXT: retq 508 %r = call <2 x float> @llvm.acos.v2f32(<2 x float> %x) 509 ret <2 x float> %r 510} 511 512define <3 x float> @acos_v3f32(<3 x float> %x) nounwind { 513; CHECK-LABEL: acos_v3f32: 514; CHECK: # %bb.0: 515; CHECK-NEXT: subq $40, %rsp 516; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 517; CHECK-NEXT: callq acosf@PLT 518; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill 519; CHECK-NEXT: vmovshdup {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload 520; CHECK-NEXT: # xmm0 = mem[1,1,3,3] 521; CHECK-NEXT: callq acosf@PLT 522; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload 523; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3] 524; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill 525; CHECK-NEXT: vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload 526; CHECK-NEXT: # xmm0 = mem[1,0] 527; CHECK-NEXT: callq acosf@PLT 528; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload 529; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3] 530; CHECK-NEXT: addq $40, %rsp 531; CHECK-NEXT: retq 532 %r = call <3 x float> @llvm.acos.v3f32(<3 x float> %x) 533 ret <3 x float> %r 534} 535 536define <4 x float> @acos_v4f32(<4 x float> %x) nounwind { 537; CHECK-LABEL: acos_v4f32: 538; CHECK: # %bb.0: 539; CHECK-NEXT: subq $40, %rsp 540; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 541; CHECK-NEXT: callq acosf@PLT 542; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill 543; CHECK-NEXT: vmovshdup {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload 544; CHECK-NEXT: # xmm0 = mem[1,1,3,3] 545; CHECK-NEXT: callq acosf@PLT 546; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload 547; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3] 548; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill 549; CHECK-NEXT: vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload 550; CHECK-NEXT: # xmm0 = mem[1,0] 551; CHECK-NEXT: callq acosf@PLT 552; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload 553; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3] 554; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill 555; CHECK-NEXT: vpermilps $255, {{[-0-9]+}}(%r{{[sb]}}p), 
%xmm0 # 16-byte Folded Reload 556; CHECK-NEXT: # xmm0 = mem[3,3,3,3] 557; CHECK-NEXT: callq acosf@PLT 558; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload 559; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0] 560; CHECK-NEXT: addq $40, %rsp 561; CHECK-NEXT: retq 562 %r = call <4 x float> @llvm.acos.v4f32(<4 x float> %x) 563 ret <4 x float> %r 564} 565 566define <5 x float> @acos_v5f32(<5 x float> %x) nounwind { 567; CHECK-LABEL: acos_v5f32: 568; CHECK: # %bb.0: 569; CHECK-NEXT: subq $72, %rsp 570; CHECK-NEXT: vmovups %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill 571; CHECK-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 572; CHECK-NEXT: vzeroupper 573; CHECK-NEXT: callq acosf@PLT 574; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill 575; CHECK-NEXT: vmovshdup {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload 576; CHECK-NEXT: # xmm0 = mem[1,1,3,3] 577; CHECK-NEXT: callq acosf@PLT 578; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload 579; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3] 580; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill 581; CHECK-NEXT: vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload 582; CHECK-NEXT: # xmm0 = mem[1,0] 583; CHECK-NEXT: callq acosf@PLT 584; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload 585; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3] 586; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill 587; CHECK-NEXT: vpermilps $255, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload 588; CHECK-NEXT: # xmm0 = mem[3,3,3,3] 589; CHECK-NEXT: callq acosf@PLT 590; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload 591; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0] 592; CHECK-NEXT: vmovups %ymm0, (%rsp) # 32-byte Spill 593; CHECK-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload 594; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm0 595; CHECK-NEXT: vzeroupper 596; CHECK-NEXT: callq acosf@PLT 597; CHECK-NEXT: vmovups (%rsp), %ymm1 # 32-byte Reload 598; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 599; CHECK-NEXT: addq $72, %rsp 600; CHECK-NEXT: retq 601 %r = call <5 x float> @llvm.acos.v5f32(<5 x float> %x) 602 ret <5 x float> %r 603} 604 605define <6 x float> @acos_v6f32(<6 x float> %x) nounwind { 606; CHECK-LABEL: acos_v6f32: 607; CHECK: # %bb.0: 608; CHECK-NEXT: subq $72, %rsp 609; CHECK-NEXT: vmovups %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill 610; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm0 611; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill 612; CHECK-NEXT: vzeroupper 613; CHECK-NEXT: callq acosf@PLT 614; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 615; CHECK-NEXT: vmovshdup (%rsp), %xmm0 # 16-byte Folded Reload 616; CHECK-NEXT: # xmm0 = mem[1,1,3,3] 617; CHECK-NEXT: callq acosf@PLT 618; CHECK-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload 619; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3] 620; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 621; CHECK-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload 622; CHECK-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 623; CHECK-NEXT: vzeroupper 624; CHECK-NEXT: callq acosf@PLT 625; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill 626; CHECK-NEXT: vmovshdup {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload 627; CHECK-NEXT: # xmm0 = mem[1,1,3,3] 628; CHECK-NEXT: callq acosf@PLT 629; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload 630; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3] 631; CHECK-NEXT: 
vmovaps %xmm0, (%rsp) # 16-byte Spill 632; CHECK-NEXT: vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload 633; CHECK-NEXT: # xmm0 = mem[1,0] 634; CHECK-NEXT: callq acosf@PLT 635; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload 636; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3] 637; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill 638; CHECK-NEXT: vpermilps $255, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload 639; CHECK-NEXT: # xmm0 = mem[3,3,3,3] 640; CHECK-NEXT: callq acosf@PLT 641; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload 642; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0] 643; CHECK-NEXT: vinsertf128 $1, {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 16-byte Folded Reload 644; CHECK-NEXT: addq $72, %rsp 645; CHECK-NEXT: retq 646 %r = call <6 x float> @llvm.acos.v6f32(<6 x float> %x) 647 ret <6 x float> %r 648} 649 650define <3 x double> @acos_v3f64(<3 x double> %x) nounwind { 651; CHECK-LABEL: acos_v3f64: 652; CHECK: # %bb.0: 653; CHECK-NEXT: subq $72, %rsp 654; CHECK-NEXT: vmovups %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill 655; CHECK-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 656; CHECK-NEXT: vzeroupper 657; CHECK-NEXT: callq acos@PLT 658; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill 659; CHECK-NEXT: vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload 660; CHECK-NEXT: # xmm0 = mem[1,0] 661; CHECK-NEXT: callq acos@PLT 662; CHECK-NEXT: vmovapd (%rsp), %xmm1 # 16-byte Reload 663; CHECK-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0] 664; CHECK-NEXT: vmovupd %ymm0, (%rsp) # 32-byte Spill 665; CHECK-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload 666; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm0 667; CHECK-NEXT: vzeroupper 668; CHECK-NEXT: callq acos@PLT 669; CHECK-NEXT: vmovups (%rsp), %ymm1 # 32-byte Reload 670; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 671; CHECK-NEXT: addq $72, %rsp 672; CHECK-NEXT: retq 673 %r = call <3 x double> @llvm.acos.v3f64(<3 x double> %x) 674 ret <3 x double> %r 675} 676 677define <1 x float> @asin_v1f32(<1 x float> %x) nounwind { 678; CHECK-LABEL: asin_v1f32: 679; CHECK: # %bb.0: 680; CHECK-NEXT: pushq %rax 681; CHECK-NEXT: callq asinf@PLT 682; CHECK-NEXT: popq %rax 683; CHECK-NEXT: retq 684 %r = call <1 x float> @llvm.asin.v1f32(<1 x float> %x) 685 ret <1 x float> %r 686} 687 688define <2 x float> @asin_v2f32(<2 x float> %x) nounwind { 689; CHECK-LABEL: asin_v2f32: 690; CHECK: # %bb.0: 691; CHECK-NEXT: subq $40, %rsp 692; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill 693; CHECK-NEXT: callq asinf@PLT 694; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 695; CHECK-NEXT: vmovshdup (%rsp), %xmm0 # 16-byte Folded Reload 696; CHECK-NEXT: # xmm0 = mem[1,1,3,3] 697; CHECK-NEXT: callq asinf@PLT 698; CHECK-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload 699; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3] 700; CHECK-NEXT: addq $40, %rsp 701; CHECK-NEXT: retq 702 %r = call <2 x float> @llvm.asin.v2f32(<2 x float> %x) 703 ret <2 x float> %r 704} 705 706define <3 x float> @asin_v3f32(<3 x float> %x) nounwind { 707; CHECK-LABEL: asin_v3f32: 708; CHECK: # %bb.0: 709; CHECK-NEXT: subq $40, %rsp 710; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 711; CHECK-NEXT: callq asinf@PLT 712; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill 713; CHECK-NEXT: vmovshdup {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload 714; CHECK-NEXT: # xmm0 = mem[1,1,3,3] 715; CHECK-NEXT: callq asinf@PLT 
716; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload 717; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3] 718; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill 719; CHECK-NEXT: vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload 720; CHECK-NEXT: # xmm0 = mem[1,0] 721; CHECK-NEXT: callq asinf@PLT 722; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload 723; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3] 724; CHECK-NEXT: addq $40, %rsp 725; CHECK-NEXT: retq 726 %r = call <3 x float> @llvm.asin.v3f32(<3 x float> %x) 727 ret <3 x float> %r 728} 729 730define <4 x float> @asin_v4f32(<4 x float> %x) nounwind { 731; CHECK-LABEL: asin_v4f32: 732; CHECK: # %bb.0: 733; CHECK-NEXT: subq $40, %rsp 734; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 735; CHECK-NEXT: callq asinf@PLT 736; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill 737; CHECK-NEXT: vmovshdup {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload 738; CHECK-NEXT: # xmm0 = mem[1,1,3,3] 739; CHECK-NEXT: callq asinf@PLT 740; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload 741; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3] 742; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill 743; CHECK-NEXT: vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload 744; CHECK-NEXT: # xmm0 = mem[1,0] 745; CHECK-NEXT: callq asinf@PLT 746; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload 747; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3] 748; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill 749; CHECK-NEXT: vpermilps $255, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload 750; CHECK-NEXT: # xmm0 = mem[3,3,3,3] 751; CHECK-NEXT: callq asinf@PLT 752; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload 753; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0] 754; CHECK-NEXT: addq $40, %rsp 755; CHECK-NEXT: retq 756 %r = call <4 x float> @llvm.asin.v4f32(<4 x float> %x) 757 ret <4 x float> %r 758} 759 760define <5 x float> @asin_v5f32(<5 x float> %x) nounwind { 761; CHECK-LABEL: asin_v5f32: 762; CHECK: # %bb.0: 763; CHECK-NEXT: subq $72, %rsp 764; CHECK-NEXT: vmovups %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill 765; CHECK-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 766; CHECK-NEXT: vzeroupper 767; CHECK-NEXT: callq asinf@PLT 768; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill 769; CHECK-NEXT: vmovshdup {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload 770; CHECK-NEXT: # xmm0 = mem[1,1,3,3] 771; CHECK-NEXT: callq asinf@PLT 772; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload 773; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3] 774; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill 775; CHECK-NEXT: vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload 776; CHECK-NEXT: # xmm0 = mem[1,0] 777; CHECK-NEXT: callq asinf@PLT 778; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload 779; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3] 780; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill 781; CHECK-NEXT: vpermilps $255, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload 782; CHECK-NEXT: # xmm0 = mem[3,3,3,3] 783; CHECK-NEXT: callq asinf@PLT 784; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload 785; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0] 786; CHECK-NEXT: vmovups %ymm0, (%rsp) # 32-byte Spill 787; CHECK-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload 788; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm0 789; CHECK-NEXT: vzeroupper 
790; CHECK-NEXT: callq asinf@PLT 791; CHECK-NEXT: vmovups (%rsp), %ymm1 # 32-byte Reload 792; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 793; CHECK-NEXT: addq $72, %rsp 794; CHECK-NEXT: retq 795 %r = call <5 x float> @llvm.asin.v5f32(<5 x float> %x) 796 ret <5 x float> %r 797} 798 799define <6 x float> @asin_v6f32(<6 x float> %x) nounwind { 800; CHECK-LABEL: asin_v6f32: 801; CHECK: # %bb.0: 802; CHECK-NEXT: subq $72, %rsp 803; CHECK-NEXT: vmovups %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill 804; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm0 805; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill 806; CHECK-NEXT: vzeroupper 807; CHECK-NEXT: callq asinf@PLT 808; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 809; CHECK-NEXT: vmovshdup (%rsp), %xmm0 # 16-byte Folded Reload 810; CHECK-NEXT: # xmm0 = mem[1,1,3,3] 811; CHECK-NEXT: callq asinf@PLT 812; CHECK-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload 813; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3] 814; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 815; CHECK-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload 816; CHECK-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 817; CHECK-NEXT: vzeroupper 818; CHECK-NEXT: callq asinf@PLT 819; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill 820; CHECK-NEXT: vmovshdup {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload 821; CHECK-NEXT: # xmm0 = mem[1,1,3,3] 822; CHECK-NEXT: callq asinf@PLT 823; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload 824; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3] 825; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill 826; CHECK-NEXT: vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload 827; CHECK-NEXT: # xmm0 = mem[1,0] 828; CHECK-NEXT: callq asinf@PLT 829; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload 830; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3] 831; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill 832; CHECK-NEXT: vpermilps $255, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload 833; CHECK-NEXT: # xmm0 = mem[3,3,3,3] 834; CHECK-NEXT: callq asinf@PLT 835; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload 836; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0] 837; CHECK-NEXT: vinsertf128 $1, {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 16-byte Folded Reload 838; CHECK-NEXT: addq $72, %rsp 839; CHECK-NEXT: retq 840 %r = call <6 x float> @llvm.asin.v6f32(<6 x float> %x) 841 ret <6 x float> %r 842} 843 844define <3 x double> @asin_v3f64(<3 x double> %x) nounwind { 845; CHECK-LABEL: asin_v3f64: 846; CHECK: # %bb.0: 847; CHECK-NEXT: subq $72, %rsp 848; CHECK-NEXT: vmovups %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill 849; CHECK-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 850; CHECK-NEXT: vzeroupper 851; CHECK-NEXT: callq asin@PLT 852; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill 853; CHECK-NEXT: vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload 854; CHECK-NEXT: # xmm0 = mem[1,0] 855; CHECK-NEXT: callq asin@PLT 856; CHECK-NEXT: vmovapd (%rsp), %xmm1 # 16-byte Reload 857; CHECK-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0] 858; CHECK-NEXT: vmovupd %ymm0, (%rsp) # 32-byte Spill 859; CHECK-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload 860; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm0 861; CHECK-NEXT: vzeroupper 862; CHECK-NEXT: callq asin@PLT 863; CHECK-NEXT: vmovups (%rsp), %ymm1 # 32-byte Reload 864; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 865; 
CHECK-NEXT: addq $72, %rsp 866; CHECK-NEXT: retq 867 %r = call <3 x double> @llvm.asin.v3f64(<3 x double> %x) 868 ret <3 x double> %r 869} 870 871define <1 x float> @atan_v1f32(<1 x float> %x) nounwind { 872; CHECK-LABEL: atan_v1f32: 873; CHECK: # %bb.0: 874; CHECK-NEXT: pushq %rax 875; CHECK-NEXT: callq atanf@PLT 876; CHECK-NEXT: popq %rax 877; CHECK-NEXT: retq 878 %r = call <1 x float> @llvm.atan.v1f32(<1 x float> %x) 879 ret <1 x float> %r 880} 881 882define <2 x float> @atan_v2f32(<2 x float> %x) nounwind { 883; CHECK-LABEL: atan_v2f32: 884; CHECK: # %bb.0: 885; CHECK-NEXT: subq $40, %rsp 886; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill 887; CHECK-NEXT: callq atanf@PLT 888; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 889; CHECK-NEXT: vmovshdup (%rsp), %xmm0 # 16-byte Folded Reload 890; CHECK-NEXT: # xmm0 = mem[1,1,3,3] 891; CHECK-NEXT: callq atanf@PLT 892; CHECK-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload 893; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3] 894; CHECK-NEXT: addq $40, %rsp 895; CHECK-NEXT: retq 896 %r = call <2 x float> @llvm.atan.v2f32(<2 x float> %x) 897 ret <2 x float> %r 898} 899 900define <3 x float> @atan_v3f32(<3 x float> %x) nounwind { 901; CHECK-LABEL: atan_v3f32: 902; CHECK: # %bb.0: 903; CHECK-NEXT: subq $40, %rsp 904; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 905; CHECK-NEXT: callq atanf@PLT 906; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill 907; CHECK-NEXT: vmovshdup {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload 908; CHECK-NEXT: # xmm0 = mem[1,1,3,3] 909; CHECK-NEXT: callq atanf@PLT 910; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload 911; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3] 912; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill 913; CHECK-NEXT: vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload 914; CHECK-NEXT: # xmm0 = mem[1,0] 915; CHECK-NEXT: callq atanf@PLT 916; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload 917; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3] 918; CHECK-NEXT: addq $40, %rsp 919; CHECK-NEXT: retq 920 %r = call <3 x float> @llvm.atan.v3f32(<3 x float> %x) 921 ret <3 x float> %r 922} 923 924define <4 x float> @atan_v4f32(<4 x float> %x) nounwind { 925; CHECK-LABEL: atan_v4f32: 926; CHECK: # %bb.0: 927; CHECK-NEXT: subq $40, %rsp 928; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 929; CHECK-NEXT: callq atanf@PLT 930; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill 931; CHECK-NEXT: vmovshdup {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload 932; CHECK-NEXT: # xmm0 = mem[1,1,3,3] 933; CHECK-NEXT: callq atanf@PLT 934; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload 935; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3] 936; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill 937; CHECK-NEXT: vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload 938; CHECK-NEXT: # xmm0 = mem[1,0] 939; CHECK-NEXT: callq atanf@PLT 940; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload 941; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3] 942; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill 943; CHECK-NEXT: vpermilps $255, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload 944; CHECK-NEXT: # xmm0 = mem[3,3,3,3] 945; CHECK-NEXT: callq atanf@PLT 946; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload 947; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0] 948; CHECK-NEXT: addq $40, %rsp 949; CHECK-NEXT: 
retq 950 %r = call <4 x float> @llvm.atan.v4f32(<4 x float> %x) 951 ret <4 x float> %r 952} 953 954define <5 x float> @atan_v5f32(<5 x float> %x) nounwind { 955; CHECK-LABEL: atan_v5f32: 956; CHECK: # %bb.0: 957; CHECK-NEXT: subq $72, %rsp 958; CHECK-NEXT: vmovups %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill 959; CHECK-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 960; CHECK-NEXT: vzeroupper 961; CHECK-NEXT: callq atanf@PLT 962; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill 963; CHECK-NEXT: vmovshdup {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload 964; CHECK-NEXT: # xmm0 = mem[1,1,3,3] 965; CHECK-NEXT: callq atanf@PLT 966; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload 967; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3] 968; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill 969; CHECK-NEXT: vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload 970; CHECK-NEXT: # xmm0 = mem[1,0] 971; CHECK-NEXT: callq atanf@PLT 972; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload 973; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3] 974; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill 975; CHECK-NEXT: vpermilps $255, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload 976; CHECK-NEXT: # xmm0 = mem[3,3,3,3] 977; CHECK-NEXT: callq atanf@PLT 978; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload 979; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0] 980; CHECK-NEXT: vmovups %ymm0, (%rsp) # 32-byte Spill 981; CHECK-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload 982; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm0 983; CHECK-NEXT: vzeroupper 984; CHECK-NEXT: callq atanf@PLT 985; CHECK-NEXT: vmovups (%rsp), %ymm1 # 32-byte Reload 986; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 987; CHECK-NEXT: addq $72, %rsp 988; CHECK-NEXT: retq 989 %r = call <5 x float> @llvm.atan.v5f32(<5 x float> %x) 990 ret <5 x float> %r 991} 992 993define <6 x float> @atan_v6f32(<6 x float> %x) nounwind { 994; CHECK-LABEL: atan_v6f32: 995; CHECK: # %bb.0: 996; CHECK-NEXT: subq $72, %rsp 997; CHECK-NEXT: vmovups %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill 998; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm0 999; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill 1000; CHECK-NEXT: vzeroupper 1001; CHECK-NEXT: callq atanf@PLT 1002; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 1003; CHECK-NEXT: vmovshdup (%rsp), %xmm0 # 16-byte Folded Reload 1004; CHECK-NEXT: # xmm0 = mem[1,1,3,3] 1005; CHECK-NEXT: callq atanf@PLT 1006; CHECK-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload 1007; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3] 1008; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 1009; CHECK-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload 1010; CHECK-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 1011; CHECK-NEXT: vzeroupper 1012; CHECK-NEXT: callq atanf@PLT 1013; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill 1014; CHECK-NEXT: vmovshdup {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload 1015; CHECK-NEXT: # xmm0 = mem[1,1,3,3] 1016; CHECK-NEXT: callq atanf@PLT 1017; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload 1018; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3] 1019; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill 1020; CHECK-NEXT: vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload 1021; CHECK-NEXT: # xmm0 = mem[1,0] 1022; CHECK-NEXT: callq atanf@PLT 1023; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte 
Reload 1024; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3] 1025; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill 1026; CHECK-NEXT: vpermilps $255, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload 1027; CHECK-NEXT: # xmm0 = mem[3,3,3,3] 1028; CHECK-NEXT: callq atanf@PLT 1029; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload 1030; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0] 1031; CHECK-NEXT: vinsertf128 $1, {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 16-byte Folded Reload 1032; CHECK-NEXT: addq $72, %rsp 1033; CHECK-NEXT: retq 1034 %r = call <6 x float> @llvm.atan.v6f32(<6 x float> %x) 1035 ret <6 x float> %r 1036} 1037 1038define <3 x double> @atan_v3f64(<3 x double> %x) nounwind { 1039; CHECK-LABEL: atan_v3f64: 1040; CHECK: # %bb.0: 1041; CHECK-NEXT: subq $72, %rsp 1042; CHECK-NEXT: vmovups %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill 1043; CHECK-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 1044; CHECK-NEXT: vzeroupper 1045; CHECK-NEXT: callq atan@PLT 1046; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill 1047; CHECK-NEXT: vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload 1048; CHECK-NEXT: # xmm0 = mem[1,0] 1049; CHECK-NEXT: callq atan@PLT 1050; CHECK-NEXT: vmovapd (%rsp), %xmm1 # 16-byte Reload 1051; CHECK-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0] 1052; CHECK-NEXT: vmovupd %ymm0, (%rsp) # 32-byte Spill 1053; CHECK-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload 1054; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm0 1055; CHECK-NEXT: vzeroupper 1056; CHECK-NEXT: callq atan@PLT 1057; CHECK-NEXT: vmovups (%rsp), %ymm1 # 32-byte Reload 1058; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 1059; CHECK-NEXT: addq $72, %rsp 1060; CHECK-NEXT: retq 1061 %r = call <3 x double> @llvm.atan.v3f64(<3 x double> %x) 1062 ret <3 x double> %r 1063} 1064 1065define <1 x float> @cosh_v1f32(<1 x float> %x) nounwind { 1066; CHECK-LABEL: cosh_v1f32: 1067; CHECK: # %bb.0: 1068; CHECK-NEXT: pushq %rax 1069; CHECK-NEXT: callq coshf@PLT 1070; CHECK-NEXT: popq %rax 1071; CHECK-NEXT: retq 1072 %r = call <1 x float> @llvm.cosh.v1f32(<1 x float> %x) 1073 ret <1 x float> %r 1074} 1075 1076define <2 x float> @cosh_v2f32(<2 x float> %x) nounwind { 1077; CHECK-LABEL: cosh_v2f32: 1078; CHECK: # %bb.0: 1079; CHECK-NEXT: subq $40, %rsp 1080; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill 1081; CHECK-NEXT: callq coshf@PLT 1082; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 1083; CHECK-NEXT: vmovshdup (%rsp), %xmm0 # 16-byte Folded Reload 1084; CHECK-NEXT: # xmm0 = mem[1,1,3,3] 1085; CHECK-NEXT: callq coshf@PLT 1086; CHECK-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload 1087; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3] 1088; CHECK-NEXT: addq $40, %rsp 1089; CHECK-NEXT: retq 1090 %r = call <2 x float> @llvm.cosh.v2f32(<2 x float> %x) 1091 ret <2 x float> %r 1092} 1093 1094define <3 x float> @cosh_v3f32(<3 x float> %x) nounwind { 1095; CHECK-LABEL: cosh_v3f32: 1096; CHECK: # %bb.0: 1097; CHECK-NEXT: subq $40, %rsp 1098; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 1099; CHECK-NEXT: callq coshf@PLT 1100; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill 1101; CHECK-NEXT: vmovshdup {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload 1102; CHECK-NEXT: # xmm0 = mem[1,1,3,3] 1103; CHECK-NEXT: callq coshf@PLT 1104; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload 1105; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3] 1106; CHECK-NEXT: vmovaps %xmm0, 
(%rsp) # 16-byte Spill 1107; CHECK-NEXT: vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload 1108; CHECK-NEXT: # xmm0 = mem[1,0] 1109; CHECK-NEXT: callq coshf@PLT 1110; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload 1111; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3] 1112; CHECK-NEXT: addq $40, %rsp 1113; CHECK-NEXT: retq 1114 %r = call <3 x float> @llvm.cosh.v3f32(<3 x float> %x) 1115 ret <3 x float> %r 1116} 1117 1118define <4 x float> @cosh_v4f32(<4 x float> %x) nounwind { 1119; CHECK-LABEL: cosh_v4f32: 1120; CHECK: # %bb.0: 1121; CHECK-NEXT: subq $40, %rsp 1122; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 1123; CHECK-NEXT: callq coshf@PLT 1124; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill 1125; CHECK-NEXT: vmovshdup {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload 1126; CHECK-NEXT: # xmm0 = mem[1,1,3,3] 1127; CHECK-NEXT: callq coshf@PLT 1128; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload 1129; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3] 1130; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill 1131; CHECK-NEXT: vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload 1132; CHECK-NEXT: # xmm0 = mem[1,0] 1133; CHECK-NEXT: callq coshf@PLT 1134; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload 1135; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3] 1136; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill 1137; CHECK-NEXT: vpermilps $255, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload 1138; CHECK-NEXT: # xmm0 = mem[3,3,3,3] 1139; CHECK-NEXT: callq coshf@PLT 1140; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload 1141; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0] 1142; CHECK-NEXT: addq $40, %rsp 1143; CHECK-NEXT: retq 1144 %r = call <4 x float> @llvm.cosh.v4f32(<4 x float> %x) 1145 ret <4 x float> %r 1146} 1147 1148define <5 x float> @cosh_v5f32(<5 x float> %x) nounwind { 1149; CHECK-LABEL: cosh_v5f32: 1150; CHECK: # %bb.0: 1151; CHECK-NEXT: subq $72, %rsp 1152; CHECK-NEXT: vmovups %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill 1153; CHECK-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 1154; CHECK-NEXT: vzeroupper 1155; CHECK-NEXT: callq coshf@PLT 1156; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill 1157; CHECK-NEXT: vmovshdup {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload 1158; CHECK-NEXT: # xmm0 = mem[1,1,3,3] 1159; CHECK-NEXT: callq coshf@PLT 1160; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload 1161; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3] 1162; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill 1163; CHECK-NEXT: vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload 1164; CHECK-NEXT: # xmm0 = mem[1,0] 1165; CHECK-NEXT: callq coshf@PLT 1166; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload 1167; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3] 1168; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill 1169; CHECK-NEXT: vpermilps $255, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload 1170; CHECK-NEXT: # xmm0 = mem[3,3,3,3] 1171; CHECK-NEXT: callq coshf@PLT 1172; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload 1173; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0] 1174; CHECK-NEXT: vmovups %ymm0, (%rsp) # 32-byte Spill 1175; CHECK-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload 1176; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm0 1177; CHECK-NEXT: vzeroupper 1178; CHECK-NEXT: callq coshf@PLT 1179; CHECK-NEXT: vmovups (%rsp), %ymm1 # 32-byte 
Reload 1180; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 1181; CHECK-NEXT: addq $72, %rsp 1182; CHECK-NEXT: retq 1183 %r = call <5 x float> @llvm.cosh.v5f32(<5 x float> %x) 1184 ret <5 x float> %r 1185} 1186 1187define <6 x float> @cosh_v6f32(<6 x float> %x) nounwind { 1188; CHECK-LABEL: cosh_v6f32: 1189; CHECK: # %bb.0: 1190; CHECK-NEXT: subq $72, %rsp 1191; CHECK-NEXT: vmovups %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill 1192; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm0 1193; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill 1194; CHECK-NEXT: vzeroupper 1195; CHECK-NEXT: callq coshf@PLT 1196; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 1197; CHECK-NEXT: vmovshdup (%rsp), %xmm0 # 16-byte Folded Reload 1198; CHECK-NEXT: # xmm0 = mem[1,1,3,3] 1199; CHECK-NEXT: callq coshf@PLT 1200; CHECK-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload 1201; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3] 1202; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 1203; CHECK-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload 1204; CHECK-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 1205; CHECK-NEXT: vzeroupper 1206; CHECK-NEXT: callq coshf@PLT 1207; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill 1208; CHECK-NEXT: vmovshdup {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload 1209; CHECK-NEXT: # xmm0 = mem[1,1,3,3] 1210; CHECK-NEXT: callq coshf@PLT 1211; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload 1212; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3] 1213; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill 1214; CHECK-NEXT: vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload 1215; CHECK-NEXT: # xmm0 = mem[1,0] 1216; CHECK-NEXT: callq coshf@PLT 1217; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload 1218; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3] 1219; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill 1220; CHECK-NEXT: vpermilps $255, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload 1221; CHECK-NEXT: # xmm0 = mem[3,3,3,3] 1222; CHECK-NEXT: callq coshf@PLT 1223; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload 1224; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0] 1225; CHECK-NEXT: vinsertf128 $1, {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 16-byte Folded Reload 1226; CHECK-NEXT: addq $72, %rsp 1227; CHECK-NEXT: retq 1228 %r = call <6 x float> @llvm.cosh.v6f32(<6 x float> %x) 1229 ret <6 x float> %r 1230} 1231 1232define <3 x double> @cosh_v3f64(<3 x double> %x) nounwind { 1233; CHECK-LABEL: cosh_v3f64: 1234; CHECK: # %bb.0: 1235; CHECK-NEXT: subq $72, %rsp 1236; CHECK-NEXT: vmovups %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill 1237; CHECK-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 1238; CHECK-NEXT: vzeroupper 1239; CHECK-NEXT: callq cosh@PLT 1240; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill 1241; CHECK-NEXT: vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload 1242; CHECK-NEXT: # xmm0 = mem[1,0] 1243; CHECK-NEXT: callq cosh@PLT 1244; CHECK-NEXT: vmovapd (%rsp), %xmm1 # 16-byte Reload 1245; CHECK-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0] 1246; CHECK-NEXT: vmovupd %ymm0, (%rsp) # 32-byte Spill 1247; CHECK-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload 1248; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm0 1249; CHECK-NEXT: vzeroupper 1250; CHECK-NEXT: callq cosh@PLT 1251; CHECK-NEXT: vmovups (%rsp), %ymm1 # 32-byte Reload 1252; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 1253; CHECK-NEXT: 
addq $72, %rsp 1254; CHECK-NEXT: retq 1255 %r = call <3 x double> @llvm.cosh.v3f64(<3 x double> %x) 1256 ret <3 x double> %r 1257} 1258 1259define <1 x float> @sinh_v1f32(<1 x float> %x) nounwind { 1260; CHECK-LABEL: sinh_v1f32: 1261; CHECK: # %bb.0: 1262; CHECK-NEXT: pushq %rax 1263; CHECK-NEXT: callq sinhf@PLT 1264; CHECK-NEXT: popq %rax 1265; CHECK-NEXT: retq 1266 %r = call <1 x float> @llvm.sinh.v1f32(<1 x float> %x) 1267 ret <1 x float> %r 1268} 1269 1270define <2 x float> @sinh_v2f32(<2 x float> %x) nounwind { 1271; CHECK-LABEL: sinh_v2f32: 1272; CHECK: # %bb.0: 1273; CHECK-NEXT: subq $40, %rsp 1274; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill 1275; CHECK-NEXT: callq sinhf@PLT 1276; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 1277; CHECK-NEXT: vmovshdup (%rsp), %xmm0 # 16-byte Folded Reload 1278; CHECK-NEXT: # xmm0 = mem[1,1,3,3] 1279; CHECK-NEXT: callq sinhf@PLT 1280; CHECK-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload 1281; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3] 1282; CHECK-NEXT: addq $40, %rsp 1283; CHECK-NEXT: retq 1284 %r = call <2 x float> @llvm.sinh.v2f32(<2 x float> %x) 1285 ret <2 x float> %r 1286} 1287 1288define <3 x float> @sinh_v3f32(<3 x float> %x) nounwind { 1289; CHECK-LABEL: sinh_v3f32: 1290; CHECK: # %bb.0: 1291; CHECK-NEXT: subq $40, %rsp 1292; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 1293; CHECK-NEXT: callq sinhf@PLT 1294; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill 1295; CHECK-NEXT: vmovshdup {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload 1296; CHECK-NEXT: # xmm0 = mem[1,1,3,3] 1297; CHECK-NEXT: callq sinhf@PLT 1298; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload 1299; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3] 1300; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill 1301; CHECK-NEXT: vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload 1302; CHECK-NEXT: # xmm0 = mem[1,0] 1303; CHECK-NEXT: callq sinhf@PLT 1304; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload 1305; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3] 1306; CHECK-NEXT: addq $40, %rsp 1307; CHECK-NEXT: retq 1308 %r = call <3 x float> @llvm.sinh.v3f32(<3 x float> %x) 1309 ret <3 x float> %r 1310} 1311 1312define <4 x float> @sinh_v4f32(<4 x float> %x) nounwind { 1313; CHECK-LABEL: sinh_v4f32: 1314; CHECK: # %bb.0: 1315; CHECK-NEXT: subq $40, %rsp 1316; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 1317; CHECK-NEXT: callq sinhf@PLT 1318; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill 1319; CHECK-NEXT: vmovshdup {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload 1320; CHECK-NEXT: # xmm0 = mem[1,1,3,3] 1321; CHECK-NEXT: callq sinhf@PLT 1322; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload 1323; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3] 1324; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill 1325; CHECK-NEXT: vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload 1326; CHECK-NEXT: # xmm0 = mem[1,0] 1327; CHECK-NEXT: callq sinhf@PLT 1328; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload 1329; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3] 1330; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill 1331; CHECK-NEXT: vpermilps $255, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload 1332; CHECK-NEXT: # xmm0 = mem[3,3,3,3] 1333; CHECK-NEXT: callq sinhf@PLT 1334; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload 1335; CHECK-NEXT: vinsertps {{.*#+}} xmm0 
define <1 x float> @sinh_v1f32(<1 x float> %x) nounwind {
; CHECK-LABEL: sinh_v1f32:
; CHECK: # %bb.0:
; CHECK-NEXT: pushq %rax
; CHECK-NEXT: callq sinhf@PLT
; CHECK-NEXT: popq %rax
; CHECK-NEXT: retq
  %r = call <1 x float> @llvm.sinh.v1f32(<1 x float> %x)
  ret <1 x float> %r
}

define <2 x float> @sinh_v2f32(<2 x float> %x) nounwind {
; CHECK-LABEL: sinh_v2f32:
; CHECK: # %bb.0:
; CHECK-NEXT: subq $40, %rsp
; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT: callq sinhf@PLT
; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: vmovshdup (%rsp), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: # xmm0 = mem[1,1,3,3]
; CHECK-NEXT: callq sinhf@PLT
; CHECK-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
; CHECK-NEXT: addq $40, %rsp
; CHECK-NEXT: retq
  %r = call <2 x float> @llvm.sinh.v2f32(<2 x float> %x)
  ret <2 x float> %r
}

define <3 x float> @sinh_v3f32(<3 x float> %x) nounwind {
; CHECK-LABEL: sinh_v3f32:
; CHECK: # %bb.0:
; CHECK-NEXT: subq $40, %rsp
; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: callq sinhf@PLT
; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT: vmovshdup {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: # xmm0 = mem[1,1,3,3]
; CHECK-NEXT: callq sinhf@PLT
; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT: vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: # xmm0 = mem[1,0]
; CHECK-NEXT: callq sinhf@PLT
; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3]
; CHECK-NEXT: addq $40, %rsp
; CHECK-NEXT: retq
  %r = call <3 x float> @llvm.sinh.v3f32(<3 x float> %x)
  ret <3 x float> %r
}

define <4 x float> @sinh_v4f32(<4 x float> %x) nounwind {
; CHECK-LABEL: sinh_v4f32:
; CHECK: # %bb.0:
; CHECK-NEXT: subq $40, %rsp
; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: callq sinhf@PLT
; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT: vmovshdup {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: # xmm0 = mem[1,1,3,3]
; CHECK-NEXT: callq sinhf@PLT
; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT: vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: # xmm0 = mem[1,0]
; CHECK-NEXT: callq sinhf@PLT
; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3]
; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT: vpermilps $255, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: # xmm0 = mem[3,3,3,3]
; CHECK-NEXT: callq sinhf@PLT
; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
; CHECK-NEXT: addq $40, %rsp
; CHECK-NEXT: retq
  %r = call <4 x float> @llvm.sinh.v4f32(<4 x float> %x)
  ret <4 x float> %r
}

define <5 x float> @sinh_v5f32(<5 x float> %x) nounwind {
; CHECK-LABEL: sinh_v5f32:
; CHECK: # %bb.0:
; CHECK-NEXT: subq $72, %rsp
; CHECK-NEXT: vmovups %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; CHECK-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: callq sinhf@PLT
; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT: vmovshdup {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: # xmm0 = mem[1,1,3,3]
; CHECK-NEXT: callq sinhf@PLT
; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT: vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: # xmm0 = mem[1,0]
; CHECK-NEXT: callq sinhf@PLT
; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3]
; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT: vpermilps $255, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: # xmm0 = mem[3,3,3,3]
; CHECK-NEXT: callq sinhf@PLT
; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
; CHECK-NEXT: vmovups %ymm0, (%rsp) # 32-byte Spill
; CHECK-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload
; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm0
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: callq sinhf@PLT
; CHECK-NEXT: vmovups (%rsp), %ymm1 # 32-byte Reload
; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; CHECK-NEXT: addq $72, %rsp
; CHECK-NEXT: retq
  %r = call <5 x float> @llvm.sinh.v5f32(<5 x float> %x)
  ret <5 x float> %r
}

define <6 x float> @sinh_v6f32(<6 x float> %x) nounwind {
; CHECK-LABEL: sinh_v6f32:
; CHECK: # %bb.0:
; CHECK-NEXT: subq $72, %rsp
; CHECK-NEXT: vmovups %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm0
; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: callq sinhf@PLT
; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: vmovshdup (%rsp), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: # xmm0 = mem[1,1,3,3]
; CHECK-NEXT: callq sinhf@PLT
; CHECK-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload
; CHECK-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: callq sinhf@PLT
; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT: vmovshdup {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: # xmm0 = mem[1,1,3,3]
; CHECK-NEXT: callq sinhf@PLT
; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT: vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: # xmm0 = mem[1,0]
; CHECK-NEXT: callq sinhf@PLT
; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3]
; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT: vpermilps $255, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: # xmm0 = mem[3,3,3,3]
; CHECK-NEXT: callq sinhf@PLT
; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
; CHECK-NEXT: vinsertf128 $1, {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 16-byte Folded Reload
; CHECK-NEXT: addq $72, %rsp
; CHECK-NEXT: retq
  %r = call <6 x float> @llvm.sinh.v6f32(<6 x float> %x)
  ret <6 x float> %r
}

define <3 x double> @sinh_v3f64(<3 x double> %x) nounwind {
; CHECK-LABEL: sinh_v3f64:
; CHECK: # %bb.0:
; CHECK-NEXT: subq $72, %rsp
; CHECK-NEXT: vmovups %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; CHECK-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: callq sinh@PLT
; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT: vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: # xmm0 = mem[1,0]
; CHECK-NEXT: callq sinh@PLT
; CHECK-NEXT: vmovapd (%rsp), %xmm1 # 16-byte Reload
; CHECK-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; CHECK-NEXT: vmovupd %ymm0, (%rsp) # 32-byte Spill
; CHECK-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload
; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm0
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: callq sinh@PLT
; CHECK-NEXT: vmovups (%rsp), %ymm1 # 32-byte Reload
; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; CHECK-NEXT: addq $72, %rsp
; CHECK-NEXT: retq
  %r = call <3 x double> @llvm.sinh.v3f64(<3 x double> %x)
  ret <3 x double> %r
}

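; tanh is handled the same way, calling tanhf (tanh for the f64 case) per
; element.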
define <1 x float> @tanh_v1f32(<1 x float> %x) nounwind {
; CHECK-LABEL: tanh_v1f32:
; CHECK: # %bb.0:
; CHECK-NEXT: pushq %rax
; CHECK-NEXT: callq tanhf@PLT
; CHECK-NEXT: popq %rax
; CHECK-NEXT: retq
  %r = call <1 x float> @llvm.tanh.v1f32(<1 x float> %x)
  ret <1 x float> %r
}

define <2 x float> @tanh_v2f32(<2 x float> %x) nounwind {
; CHECK-LABEL: tanh_v2f32:
; CHECK: # %bb.0:
; CHECK-NEXT: subq $40, %rsp
; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT: callq tanhf@PLT
; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: vmovshdup (%rsp), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: # xmm0 = mem[1,1,3,3]
; CHECK-NEXT: callq tanhf@PLT
; CHECK-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
; CHECK-NEXT: addq $40, %rsp
; CHECK-NEXT: retq
  %r = call <2 x float> @llvm.tanh.v2f32(<2 x float> %x)
  ret <2 x float> %r
}

define <3 x float> @tanh_v3f32(<3 x float> %x) nounwind {
; CHECK-LABEL: tanh_v3f32:
; CHECK: # %bb.0:
; CHECK-NEXT: subq $40, %rsp
; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: callq tanhf@PLT
; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT: vmovshdup {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: # xmm0 = mem[1,1,3,3]
; CHECK-NEXT: callq tanhf@PLT
; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT: vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: # xmm0 = mem[1,0]
; CHECK-NEXT: callq tanhf@PLT
; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3]
; CHECK-NEXT: addq $40, %rsp
; CHECK-NEXT: retq
  %r = call <3 x float> @llvm.tanh.v3f32(<3 x float> %x)
  ret <3 x float> %r
}

define <4 x float> @tanh_v4f32(<4 x float> %x) nounwind {
; CHECK-LABEL: tanh_v4f32:
; CHECK: # %bb.0:
; CHECK-NEXT: subq $40, %rsp
; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: callq tanhf@PLT
; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT: vmovshdup {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: # xmm0 = mem[1,1,3,3]
; CHECK-NEXT: callq tanhf@PLT
; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT: vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: # xmm0 = mem[1,0]
; CHECK-NEXT: callq tanhf@PLT
; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3]
; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT: vpermilps $255, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: # xmm0 = mem[3,3,3,3]
; CHECK-NEXT: callq tanhf@PLT
; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
; CHECK-NEXT: addq $40, %rsp
; CHECK-NEXT: retq
  %r = call <4 x float> @llvm.tanh.v4f32(<4 x float> %x)
  ret <4 x float> %r
}

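; The 5- and 6-element cases are widened to 256 bits: the input is spilled
; as a ymm value, vzeroupper is emitted before each libcall, and the high
; half is split off and recombined with vextractf128 / vinsertf128.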
define <5 x float> @tanh_v5f32(<5 x float> %x) nounwind {
; CHECK-LABEL: tanh_v5f32:
; CHECK: # %bb.0:
; CHECK-NEXT: subq $72, %rsp
; CHECK-NEXT: vmovups %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; CHECK-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: callq tanhf@PLT
; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT: vmovshdup {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: # xmm0 = mem[1,1,3,3]
; CHECK-NEXT: callq tanhf@PLT
; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT: vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: # xmm0 = mem[1,0]
; CHECK-NEXT: callq tanhf@PLT
; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3]
; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT: vpermilps $255, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: # xmm0 = mem[3,3,3,3]
; CHECK-NEXT: callq tanhf@PLT
; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
; CHECK-NEXT: vmovups %ymm0, (%rsp) # 32-byte Spill
; CHECK-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload
; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm0
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: callq tanhf@PLT
; CHECK-NEXT: vmovups (%rsp), %ymm1 # 32-byte Reload
; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; CHECK-NEXT: addq $72, %rsp
; CHECK-NEXT: retq
  %r = call <5 x float> @llvm.tanh.v5f32(<5 x float> %x)
  ret <5 x float> %r
}

define <6 x float> @tanh_v6f32(<6 x float> %x) nounwind {
; CHECK-LABEL: tanh_v6f32:
; CHECK: # %bb.0:
; CHECK-NEXT: subq $72, %rsp
; CHECK-NEXT: vmovups %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm0
; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: callq tanhf@PLT
; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: vmovshdup (%rsp), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: # xmm0 = mem[1,1,3,3]
; CHECK-NEXT: callq tanhf@PLT
; CHECK-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload
; CHECK-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: callq tanhf@PLT
; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT: vmovshdup {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: # xmm0 = mem[1,1,3,3]
; CHECK-NEXT: callq tanhf@PLT
; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT: vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: # xmm0 = mem[1,0]
; CHECK-NEXT: callq tanhf@PLT
; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3]
; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT: vpermilps $255, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: # xmm0 = mem[3,3,3,3]
; CHECK-NEXT: callq tanhf@PLT
; CHECK-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
; CHECK-NEXT: vinsertf128 $1, {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 16-byte Folded Reload
; CHECK-NEXT: addq $72, %rsp
; CHECK-NEXT: retq
  %r = call <6 x float> @llvm.tanh.v6f32(<6 x float> %x)
  ret <6 x float> %r
}

define <3 x double> @tanh_v3f64(<3 x double> %x) nounwind {
; CHECK-LABEL: tanh_v3f64:
; CHECK: # %bb.0:
; CHECK-NEXT: subq $72, %rsp
; CHECK-NEXT: vmovups %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; CHECK-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: callq tanh@PLT
; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT: vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: # xmm0 = mem[1,0]
; CHECK-NEXT: callq tanh@PLT
; CHECK-NEXT: vmovapd (%rsp), %xmm1 # 16-byte Reload
; CHECK-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; CHECK-NEXT: vmovupd %ymm0, (%rsp) # 32-byte Spill
; CHECK-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload
; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm0
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: callq tanh@PLT
; CHECK-NEXT: vmovups (%rsp), %ymm1 # 32-byte Reload
; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; CHECK-NEXT: addq $72, %rsp
; CHECK-NEXT: retq
  %r = call <3 x double> @llvm.tanh.v3f64(<3 x double> %x)
  ret <3 x double> %r
}

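; The remaining <2 x float> tests cover the other declared intrinsics.
; fabs, ceil, floor, nearbyint, rint, round, sqrt, and trunc lower to
; inline AVX code (vandps / vroundps / vsqrtps); cos, exp, exp2, log,
; log10, and log2 still scalarize to libcalls.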
define <2 x float> @fabs_v2f32(<2 x float> %x) nounwind {
; CHECK-LABEL: fabs_v2f32:
; CHECK: # %bb.0:
; CHECK-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-NEXT: retq
  %r = call <2 x float> @llvm.fabs.v2f32(<2 x float> %x)
  ret <2 x float> %r
}

define <2 x float> @ceil_v2f32(<2 x float> %x) nounwind {
; CHECK-LABEL: ceil_v2f32:
; CHECK: # %bb.0:
; CHECK-NEXT: vroundps $10, %xmm0, %xmm0
; CHECK-NEXT: retq
  %r = call <2 x float> @llvm.ceil.v2f32(<2 x float> %x)
  ret <2 x float> %r
}

define <2 x float> @cos_v2f32(<2 x float> %x) nounwind {
; CHECK-LABEL: cos_v2f32:
; CHECK: # %bb.0:
; CHECK-NEXT: subq $40, %rsp
; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT: callq cosf@PLT
; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: vmovshdup (%rsp), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: # xmm0 = mem[1,1,3,3]
; CHECK-NEXT: callq cosf@PLT
; CHECK-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
; CHECK-NEXT: addq $40, %rsp
; CHECK-NEXT: retq
  %r = call <2 x float> @llvm.cos.v2f32(<2 x float> %x)
  ret <2 x float> %r
}

define <2 x float> @exp_v2f32(<2 x float> %x) nounwind {
; CHECK-LABEL: exp_v2f32:
; CHECK: # %bb.0:
; CHECK-NEXT: subq $40, %rsp
; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT: callq expf@PLT
; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: vmovshdup (%rsp), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: # xmm0 = mem[1,1,3,3]
; CHECK-NEXT: callq expf@PLT
; CHECK-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
; CHECK-NEXT: addq $40, %rsp
; CHECK-NEXT: retq
  %r = call <2 x float> @llvm.exp.v2f32(<2 x float> %x)
  ret <2 x float> %r
}

define <2 x float> @exp2_v2f32(<2 x float> %x) nounwind {
; CHECK-LABEL: exp2_v2f32:
; CHECK: # %bb.0:
; CHECK-NEXT: subq $40, %rsp
; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT: callq exp2f@PLT
; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: vmovshdup (%rsp), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: # xmm0 = mem[1,1,3,3]
; CHECK-NEXT: callq exp2f@PLT
; CHECK-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
; CHECK-NEXT: addq $40, %rsp
; CHECK-NEXT: retq
  %r = call <2 x float> @llvm.exp2.v2f32(<2 x float> %x)
  ret <2 x float> %r
}

define <2 x float> @floor_v2f32(<2 x float> %x) nounwind {
; CHECK-LABEL: floor_v2f32:
; CHECK: # %bb.0:
; CHECK-NEXT: vroundps $9, %xmm0, %xmm0
; CHECK-NEXT: retq
  %r = call <2 x float> @llvm.floor.v2f32(<2 x float> %x)
  ret <2 x float> %r
}

define <2 x float> @log_v2f32(<2 x float> %x) nounwind {
; CHECK-LABEL: log_v2f32:
; CHECK: # %bb.0:
; CHECK-NEXT: subq $40, %rsp
; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT: callq logf@PLT
; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: vmovshdup (%rsp), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: # xmm0 = mem[1,1,3,3]
; CHECK-NEXT: callq logf@PLT
; CHECK-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
; CHECK-NEXT: addq $40, %rsp
; CHECK-NEXT: retq
  %r = call <2 x float> @llvm.log.v2f32(<2 x float> %x)
  ret <2 x float> %r
}

define <2 x float> @log10_v2f32(<2 x float> %x) nounwind {
; CHECK-LABEL: log10_v2f32:
; CHECK: # %bb.0:
; CHECK-NEXT: subq $40, %rsp
; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT: callq log10f@PLT
; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: vmovshdup (%rsp), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: # xmm0 = mem[1,1,3,3]
; CHECK-NEXT: callq log10f@PLT
; CHECK-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
; CHECK-NEXT: addq $40, %rsp
; CHECK-NEXT: retq
  %r = call <2 x float> @llvm.log10.v2f32(<2 x float> %x)
  ret <2 x float> %r
}

define <2 x float> @log2_v2f32(<2 x float> %x) nounwind {
; CHECK-LABEL: log2_v2f32:
; CHECK: # %bb.0:
; CHECK-NEXT: subq $40, %rsp
; CHECK-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT: callq log2f@PLT
; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: vmovshdup (%rsp), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: # xmm0 = mem[1,1,3,3]
; CHECK-NEXT: callq log2f@PLT
; CHECK-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
; CHECK-NEXT: addq $40, %rsp
; CHECK-NEXT: retq
  %r = call <2 x float> @llvm.log2.v2f32(<2 x float> %x)
  ret <2 x float> %r
}

define <2 x float> @nearbyint__v2f32(<2 x float> %x) nounwind {
; CHECK-LABEL: nearbyint__v2f32:
; CHECK: # %bb.0:
; CHECK-NEXT: vroundps $12, %xmm0, %xmm0
; CHECK-NEXT: retq
  %r = call <2 x float> @llvm.nearbyint.v2f32(<2 x float> %x)
  ret <2 x float> %r
}

define <2 x float> @rint_v2f32(<2 x float> %x) nounwind {
; CHECK-LABEL: rint_v2f32:
; CHECK: # %bb.0:
; CHECK-NEXT: vroundps $4, %xmm0, %xmm0
; CHECK-NEXT: retq
  %r = call <2 x float> @llvm.rint.v2f32(<2 x float> %x)
  ret <2 x float> %r
}

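; round has no single AVX instruction: the lowering copies the sign of the
; input onto 0.5 (vandps + vorps), adds that bias, and then truncates with
; vroundps $11 to get round-half-away-from-zero.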
define <2 x float> @round_v2f32(<2 x float> %x) nounwind {
; CHECK-LABEL: round_v2f32:
; CHECK: # %bb.0:
; CHECK-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
; CHECK-NEXT: vorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; CHECK-NEXT: vaddps %xmm1, %xmm0, %xmm0
; CHECK-NEXT: vroundps $11, %xmm0, %xmm0
; CHECK-NEXT: retq
  %r = call <2 x float> @llvm.round.v2f32(<2 x float> %x)
  ret <2 x float> %r
}

define <2 x float> @sqrt_v2f32(<2 x float> %x) nounwind {
; CHECK-LABEL: sqrt_v2f32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsqrtps %xmm0, %xmm0
; CHECK-NEXT: retq
  %r = call <2 x float> @llvm.sqrt.v2f32(<2 x float> %x)
  ret <2 x float> %r
}

define <2 x float> @trunc_v2f32(<2 x float> %x) nounwind {
; CHECK-LABEL: trunc_v2f32:
; CHECK: # %bb.0:
; CHECK-NEXT: vroundps $11, %xmm0, %xmm0
; CHECK-NEXT: retq
  %r = call <2 x float> @llvm.trunc.v2f32(<2 x float> %x)
  ret <2 x float> %r
}