1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py 2; RUN: llc -O3 -mtriple=x86_64-pc-linux < %s | FileCheck %s 3; RUN: llc -O3 -mtriple=x86_64-pc-linux -mattr=+avx < %s | FileCheck %s --check-prefix=AVX --check-prefix=AVX1 4; RUN: llc -O3 -mtriple=x86_64-pc-linux -mattr=+avx512f < %s | FileCheck %s --check-prefix=AVX --check-prefix=AVX512 --check-prefix=AVX512F 5; RUN: llc -O3 -mtriple=x86_64-pc-linux -mattr=+avx512dq < %s | FileCheck %s --check-prefix=AVX --check-prefix=AVX512 --check-prefix=AVX512DQ 6 7define <1 x float> @constrained_vector_fdiv_v1f32() #0 { 8; CHECK-LABEL: constrained_vector_fdiv_v1f32: 9; CHECK: # %bb.0: # %entry 10; CHECK-NEXT: movss {{.*#+}} xmm0 = [1.0E+0,0.0E+0,0.0E+0,0.0E+0] 11; CHECK-NEXT: divss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 12; CHECK-NEXT: retq 13; 14; AVX-LABEL: constrained_vector_fdiv_v1f32: 15; AVX: # %bb.0: # %entry 16; AVX-NEXT: vmovss {{.*#+}} xmm0 = [1.0E+0,0.0E+0,0.0E+0,0.0E+0] 17; AVX-NEXT: vdivss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 18; AVX-NEXT: retq 19entry: 20 %div = call <1 x float> @llvm.experimental.constrained.fdiv.v1f32( 21 <1 x float> <float 1.000000e+00>, 22 <1 x float> <float 1.000000e+01>, 23 metadata !"round.dynamic", 24 metadata !"fpexcept.strict") #0 25 ret <1 x float> %div 26} 27 28define <2 x double> @constrained_vector_fdiv_v2f64() #0 { 29; CHECK-LABEL: constrained_vector_fdiv_v2f64: 30; CHECK: # %bb.0: # %entry 31; CHECK-NEXT: movapd {{.*#+}} xmm0 = [1.0E+0,2.0E+0] 32; CHECK-NEXT: divpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 33; CHECK-NEXT: retq 34; 35; AVX-LABEL: constrained_vector_fdiv_v2f64: 36; AVX: # %bb.0: # %entry 37; AVX-NEXT: vmovapd {{.*#+}} xmm0 = [1.0E+0,2.0E+0] 38; AVX-NEXT: vdivpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 39; AVX-NEXT: retq 40entry: 41 %div = call <2 x double> @llvm.experimental.constrained.fdiv.v2f64( 42 <2 x double> <double 1.000000e+00, double 2.000000e+00>, 43 <2 x double> <double 1.000000e+01, double 1.000000e+01>, 44 metadata 
!"round.dynamic", 45 metadata !"fpexcept.strict") #0 46 ret <2 x double> %div 47} 48 49define <3 x float> @constrained_vector_fdiv_v3f32() #0 { 50; CHECK-LABEL: constrained_vector_fdiv_v3f32: 51; CHECK: # %bb.0: # %entry 52; CHECK-NEXT: movss {{.*#+}} xmm1 = [1.0E+1,0.0E+0,0.0E+0,0.0E+0] 53; CHECK-NEXT: movss {{.*#+}} xmm2 = [3.0E+0,0.0E+0,0.0E+0,0.0E+0] 54; CHECK-NEXT: divss %xmm1, %xmm2 55; CHECK-NEXT: movss {{.*#+}} xmm0 = [1.0E+0,0.0E+0,0.0E+0,0.0E+0] 56; CHECK-NEXT: divss %xmm1, %xmm0 57; CHECK-NEXT: movss {{.*#+}} xmm3 = [2.0E+0,0.0E+0,0.0E+0,0.0E+0] 58; CHECK-NEXT: divss %xmm1, %xmm3 59; CHECK-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1] 60; CHECK-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm2[0] 61; CHECK-NEXT: retq 62; 63; AVX-LABEL: constrained_vector_fdiv_v3f32: 64; AVX: # %bb.0: # %entry 65; AVX-NEXT: vmovss {{.*#+}} xmm0 = [1.0E+1,0.0E+0,0.0E+0,0.0E+0] 66; AVX-NEXT: vmovss {{.*#+}} xmm1 = [3.0E+0,0.0E+0,0.0E+0,0.0E+0] 67; AVX-NEXT: vdivss %xmm0, %xmm1, %xmm1 68; AVX-NEXT: vmovss {{.*#+}} xmm2 = [1.0E+0,0.0E+0,0.0E+0,0.0E+0] 69; AVX-NEXT: vdivss %xmm0, %xmm2, %xmm2 70; AVX-NEXT: vmovss {{.*#+}} xmm3 = [2.0E+0,0.0E+0,0.0E+0,0.0E+0] 71; AVX-NEXT: vdivss %xmm0, %xmm3, %xmm0 72; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm2[0],xmm0[0],xmm2[2,3] 73; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0],xmm0[3] 74; AVX-NEXT: retq 75entry: 76 %div = call <3 x float> @llvm.experimental.constrained.fdiv.v3f32( 77 <3 x float> <float 1.000000e+00, float 2.000000e+00, float 3.000000e+00>, 78 <3 x float> <float 1.000000e+01, float 1.000000e+01, float 1.000000e+01>, 79 metadata !"round.dynamic", 80 metadata !"fpexcept.strict") #0 81 ret <3 x float> %div 82} 83 84define <3 x double> @constrained_vector_fdiv_v3f64() #0 { 85; CHECK-LABEL: constrained_vector_fdiv_v3f64: 86; CHECK: # %bb.0: # %entry 87; CHECK-NEXT: movapd {{.*#+}} xmm0 = [1.0E+0,2.0E+0] 88; CHECK-NEXT: divpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 89; CHECK-NEXT: movsd {{.*#+}} xmm1 = 
[3.0E+0,0.0E+0] 90; CHECK-NEXT: divsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 91; CHECK-NEXT: movsd %xmm1, -{{[0-9]+}}(%rsp) 92; CHECK-NEXT: movapd %xmm0, %xmm1 93; CHECK-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1] 94; CHECK-NEXT: fldl -{{[0-9]+}}(%rsp) 95; CHECK-NEXT: wait 96; CHECK-NEXT: retq 97; 98; AVX-LABEL: constrained_vector_fdiv_v3f64: 99; AVX: # %bb.0: # %entry 100; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [3.0E+0,0.0E+0] 101; AVX-NEXT: vdivsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 102; AVX-NEXT: vmovapd {{.*#+}} xmm1 = [1.0E+0,2.0E+0] 103; AVX-NEXT: vdivpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 104; AVX-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 105; AVX-NEXT: retq 106entry: 107 %div = call <3 x double> @llvm.experimental.constrained.fdiv.v3f64( 108 <3 x double> <double 1.000000e+00, double 2.000000e+00, double 3.000000e+00>, 109 <3 x double> <double 1.000000e+01, double 1.000000e+01, double 1.000000e+01>, 110 metadata !"round.dynamic", 111 metadata !"fpexcept.strict") #0 112 ret <3 x double> %div 113} 114 115define <4 x double> @constrained_vector_fdiv_v4f64() #0 { 116; CHECK-LABEL: constrained_vector_fdiv_v4f64: 117; CHECK: # %bb.0: # %entry 118; CHECK-NEXT: movapd {{.*#+}} xmm2 = [1.0E+1,1.0E+1] 119; CHECK-NEXT: movapd {{.*#+}} xmm1 = [3.0E+0,4.0E+0] 120; CHECK-NEXT: divpd %xmm2, %xmm1 121; CHECK-NEXT: movapd {{.*#+}} xmm0 = [1.0E+0,2.0E+0] 122; CHECK-NEXT: divpd %xmm2, %xmm0 123; CHECK-NEXT: retq 124; 125; AVX1-LABEL: constrained_vector_fdiv_v4f64: 126; AVX1: # %bb.0: # %entry 127; AVX1-NEXT: vmovapd {{.*#+}} ymm0 = [1.0E+0,2.0E+0,3.0E+0,4.0E+0] 128; AVX1-NEXT: vdivpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 129; AVX1-NEXT: retq 130; 131; AVX512-LABEL: constrained_vector_fdiv_v4f64: 132; AVX512: # %bb.0: # %entry 133; AVX512-NEXT: vbroadcastsd {{.*#+}} ymm0 = [1.0E+1,1.0E+1,1.0E+1,1.0E+1] 134; AVX512-NEXT: vmovapd {{.*#+}} ymm1 = [1.0E+0,2.0E+0,3.0E+0,4.0E+0] 135; AVX512-NEXT: vdivpd %ymm0, %ymm1, %ymm0 136; AVX512-NEXT: retq 137entry: 138 
%div = call <4 x double> @llvm.experimental.constrained.fdiv.v4f64( 139 <4 x double> <double 1.000000e+00, double 2.000000e+00, 140 double 3.000000e+00, double 4.000000e+00>, 141 <4 x double> <double 1.000000e+01, double 1.000000e+01, 142 double 1.000000e+01, double 1.000000e+01>, 143 metadata !"round.dynamic", 144 metadata !"fpexcept.strict") #0 145 ret <4 x double> %div 146} 147 148define <1 x float> @constrained_vector_frem_v1f32() #0 { 149; CHECK-LABEL: constrained_vector_frem_v1f32: 150; CHECK: # %bb.0: # %entry 151; CHECK-NEXT: pushq %rax 152; CHECK-NEXT: .cfi_def_cfa_offset 16 153; CHECK-NEXT: movss {{.*#+}} xmm0 = [1.0E+0,0.0E+0,0.0E+0,0.0E+0] 154; CHECK-NEXT: movss {{.*#+}} xmm1 = [1.0E+1,0.0E+0,0.0E+0,0.0E+0] 155; CHECK-NEXT: callq fmodf@PLT 156; CHECK-NEXT: popq %rax 157; CHECK-NEXT: .cfi_def_cfa_offset 8 158; CHECK-NEXT: retq 159; 160; AVX-LABEL: constrained_vector_frem_v1f32: 161; AVX: # %bb.0: # %entry 162; AVX-NEXT: pushq %rax 163; AVX-NEXT: .cfi_def_cfa_offset 16 164; AVX-NEXT: vmovss {{.*#+}} xmm0 = [1.0E+0,0.0E+0,0.0E+0,0.0E+0] 165; AVX-NEXT: vmovss {{.*#+}} xmm1 = [1.0E+1,0.0E+0,0.0E+0,0.0E+0] 166; AVX-NEXT: callq fmodf@PLT 167; AVX-NEXT: popq %rax 168; AVX-NEXT: .cfi_def_cfa_offset 8 169; AVX-NEXT: retq 170entry: 171 %rem = call <1 x float> @llvm.experimental.constrained.frem.v1f32( 172 <1 x float> <float 1.000000e+00>, 173 <1 x float> <float 1.000000e+01>, 174 metadata !"round.dynamic", 175 metadata !"fpexcept.strict") #0 176 ret <1 x float> %rem 177} 178 179define <2 x double> @constrained_vector_frem_v2f64() #0 { 180; CHECK-LABEL: constrained_vector_frem_v2f64: 181; CHECK: # %bb.0: # %entry 182; CHECK-NEXT: subq $24, %rsp 183; CHECK-NEXT: .cfi_def_cfa_offset 32 184; CHECK-NEXT: movsd {{.*#+}} xmm0 = [2.0E+0,0.0E+0] 185; CHECK-NEXT: movsd {{.*#+}} xmm1 = [1.0E+1,0.0E+0] 186; CHECK-NEXT: callq fmod@PLT 187; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill 188; CHECK-NEXT: movsd {{.*#+}} xmm0 = [1.0E+0,0.0E+0] 189; CHECK-NEXT: movsd {{.*#+}} 
xmm1 = [1.0E+1,0.0E+0] 190; CHECK-NEXT: callq fmod@PLT 191; CHECK-NEXT: unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload 192; CHECK-NEXT: # xmm0 = xmm0[0],mem[0] 193; CHECK-NEXT: addq $24, %rsp 194; CHECK-NEXT: .cfi_def_cfa_offset 8 195; CHECK-NEXT: retq 196; 197; AVX-LABEL: constrained_vector_frem_v2f64: 198; AVX: # %bb.0: # %entry 199; AVX-NEXT: subq $24, %rsp 200; AVX-NEXT: .cfi_def_cfa_offset 32 201; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [2.0E+0,0.0E+0] 202; AVX-NEXT: vmovsd {{.*#+}} xmm1 = [1.0E+1,0.0E+0] 203; AVX-NEXT: callq fmod@PLT 204; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill 205; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [1.0E+0,0.0E+0] 206; AVX-NEXT: vmovsd {{.*#+}} xmm1 = [1.0E+1,0.0E+0] 207; AVX-NEXT: callq fmod@PLT 208; AVX-NEXT: vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload 209; AVX-NEXT: # xmm0 = xmm0[0],mem[0] 210; AVX-NEXT: addq $24, %rsp 211; AVX-NEXT: .cfi_def_cfa_offset 8 212; AVX-NEXT: retq 213entry: 214 %rem = call <2 x double> @llvm.experimental.constrained.frem.v2f64( 215 <2 x double> <double 1.000000e+00, double 2.000000e+00>, 216 <2 x double> <double 1.000000e+01, double 1.000000e+01>, 217 metadata !"round.dynamic", 218 metadata !"fpexcept.strict") #0 219 ret <2 x double> %rem 220} 221 222define <3 x float> @constrained_vector_frem_v3f32() #0 { 223; CHECK-LABEL: constrained_vector_frem_v3f32: 224; CHECK: # %bb.0: # %entry 225; CHECK-NEXT: subq $40, %rsp 226; CHECK-NEXT: .cfi_def_cfa_offset 48 227; CHECK-NEXT: movss {{.*#+}} xmm0 = [3.0E+0,0.0E+0,0.0E+0,0.0E+0] 228; CHECK-NEXT: movss {{.*#+}} xmm1 = [1.0E+1,0.0E+0,0.0E+0,0.0E+0] 229; CHECK-NEXT: callq fmodf@PLT 230; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 231; CHECK-NEXT: movss {{.*#+}} xmm0 = [1.0E+0,0.0E+0,0.0E+0,0.0E+0] 232; CHECK-NEXT: movss {{.*#+}} xmm1 = [1.0E+1,0.0E+0,0.0E+0,0.0E+0] 233; CHECK-NEXT: callq fmodf@PLT 234; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill 235; CHECK-NEXT: movss {{.*#+}} xmm0 = [2.0E+0,0.0E+0,0.0E+0,0.0E+0] 236; CHECK-NEXT: 
movss {{.*#+}} xmm1 = [1.0E+1,0.0E+0,0.0E+0,0.0E+0] 237; CHECK-NEXT: callq fmodf@PLT 238; CHECK-NEXT: movaps (%rsp), %xmm1 # 16-byte Reload 239; CHECK-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] 240; CHECK-NEXT: unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload 241; CHECK-NEXT: # xmm1 = xmm1[0],mem[0] 242; CHECK-NEXT: movaps %xmm1, %xmm0 243; CHECK-NEXT: addq $40, %rsp 244; CHECK-NEXT: .cfi_def_cfa_offset 8 245; CHECK-NEXT: retq 246; 247; AVX-LABEL: constrained_vector_frem_v3f32: 248; AVX: # %bb.0: # %entry 249; AVX-NEXT: subq $40, %rsp 250; AVX-NEXT: .cfi_def_cfa_offset 48 251; AVX-NEXT: vmovss {{.*#+}} xmm0 = [3.0E+0,0.0E+0,0.0E+0,0.0E+0] 252; AVX-NEXT: vmovss {{.*#+}} xmm1 = [1.0E+1,0.0E+0,0.0E+0,0.0E+0] 253; AVX-NEXT: callq fmodf@PLT 254; AVX-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 255; AVX-NEXT: vmovss {{.*#+}} xmm0 = [1.0E+0,0.0E+0,0.0E+0,0.0E+0] 256; AVX-NEXT: vmovss {{.*#+}} xmm1 = [1.0E+1,0.0E+0,0.0E+0,0.0E+0] 257; AVX-NEXT: callq fmodf@PLT 258; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill 259; AVX-NEXT: vmovss {{.*#+}} xmm0 = [2.0E+0,0.0E+0,0.0E+0,0.0E+0] 260; AVX-NEXT: vmovss {{.*#+}} xmm1 = [1.0E+1,0.0E+0,0.0E+0,0.0E+0] 261; AVX-NEXT: callq fmodf@PLT 262; AVX-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload 263; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3] 264; AVX-NEXT: vinsertps $32, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload 265; AVX-NEXT: # xmm0 = xmm0[0,1],mem[0],xmm0[3] 266; AVX-NEXT: addq $40, %rsp 267; AVX-NEXT: .cfi_def_cfa_offset 8 268; AVX-NEXT: retq 269entry: 270 %rem = call <3 x float> @llvm.experimental.constrained.frem.v3f32( 271 <3 x float> <float 1.000000e+00, float 2.000000e+00, float 3.000000e+00>, 272 <3 x float> <float 1.000000e+01, float 1.000000e+01, float 1.000000e+01>, 273 metadata !"round.dynamic", 274 metadata !"fpexcept.strict") #0 275 ret <3 x float> %rem 276} 277 278define <3 x double> @constrained_vector_frem_v3f64() #0 { 279; 
CHECK-LABEL: constrained_vector_frem_v3f64: 280; CHECK: # %bb.0: # %entry 281; CHECK-NEXT: subq $24, %rsp 282; CHECK-NEXT: .cfi_def_cfa_offset 32 283; CHECK-NEXT: movsd {{.*#+}} xmm0 = [2.0E+0,0.0E+0] 284; CHECK-NEXT: movsd {{.*#+}} xmm1 = [1.0E+1,0.0E+0] 285; CHECK-NEXT: callq fmod@PLT 286; CHECK-NEXT: movsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill 287; CHECK-NEXT: movsd {{.*#+}} xmm0 = [1.0E+0,0.0E+0] 288; CHECK-NEXT: movsd {{.*#+}} xmm1 = [1.0E+1,0.0E+0] 289; CHECK-NEXT: callq fmod@PLT 290; CHECK-NEXT: movsd %xmm0, (%rsp) # 8-byte Spill 291; CHECK-NEXT: movsd {{.*#+}} xmm0 = [3.0E+0,0.0E+0] 292; CHECK-NEXT: movsd {{.*#+}} xmm1 = [1.0E+1,0.0E+0] 293; CHECK-NEXT: callq fmod@PLT 294; CHECK-NEXT: movsd %xmm0, {{[0-9]+}}(%rsp) 295; CHECK-NEXT: fldl {{[0-9]+}}(%rsp) 296; CHECK-NEXT: wait 297; CHECK-NEXT: movsd (%rsp), %xmm0 # 8-byte Reload 298; CHECK-NEXT: # xmm0 = mem[0],zero 299; CHECK-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 8-byte Reload 300; CHECK-NEXT: # xmm1 = mem[0],zero 301; CHECK-NEXT: addq $24, %rsp 302; CHECK-NEXT: .cfi_def_cfa_offset 8 303; CHECK-NEXT: retq 304; 305; AVX-LABEL: constrained_vector_frem_v3f64: 306; AVX: # %bb.0: # %entry 307; AVX-NEXT: subq $40, %rsp 308; AVX-NEXT: .cfi_def_cfa_offset 48 309; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [2.0E+0,0.0E+0] 310; AVX-NEXT: vmovsd {{.*#+}} xmm1 = [1.0E+1,0.0E+0] 311; AVX-NEXT: callq fmod@PLT 312; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill 313; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [1.0E+0,0.0E+0] 314; AVX-NEXT: vmovsd {{.*#+}} xmm1 = [1.0E+1,0.0E+0] 315; AVX-NEXT: callq fmod@PLT 316; AVX-NEXT: vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload 317; AVX-NEXT: # xmm0 = xmm0[0],mem[0] 318; AVX-NEXT: vmovups %ymm0, (%rsp) # 32-byte Spill 319; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [3.0E+0,0.0E+0] 320; AVX-NEXT: vmovsd {{.*#+}} xmm1 = [1.0E+1,0.0E+0] 321; AVX-NEXT: vzeroupper 322; AVX-NEXT: callq fmod@PLT 323; AVX-NEXT: vmovups (%rsp), %ymm1 # 32-byte Reload 324; AVX-NEXT: vinsertf128 $1, %xmm0, %ymm1, 
%ymm0 325; AVX-NEXT: addq $40, %rsp 326; AVX-NEXT: .cfi_def_cfa_offset 8 327; AVX-NEXT: retq 328entry: 329 %rem = call <3 x double> @llvm.experimental.constrained.frem.v3f64( 330 <3 x double> <double 1.000000e+00, double 2.000000e+00, double 3.000000e+00>, 331 <3 x double> <double 1.000000e+01, double 1.000000e+01, double 1.000000e+01>, 332 metadata !"round.dynamic", 333 metadata !"fpexcept.strict") #0 334 ret <3 x double> %rem 335} 336 337define <4 x double> @constrained_vector_frem_v4f64() #0 { 338; CHECK-LABEL: constrained_vector_frem_v4f64: 339; CHECK: # %bb.0: 340; CHECK-NEXT: subq $40, %rsp 341; CHECK-NEXT: .cfi_def_cfa_offset 48 342; CHECK-NEXT: movsd {{.*#+}} xmm0 = [2.0E+0,0.0E+0] 343; CHECK-NEXT: movsd {{.*#+}} xmm1 = [1.0E+1,0.0E+0] 344; CHECK-NEXT: callq fmod@PLT 345; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill 346; CHECK-NEXT: movsd {{.*#+}} xmm0 = [1.0E+0,0.0E+0] 347; CHECK-NEXT: movsd {{.*#+}} xmm1 = [1.0E+1,0.0E+0] 348; CHECK-NEXT: callq fmod@PLT 349; CHECK-NEXT: unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload 350; CHECK-NEXT: # xmm0 = xmm0[0],mem[0] 351; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill 352; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.0E+0,0.0E+0] 353; CHECK-NEXT: movsd {{.*#+}} xmm1 = [1.0E+1,0.0E+0] 354; CHECK-NEXT: callq fmod@PLT 355; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 356; CHECK-NEXT: movsd {{.*#+}} xmm0 = [3.0E+0,0.0E+0] 357; CHECK-NEXT: movsd {{.*#+}} xmm1 = [1.0E+1,0.0E+0] 358; CHECK-NEXT: callq fmod@PLT 359; CHECK-NEXT: movaps %xmm0, %xmm1 360; CHECK-NEXT: unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload 361; CHECK-NEXT: # xmm1 = xmm1[0],mem[0] 362; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload 363; CHECK-NEXT: addq $40, %rsp 364; CHECK-NEXT: .cfi_def_cfa_offset 8 365; CHECK-NEXT: retq 366; 367; AVX-LABEL: constrained_vector_frem_v4f64: 368; AVX: # %bb.0: 369; AVX-NEXT: subq $40, %rsp 370; AVX-NEXT: .cfi_def_cfa_offset 48 371; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.0E+0,0.0E+0] 
372; AVX-NEXT: vmovsd {{.*#+}} xmm1 = [1.0E+1,0.0E+0] 373; AVX-NEXT: callq fmod@PLT 374; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill 375; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [3.0E+0,0.0E+0] 376; AVX-NEXT: vmovsd {{.*#+}} xmm1 = [1.0E+1,0.0E+0] 377; AVX-NEXT: callq fmod@PLT 378; AVX-NEXT: vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload 379; AVX-NEXT: # xmm0 = xmm0[0],mem[0] 380; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill 381; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [2.0E+0,0.0E+0] 382; AVX-NEXT: vmovsd {{.*#+}} xmm1 = [1.0E+1,0.0E+0] 383; AVX-NEXT: callq fmod@PLT 384; AVX-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 385; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [1.0E+0,0.0E+0] 386; AVX-NEXT: vmovsd {{.*#+}} xmm1 = [1.0E+1,0.0E+0] 387; AVX-NEXT: callq fmod@PLT 388; AVX-NEXT: vunpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload 389; AVX-NEXT: # xmm0 = xmm0[0],mem[0] 390; AVX-NEXT: vinsertf128 $1, (%rsp), %ymm0, %ymm0 # 16-byte Folded Reload 391; AVX-NEXT: addq $40, %rsp 392; AVX-NEXT: .cfi_def_cfa_offset 8 393; AVX-NEXT: retq 394 %rem = call <4 x double> @llvm.experimental.constrained.frem.v4f64( 395 <4 x double> <double 1.000000e+00, double 2.000000e+00, 396 double 3.000000e+00, double 4.000000e+00>, 397 <4 x double> <double 1.000000e+01, double 1.000000e+01, 398 double 1.000000e+01, double 1.000000e+01>, 399 metadata !"round.dynamic", 400 metadata !"fpexcept.strict") #0 401 ret <4 x double> %rem 402} 403 404define <1 x float> @constrained_vector_fmul_v1f32() #0 { 405; CHECK-LABEL: constrained_vector_fmul_v1f32: 406; CHECK: # %bb.0: # %entry 407; CHECK-NEXT: movss {{.*#+}} xmm0 = [+Inf,0.0E+0,0.0E+0,0.0E+0] 408; CHECK-NEXT: mulss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 409; CHECK-NEXT: retq 410; 411; AVX-LABEL: constrained_vector_fmul_v1f32: 412; AVX: # %bb.0: # %entry 413; AVX-NEXT: vmovss {{.*#+}} xmm0 = [+Inf,0.0E+0,0.0E+0,0.0E+0] 414; AVX-NEXT: vmulss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 415; AVX-NEXT: retq 416entry: 417 
%mul = call <1 x float> @llvm.experimental.constrained.fmul.v1f32( 418 <1 x float> <float 0x7FF0000000000000>, 419 <1 x float> <float 2.000000e+00>, 420 metadata !"round.dynamic", 421 metadata !"fpexcept.strict") #0 422 ret <1 x float> %mul 423} 424 425define <2 x double> @constrained_vector_fmul_v2f64() #0 { 426; CHECK-LABEL: constrained_vector_fmul_v2f64: 427; CHECK: # %bb.0: # %entry 428; CHECK-NEXT: movapd {{.*#+}} xmm0 = [1.7976931348623157E+308,1.7976931348623157E+308] 429; CHECK-NEXT: mulpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 430; CHECK-NEXT: retq 431; 432; AVX-LABEL: constrained_vector_fmul_v2f64: 433; AVX: # %bb.0: # %entry 434; AVX-NEXT: vmovddup {{.*#+}} xmm0 = [1.7976931348623157E+308,1.7976931348623157E+308] 435; AVX-NEXT: # xmm0 = mem[0,0] 436; AVX-NEXT: vmulpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 437; AVX-NEXT: retq 438entry: 439 %mul = call <2 x double> @llvm.experimental.constrained.fmul.v2f64( 440 <2 x double> <double 0x7FEFFFFFFFFFFFFF, double 0x7FEFFFFFFFFFFFFF>, 441 <2 x double> <double 2.000000e+00, double 3.000000e+00>, 442 metadata !"round.dynamic", 443 metadata !"fpexcept.strict") #0 444 ret <2 x double> %mul 445} 446 447define <3 x float> @constrained_vector_fmul_v3f32() #0 { 448; CHECK-LABEL: constrained_vector_fmul_v3f32: 449; CHECK: # %bb.0: # %entry 450; CHECK-NEXT: movss {{.*#+}} xmm1 = [+Inf,0.0E+0,0.0E+0,0.0E+0] 451; CHECK-NEXT: movss {{.*#+}} xmm2 = [1.0E+2,0.0E+0,0.0E+0,0.0E+0] 452; CHECK-NEXT: mulss %xmm1, %xmm2 453; CHECK-NEXT: movss {{.*#+}} xmm0 = [1.0E+0,0.0E+0,0.0E+0,0.0E+0] 454; CHECK-NEXT: mulss %xmm1, %xmm0 455; CHECK-NEXT: mulss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 456; CHECK-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 457; CHECK-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm2[0] 458; CHECK-NEXT: retq 459; 460; AVX-LABEL: constrained_vector_fmul_v3f32: 461; AVX: # %bb.0: # %entry 462; AVX-NEXT: vmovss {{.*#+}} xmm0 = [+Inf,0.0E+0,0.0E+0,0.0E+0] 463; AVX-NEXT: vmulss 
{{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 464; AVX-NEXT: vmulss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm2 465; AVX-NEXT: vmulss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 466; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm2[0],xmm0[0],xmm2[2,3] 467; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0],xmm0[3] 468; AVX-NEXT: retq 469entry: 470 %mul = call <3 x float> @llvm.experimental.constrained.fmul.v3f32( 471 <3 x float> <float 0x7FF0000000000000, float 0x7FF0000000000000, 472 float 0x7FF0000000000000>, 473 <3 x float> <float 1.000000e+00, float 1.000000e+01, float 1.000000e+02>, 474 metadata !"round.dynamic", 475 metadata !"fpexcept.strict") #0 476 ret <3 x float> %mul 477} 478 479define <3 x double> @constrained_vector_fmul_v3f64() #0 { 480; CHECK-LABEL: constrained_vector_fmul_v3f64: 481; CHECK: # %bb.0: # %entry 482; CHECK-NEXT: movapd {{.*#+}} xmm0 = [1.7976931348623157E+308,1.7976931348623157E+308] 483; CHECK-NEXT: mulpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 484; CHECK-NEXT: movsd {{.*#+}} xmm1 = [1.7976931348623157E+308,0.0E+0] 485; CHECK-NEXT: mulsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 486; CHECK-NEXT: movsd %xmm1, -{{[0-9]+}}(%rsp) 487; CHECK-NEXT: movapd %xmm0, %xmm1 488; CHECK-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1] 489; CHECK-NEXT: fldl -{{[0-9]+}}(%rsp) 490; CHECK-NEXT: wait 491; CHECK-NEXT: retq 492; 493; AVX-LABEL: constrained_vector_fmul_v3f64: 494; AVX: # %bb.0: # %entry 495; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [1.7976931348623157E+308,0.0E+0] 496; AVX-NEXT: vmulsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 497; AVX-NEXT: vmovddup {{.*#+}} xmm1 = [1.7976931348623157E+308,1.7976931348623157E+308] 498; AVX-NEXT: # xmm1 = mem[0,0] 499; AVX-NEXT: vmulpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 500; AVX-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 501; AVX-NEXT: retq 502entry: 503 %mul = call <3 x double> @llvm.experimental.constrained.fmul.v3f64( 504 <3 x double> <double 0x7FEFFFFFFFFFFFFF, double 0x7FEFFFFFFFFFFFFF, 505 double 
0x7FEFFFFFFFFFFFFF>, 506 <3 x double> <double 1.000000e+00, double 1.000000e+01, double 1.000000e+02>, 507 metadata !"round.dynamic", 508 metadata !"fpexcept.strict") #0 509 ret <3 x double> %mul 510} 511 512define <4 x double> @constrained_vector_fmul_v4f64() #0 { 513; CHECK-LABEL: constrained_vector_fmul_v4f64: 514; CHECK: # %bb.0: # %entry 515; CHECK-NEXT: movapd {{.*#+}} xmm0 = [1.7976931348623157E+308,1.7976931348623157E+308] 516; CHECK-NEXT: movapd {{.*#+}} xmm1 = [4.0E+0,5.0E+0] 517; CHECK-NEXT: mulpd %xmm0, %xmm1 518; CHECK-NEXT: mulpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 519; CHECK-NEXT: retq 520; 521; AVX-LABEL: constrained_vector_fmul_v4f64: 522; AVX: # %bb.0: # %entry 523; AVX-NEXT: vbroadcastsd {{.*#+}} ymm0 = [1.7976931348623157E+308,1.7976931348623157E+308,1.7976931348623157E+308,1.7976931348623157E+308] 524; AVX-NEXT: vmulpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 525; AVX-NEXT: retq 526entry: 527 %mul = call <4 x double> @llvm.experimental.constrained.fmul.v4f64( 528 <4 x double> <double 0x7FEFFFFFFFFFFFFF, double 0x7FEFFFFFFFFFFFFF, 529 double 0x7FEFFFFFFFFFFFFF, double 0x7FEFFFFFFFFFFFFF>, 530 <4 x double> <double 2.000000e+00, double 3.000000e+00, 531 double 4.000000e+00, double 5.000000e+00>, 532 metadata !"round.dynamic", 533 metadata !"fpexcept.strict") #0 534 ret <4 x double> %mul 535} 536 537define <1 x float> @constrained_vector_fadd_v1f32() #0 { 538; CHECK-LABEL: constrained_vector_fadd_v1f32: 539; CHECK: # %bb.0: # %entry 540; CHECK-NEXT: movss {{.*#+}} xmm0 = [+Inf,0.0E+0,0.0E+0,0.0E+0] 541; CHECK-NEXT: addss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 542; CHECK-NEXT: retq 543; 544; AVX-LABEL: constrained_vector_fadd_v1f32: 545; AVX: # %bb.0: # %entry 546; AVX-NEXT: vmovss {{.*#+}} xmm0 = [+Inf,0.0E+0,0.0E+0,0.0E+0] 547; AVX-NEXT: vaddss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 548; AVX-NEXT: retq 549entry: 550 %add = call <1 x float> @llvm.experimental.constrained.fadd.v1f32( 551 <1 x float> <float 0x7FF0000000000000>, 552 <1 x 
float> <float 1.0>, 553 metadata !"round.dynamic", 554 metadata !"fpexcept.strict") #0 555 ret <1 x float> %add 556} 557 558define <2 x double> @constrained_vector_fadd_v2f64() #0 { 559; CHECK-LABEL: constrained_vector_fadd_v2f64: 560; CHECK: # %bb.0: # %entry 561; CHECK-NEXT: movapd {{.*#+}} xmm0 = [1.7976931348623157E+308,1.7976931348623157E+308] 562; CHECK-NEXT: addpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 563; CHECK-NEXT: retq 564; 565; AVX-LABEL: constrained_vector_fadd_v2f64: 566; AVX: # %bb.0: # %entry 567; AVX-NEXT: vmovddup {{.*#+}} xmm0 = [1.7976931348623157E+308,1.7976931348623157E+308] 568; AVX-NEXT: # xmm0 = mem[0,0] 569; AVX-NEXT: vaddpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 570; AVX-NEXT: retq 571entry: 572 %add = call <2 x double> @llvm.experimental.constrained.fadd.v2f64( 573 <2 x double> <double 0x7FEFFFFFFFFFFFFF, double 0x7FEFFFFFFFFFFFFF>, 574 <2 x double> <double 1.000000e+00, double 1.000000e-01>, 575 metadata !"round.dynamic", 576 metadata !"fpexcept.strict") #0 577 ret <2 x double> %add 578} 579 580define <3 x float> @constrained_vector_fadd_v3f32() #0 { 581; CHECK-LABEL: constrained_vector_fadd_v3f32: 582; CHECK: # %bb.0: # %entry 583; CHECK-NEXT: xorps %xmm1, %xmm1 584; CHECK-NEXT: movss {{.*#+}} xmm2 = [NaN,0.0E+0,0.0E+0,0.0E+0] 585; CHECK-NEXT: addss %xmm2, %xmm1 586; CHECK-NEXT: movss {{.*#+}} xmm0 = [2.0E+0,0.0E+0,0.0E+0,0.0E+0] 587; CHECK-NEXT: addss %xmm2, %xmm0 588; CHECK-NEXT: addss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2 589; CHECK-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] 590; CHECK-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] 591; CHECK-NEXT: retq 592; 593; AVX-LABEL: constrained_vector_fadd_v3f32: 594; AVX: # %bb.0: # %entry 595; AVX-NEXT: vxorps %xmm0, %xmm0, %xmm0 596; AVX-NEXT: vmovss {{.*#+}} xmm1 = [NaN,0.0E+0,0.0E+0,0.0E+0] 597; AVX-NEXT: vaddss %xmm0, %xmm1, %xmm0 598; AVX-NEXT: vaddss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm2 599; AVX-NEXT: vaddss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, 
%xmm1 600; AVX-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3] 601; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3] 602; AVX-NEXT: retq 603entry: 604 %add = call <3 x float> @llvm.experimental.constrained.fadd.v3f32( 605 <3 x float> <float 0xFFFFFFFFE0000000, float 0xFFFFFFFFE0000000, 606 float 0xFFFFFFFFE0000000>, 607 <3 x float> <float 2.0, float 1.0, float 0.0>, 608 metadata !"round.dynamic", 609 metadata !"fpexcept.strict") #0 610 ret <3 x float> %add 611} 612 613define <3 x double> @constrained_vector_fadd_v3f64() #0 { 614; CHECK-LABEL: constrained_vector_fadd_v3f64: 615; CHECK: # %bb.0: # %entry 616; CHECK-NEXT: movapd {{.*#+}} xmm0 = [1.7976931348623157E+308,1.7976931348623157E+308] 617; CHECK-NEXT: addpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 618; CHECK-NEXT: xorpd %xmm1, %xmm1 619; CHECK-NEXT: addsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 620; CHECK-NEXT: movsd %xmm1, -{{[0-9]+}}(%rsp) 621; CHECK-NEXT: movapd %xmm0, %xmm1 622; CHECK-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1] 623; CHECK-NEXT: fldl -{{[0-9]+}}(%rsp) 624; CHECK-NEXT: wait 625; CHECK-NEXT: retq 626; 627; AVX-LABEL: constrained_vector_fadd_v3f64: 628; AVX: # %bb.0: # %entry 629; AVX-NEXT: vxorpd %xmm0, %xmm0, %xmm0 630; AVX-NEXT: vaddsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 631; AVX-NEXT: vmovddup {{.*#+}} xmm1 = [1.7976931348623157E+308,1.7976931348623157E+308] 632; AVX-NEXT: # xmm1 = mem[0,0] 633; AVX-NEXT: vaddpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 634; AVX-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 635; AVX-NEXT: retq 636entry: 637 %add = call <3 x double> @llvm.experimental.constrained.fadd.v3f64( 638 <3 x double> <double 0x7FEFFFFFFFFFFFFF, double 0x7FEFFFFFFFFFFFFF, 639 double 0x7FEFFFFFFFFFFFFF>, 640 <3 x double> <double 2.0, double 1.0, double 0.0>, 641 metadata !"round.dynamic", 642 metadata !"fpexcept.strict") #0 643 ret <3 x double> %add 644} 645 646define <4 x double> @constrained_vector_fadd_v4f64() #0 { 647; CHECK-LABEL: 
constrained_vector_fadd_v4f64: 648; CHECK: # %bb.0: # %entry 649; CHECK-NEXT: movapd {{.*#+}} xmm0 = [1.7976931348623157E+308,1.7976931348623157E+308] 650; CHECK-NEXT: movapd {{.*#+}} xmm1 = [2.0E+0,2.0000000000000001E-1] 651; CHECK-NEXT: addpd %xmm0, %xmm1 652; CHECK-NEXT: addpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 653; CHECK-NEXT: retq 654; 655; AVX-LABEL: constrained_vector_fadd_v4f64: 656; AVX: # %bb.0: # %entry 657; AVX-NEXT: vbroadcastsd {{.*#+}} ymm0 = [1.7976931348623157E+308,1.7976931348623157E+308,1.7976931348623157E+308,1.7976931348623157E+308] 658; AVX-NEXT: vaddpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 659; AVX-NEXT: retq 660entry: 661 %add = call <4 x double> @llvm.experimental.constrained.fadd.v4f64( 662 <4 x double> <double 0x7FEFFFFFFFFFFFFF, double 0x7FEFFFFFFFFFFFFF, 663 double 0x7FEFFFFFFFFFFFFF, double 0x7FEFFFFFFFFFFFFF>, 664 <4 x double> <double 1.000000e+00, double 1.000000e-01, 665 double 2.000000e+00, double 2.000000e-01>, 666 metadata !"round.dynamic", 667 metadata !"fpexcept.strict") #0 668 ret <4 x double> %add 669} 670 671define <1 x float> @constrained_vector_fsub_v1f32() #0 { 672; CHECK-LABEL: constrained_vector_fsub_v1f32: 673; CHECK: # %bb.0: # %entry 674; CHECK-NEXT: movss {{.*#+}} xmm0 = [+Inf,0.0E+0,0.0E+0,0.0E+0] 675; CHECK-NEXT: subss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 676; CHECK-NEXT: retq 677; 678; AVX-LABEL: constrained_vector_fsub_v1f32: 679; AVX: # %bb.0: # %entry 680; AVX-NEXT: vmovss {{.*#+}} xmm0 = [+Inf,0.0E+0,0.0E+0,0.0E+0] 681; AVX-NEXT: vsubss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 682; AVX-NEXT: retq 683entry: 684 %sub = call <1 x float> @llvm.experimental.constrained.fsub.v1f32( 685 <1 x float> <float 0x7FF0000000000000>, 686 <1 x float> <float 1.000000e+00>, 687 metadata !"round.dynamic", 688 metadata !"fpexcept.strict") #0 689 ret <1 x float> %sub 690} 691 692define <2 x double> @constrained_vector_fsub_v2f64() #0 { 693; CHECK-LABEL: constrained_vector_fsub_v2f64: 694; CHECK: # %bb.0: # %entry 
695; CHECK-NEXT: movapd {{.*#+}} xmm0 = [-1.7976931348623157E+308,-1.7976931348623157E+308] 696; CHECK-NEXT: subpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 697; CHECK-NEXT: retq 698; 699; AVX-LABEL: constrained_vector_fsub_v2f64: 700; AVX: # %bb.0: # %entry 701; AVX-NEXT: vmovddup {{.*#+}} xmm0 = [-1.7976931348623157E+308,-1.7976931348623157E+308] 702; AVX-NEXT: # xmm0 = mem[0,0] 703; AVX-NEXT: vsubpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 704; AVX-NEXT: retq 705entry: 706 %sub = call <2 x double> @llvm.experimental.constrained.fsub.v2f64( 707 <2 x double> <double 0xFFEFFFFFFFFFFFFF, double 0xFFEFFFFFFFFFFFFF>, 708 <2 x double> <double 1.000000e+00, double 1.000000e-01>, 709 metadata !"round.dynamic", 710 metadata !"fpexcept.strict") #0 711 ret <2 x double> %sub 712} 713 714define <3 x float> @constrained_vector_fsub_v3f32() #0 { 715; CHECK-LABEL: constrained_vector_fsub_v3f32: 716; CHECK: # %bb.0: # %entry 717; CHECK-NEXT: xorps %xmm0, %xmm0 718; CHECK-NEXT: movss {{.*#+}} xmm1 = [NaN,0.0E+0,0.0E+0,0.0E+0] 719; CHECK-NEXT: movaps %xmm1, %xmm2 720; CHECK-NEXT: subss %xmm0, %xmm2 721; CHECK-NEXT: movaps %xmm1, %xmm0 722; CHECK-NEXT: subss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 723; CHECK-NEXT: subss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 724; CHECK-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 725; CHECK-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm2[0] 726; CHECK-NEXT: retq 727; 728; AVX-LABEL: constrained_vector_fsub_v3f32: 729; AVX: # %bb.0: # %entry 730; AVX-NEXT: vxorps %xmm0, %xmm0, %xmm0 731; AVX-NEXT: vmovss {{.*#+}} xmm1 = [NaN,0.0E+0,0.0E+0,0.0E+0] 732; AVX-NEXT: vsubss %xmm0, %xmm1, %xmm0 733; AVX-NEXT: vsubss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm2 734; AVX-NEXT: vsubss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 735; AVX-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3] 736; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3] 737; AVX-NEXT: retq 738entry: 739 %sub = call <3 x float> 
@llvm.experimental.constrained.fsub.v3f32( 740 <3 x float> <float 0xFFFFFFFFE0000000, float 0xFFFFFFFFE0000000, 741 float 0xFFFFFFFFE0000000>, 742 <3 x float> <float 2.0, float 1.0, float 0.0>, 743 metadata !"round.dynamic", 744 metadata !"fpexcept.strict") #0 745 ret <3 x float> %sub 746} 747 748define <3 x double> @constrained_vector_fsub_v3f64() #0 { 749; CHECK-LABEL: constrained_vector_fsub_v3f64: 750; CHECK: # %bb.0: # %entry 751; CHECK-NEXT: xorpd %xmm0, %xmm0 752; CHECK-NEXT: movsd {{.*#+}} xmm1 = [-1.7976931348623157E+308,0.0E+0] 753; CHECK-NEXT: subsd %xmm0, %xmm1 754; CHECK-NEXT: movapd {{.*#+}} xmm0 = [-1.7976931348623157E+308,-1.7976931348623157E+308] 755; CHECK-NEXT: subpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 756; CHECK-NEXT: movsd %xmm1, -{{[0-9]+}}(%rsp) 757; CHECK-NEXT: movapd %xmm0, %xmm1 758; CHECK-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1] 759; CHECK-NEXT: fldl -{{[0-9]+}}(%rsp) 760; CHECK-NEXT: wait 761; CHECK-NEXT: retq 762; 763; AVX-LABEL: constrained_vector_fsub_v3f64: 764; AVX: # %bb.0: # %entry 765; AVX-NEXT: vxorpd %xmm0, %xmm0, %xmm0 766; AVX-NEXT: vmovsd {{.*#+}} xmm1 = [-1.7976931348623157E+308,0.0E+0] 767; AVX-NEXT: vsubsd %xmm0, %xmm1, %xmm0 768; AVX-NEXT: vmovddup {{.*#+}} xmm1 = [-1.7976931348623157E+308,-1.7976931348623157E+308] 769; AVX-NEXT: # xmm1 = mem[0,0] 770; AVX-NEXT: vsubpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 771; AVX-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 772; AVX-NEXT: retq 773entry: 774 %sub = call <3 x double> @llvm.experimental.constrained.fsub.v3f64( 775 <3 x double> <double 0xFFEFFFFFFFFFFFFF, double 0xFFEFFFFFFFFFFFFF, 776 double 0xFFEFFFFFFFFFFFFF>, 777 <3 x double> <double 2.0, double 1.0, double 0.0>, 778 metadata !"round.dynamic", 779 metadata !"fpexcept.strict") #0 780 ret <3 x double> %sub 781} 782 783define <4 x double> @constrained_vector_fsub_v4f64() #0 { 784; CHECK-LABEL: constrained_vector_fsub_v4f64: 785; CHECK: # %bb.0: # %entry 786; CHECK-NEXT: movapd {{.*#+}} xmm0 = 
[-1.7976931348623157E+308,-1.7976931348623157E+308] 787; CHECK-NEXT: movapd %xmm0, %xmm1 788; CHECK-NEXT: subpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 789; CHECK-NEXT: subpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 790; CHECK-NEXT: retq 791; 792; AVX-LABEL: constrained_vector_fsub_v4f64: 793; AVX: # %bb.0: # %entry 794; AVX-NEXT: vbroadcastsd {{.*#+}} ymm0 = [-1.7976931348623157E+308,-1.7976931348623157E+308,-1.7976931348623157E+308,-1.7976931348623157E+308] 795; AVX-NEXT: vsubpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 796; AVX-NEXT: retq 797entry: 798 %sub = call <4 x double> @llvm.experimental.constrained.fsub.v4f64( 799 <4 x double> <double 0xFFEFFFFFFFFFFFFF, double 0xFFEFFFFFFFFFFFFF, 800 double 0xFFEFFFFFFFFFFFFF, double 0xFFEFFFFFFFFFFFFF>, 801 <4 x double> <double 1.000000e+00, double 1.000000e-01, 802 double 2.000000e+00, double 2.000000e-01>, 803 metadata !"round.dynamic", 804 metadata !"fpexcept.strict") #0 805 ret <4 x double> %sub 806} 807 808define <1 x float> @constrained_vector_sqrt_v1f32() #0 { 809; CHECK-LABEL: constrained_vector_sqrt_v1f32: 810; CHECK: # %bb.0: # %entry 811; CHECK-NEXT: movss {{.*#+}} xmm0 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0] 812; CHECK-NEXT: sqrtss %xmm0, %xmm0 813; CHECK-NEXT: retq 814; 815; AVX-LABEL: constrained_vector_sqrt_v1f32: 816; AVX: # %bb.0: # %entry 817; AVX-NEXT: vmovss {{.*#+}} xmm0 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0] 818; AVX-NEXT: vsqrtss %xmm0, %xmm0, %xmm0 819; AVX-NEXT: retq 820entry: 821 %sqrt = call <1 x float> @llvm.experimental.constrained.sqrt.v1f32( 822 <1 x float> <float 42.0>, 823 metadata !"round.dynamic", 824 metadata !"fpexcept.strict") #0 825 ret <1 x float> %sqrt 826} 827 828define <2 x double> @constrained_vector_sqrt_v2f64() #0 { 829; CHECK-LABEL: constrained_vector_sqrt_v2f64: 830; CHECK: # %bb.0: # %entry 831; CHECK-NEXT: sqrtpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 832; CHECK-NEXT: retq 833; 834; AVX-LABEL: constrained_vector_sqrt_v2f64: 835; AVX: # %bb.0: # %entry 836; AVX-NEXT: vsqrtpd 
{{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 837; AVX-NEXT: retq 838entry: 839 %sqrt = call <2 x double> @llvm.experimental.constrained.sqrt.v2f64( 840 <2 x double> <double 42.0, double 42.1>, 841 metadata !"round.dynamic", 842 metadata !"fpexcept.strict") #0 843 ret <2 x double> %sqrt 844} 845 846define <3 x float> @constrained_vector_sqrt_v3f32() #0 { 847; CHECK-LABEL: constrained_vector_sqrt_v3f32: 848; CHECK: # %bb.0: # %entry 849; CHECK-NEXT: movss {{.*#+}} xmm0 = [4.4E+1,0.0E+0,0.0E+0,0.0E+0] 850; CHECK-NEXT: sqrtss %xmm0, %xmm1 851; CHECK-NEXT: movss {{.*#+}} xmm0 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0] 852; CHECK-NEXT: sqrtss %xmm0, %xmm0 853; CHECK-NEXT: movss {{.*#+}} xmm2 = [4.3E+1,0.0E+0,0.0E+0,0.0E+0] 854; CHECK-NEXT: sqrtss %xmm2, %xmm2 855; CHECK-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] 856; CHECK-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] 857; CHECK-NEXT: retq 858; 859; AVX-LABEL: constrained_vector_sqrt_v3f32: 860; AVX: # %bb.0: # %entry 861; AVX-NEXT: vmovss {{.*#+}} xmm0 = [4.4E+1,0.0E+0,0.0E+0,0.0E+0] 862; AVX-NEXT: vsqrtss %xmm0, %xmm0, %xmm0 863; AVX-NEXT: vmovss {{.*#+}} xmm1 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0] 864; AVX-NEXT: vsqrtss %xmm1, %xmm1, %xmm1 865; AVX-NEXT: vmovss {{.*#+}} xmm2 = [4.3E+1,0.0E+0,0.0E+0,0.0E+0] 866; AVX-NEXT: vsqrtss %xmm2, %xmm2, %xmm2 867; AVX-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[2,3] 868; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3] 869; AVX-NEXT: retq 870entry: 871 %sqrt = call <3 x float> @llvm.experimental.constrained.sqrt.v3f32( 872 <3 x float> <float 42.0, float 43.0, float 44.0>, 873 metadata !"round.dynamic", 874 metadata !"fpexcept.strict") #0 875 ret <3 x float> %sqrt 876} 877 878define <3 x double> @constrained_vector_sqrt_v3f64() #0 { 879; CHECK-LABEL: constrained_vector_sqrt_v3f64: 880; CHECK: # %bb.0: # %entry 881; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2200000000000003E+1,0.0E+0] 882; CHECK-NEXT: sqrtsd %xmm0, %xmm1 883; CHECK-NEXT: sqrtpd 
{{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 884; CHECK-NEXT: movsd %xmm1, -{{[0-9]+}}(%rsp) 885; CHECK-NEXT: movapd %xmm0, %xmm1 886; CHECK-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1] 887; CHECK-NEXT: fldl -{{[0-9]+}}(%rsp) 888; CHECK-NEXT: wait 889; CHECK-NEXT: retq 890; 891; AVX-LABEL: constrained_vector_sqrt_v3f64: 892; AVX: # %bb.0: # %entry 893; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2200000000000003E+1,0.0E+0] 894; AVX-NEXT: vsqrtsd %xmm0, %xmm0, %xmm0 895; AVX-NEXT: vsqrtpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 896; AVX-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 897; AVX-NEXT: retq 898entry: 899 %sqrt = call <3 x double> @llvm.experimental.constrained.sqrt.v3f64( 900 <3 x double> <double 42.0, double 42.1, double 42.2>, 901 metadata !"round.dynamic", 902 metadata !"fpexcept.strict") #0 903 ret <3 x double> %sqrt 904} 905 906define <4 x double> @constrained_vector_sqrt_v4f64() #0 { 907; CHECK-LABEL: constrained_vector_sqrt_v4f64: 908; CHECK: # %bb.0: # %entry 909; CHECK-NEXT: sqrtpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 910; CHECK-NEXT: sqrtpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 911; CHECK-NEXT: retq 912; 913; AVX-LABEL: constrained_vector_sqrt_v4f64: 914; AVX: # %bb.0: # %entry 915; AVX-NEXT: vsqrtpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0 916; AVX-NEXT: retq 917 entry: 918 %sqrt = call <4 x double> @llvm.experimental.constrained.sqrt.v4f64( 919 <4 x double> <double 42.0, double 42.1, 920 double 42.2, double 42.3>, 921 metadata !"round.dynamic", 922 metadata !"fpexcept.strict") #0 923 ret <4 x double> %sqrt 924} 925 926define <1 x float> @constrained_vector_pow_v1f32() #0 { 927; CHECK-LABEL: constrained_vector_pow_v1f32: 928; CHECK: # %bb.0: # %entry 929; CHECK-NEXT: pushq %rax 930; CHECK-NEXT: .cfi_def_cfa_offset 16 931; CHECK-NEXT: movss {{.*#+}} xmm0 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0] 932; CHECK-NEXT: movss {{.*#+}} xmm1 = [3.0E+0,0.0E+0,0.0E+0,0.0E+0] 933; CHECK-NEXT: callq powf@PLT 934; CHECK-NEXT: popq %rax 935; CHECK-NEXT: .cfi_def_cfa_offset 8 936; CHECK-NEXT: 
retq 937; 938; AVX-LABEL: constrained_vector_pow_v1f32: 939; AVX: # %bb.0: # %entry 940; AVX-NEXT: pushq %rax 941; AVX-NEXT: .cfi_def_cfa_offset 16 942; AVX-NEXT: vmovss {{.*#+}} xmm0 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0] 943; AVX-NEXT: vmovss {{.*#+}} xmm1 = [3.0E+0,0.0E+0,0.0E+0,0.0E+0] 944; AVX-NEXT: callq powf@PLT 945; AVX-NEXT: popq %rax 946; AVX-NEXT: .cfi_def_cfa_offset 8 947; AVX-NEXT: retq 948entry: 949 %pow = call <1 x float> @llvm.experimental.constrained.pow.v1f32( 950 <1 x float> <float 42.0>, 951 <1 x float> <float 3.0>, 952 metadata !"round.dynamic", 953 metadata !"fpexcept.strict") #0 954 ret <1 x float> %pow 955} 956 957define <2 x double> @constrained_vector_pow_v2f64() #0 { 958; CHECK-LABEL: constrained_vector_pow_v2f64: 959; CHECK: # %bb.0: # %entry 960; CHECK-NEXT: subq $24, %rsp 961; CHECK-NEXT: .cfi_def_cfa_offset 32 962; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2200000000000003E+1,0.0E+0] 963; CHECK-NEXT: movsd {{.*#+}} xmm1 = [3.0E+0,0.0E+0] 964; CHECK-NEXT: callq pow@PLT 965; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill 966; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0] 967; CHECK-NEXT: movsd {{.*#+}} xmm1 = [3.0E+0,0.0E+0] 968; CHECK-NEXT: callq pow@PLT 969; CHECK-NEXT: unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload 970; CHECK-NEXT: # xmm0 = xmm0[0],mem[0] 971; CHECK-NEXT: addq $24, %rsp 972; CHECK-NEXT: .cfi_def_cfa_offset 8 973; CHECK-NEXT: retq 974; 975; AVX-LABEL: constrained_vector_pow_v2f64: 976; AVX: # %bb.0: # %entry 977; AVX-NEXT: subq $24, %rsp 978; AVX-NEXT: .cfi_def_cfa_offset 32 979; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2200000000000003E+1,0.0E+0] 980; AVX-NEXT: vmovsd {{.*#+}} xmm1 = [3.0E+0,0.0E+0] 981; AVX-NEXT: callq pow@PLT 982; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill 983; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0] 984; AVX-NEXT: vmovsd {{.*#+}} xmm1 = [3.0E+0,0.0E+0] 985; AVX-NEXT: callq pow@PLT 986; AVX-NEXT: vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload 987; AVX-NEXT: 
# xmm0 = xmm0[0],mem[0] 988; AVX-NEXT: addq $24, %rsp 989; AVX-NEXT: .cfi_def_cfa_offset 8 990; AVX-NEXT: retq 991entry: 992 %pow = call <2 x double> @llvm.experimental.constrained.pow.v2f64( 993 <2 x double> <double 42.1, double 42.2>, 994 <2 x double> <double 3.0, double 3.0>, 995 metadata !"round.dynamic", 996 metadata !"fpexcept.strict") #0 997 ret <2 x double> %pow 998} 999 1000define <3 x float> @constrained_vector_pow_v3f32() #0 { 1001; CHECK-LABEL: constrained_vector_pow_v3f32: 1002; CHECK: # %bb.0: # %entry 1003; CHECK-NEXT: subq $40, %rsp 1004; CHECK-NEXT: .cfi_def_cfa_offset 48 1005; CHECK-NEXT: movss {{.*#+}} xmm0 = [4.4E+1,0.0E+0,0.0E+0,0.0E+0] 1006; CHECK-NEXT: movss {{.*#+}} xmm1 = [3.0E+0,0.0E+0,0.0E+0,0.0E+0] 1007; CHECK-NEXT: callq powf@PLT 1008; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 1009; CHECK-NEXT: movss {{.*#+}} xmm0 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0] 1010; CHECK-NEXT: movss {{.*#+}} xmm1 = [3.0E+0,0.0E+0,0.0E+0,0.0E+0] 1011; CHECK-NEXT: callq powf@PLT 1012; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill 1013; CHECK-NEXT: movss {{.*#+}} xmm0 = [4.3E+1,0.0E+0,0.0E+0,0.0E+0] 1014; CHECK-NEXT: movss {{.*#+}} xmm1 = [3.0E+0,0.0E+0,0.0E+0,0.0E+0] 1015; CHECK-NEXT: callq powf@PLT 1016; CHECK-NEXT: movaps (%rsp), %xmm1 # 16-byte Reload 1017; CHECK-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] 1018; CHECK-NEXT: unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload 1019; CHECK-NEXT: # xmm1 = xmm1[0],mem[0] 1020; CHECK-NEXT: movaps %xmm1, %xmm0 1021; CHECK-NEXT: addq $40, %rsp 1022; CHECK-NEXT: .cfi_def_cfa_offset 8 1023; CHECK-NEXT: retq 1024; 1025; AVX-LABEL: constrained_vector_pow_v3f32: 1026; AVX: # %bb.0: # %entry 1027; AVX-NEXT: subq $40, %rsp 1028; AVX-NEXT: .cfi_def_cfa_offset 48 1029; AVX-NEXT: vmovss {{.*#+}} xmm0 = [4.4E+1,0.0E+0,0.0E+0,0.0E+0] 1030; AVX-NEXT: vmovss {{.*#+}} xmm1 = [3.0E+0,0.0E+0,0.0E+0,0.0E+0] 1031; AVX-NEXT: callq powf@PLT 1032; AVX-NEXT: vmovaps %xmm0, 
{{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 1033; AVX-NEXT: vmovss {{.*#+}} xmm0 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0] 1034; AVX-NEXT: vmovss {{.*#+}} xmm1 = [3.0E+0,0.0E+0,0.0E+0,0.0E+0] 1035; AVX-NEXT: callq powf@PLT 1036; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill 1037; AVX-NEXT: vmovss {{.*#+}} xmm0 = [4.3E+1,0.0E+0,0.0E+0,0.0E+0] 1038; AVX-NEXT: vmovss {{.*#+}} xmm1 = [3.0E+0,0.0E+0,0.0E+0,0.0E+0] 1039; AVX-NEXT: callq powf@PLT 1040; AVX-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload 1041; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3] 1042; AVX-NEXT: vinsertps $32, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload 1043; AVX-NEXT: # xmm0 = xmm0[0,1],mem[0],xmm0[3] 1044; AVX-NEXT: addq $40, %rsp 1045; AVX-NEXT: .cfi_def_cfa_offset 8 1046; AVX-NEXT: retq 1047entry: 1048 %pow = call <3 x float> @llvm.experimental.constrained.pow.v3f32( 1049 <3 x float> <float 42.0, float 43.0, float 44.0>, 1050 <3 x float> <float 3.0, float 3.0, float 3.0>, 1051 metadata !"round.dynamic", 1052 metadata !"fpexcept.strict") #0 1053 ret <3 x float> %pow 1054} 1055 1056define <3 x double> @constrained_vector_pow_v3f64() #0 { 1057; CHECK-LABEL: constrained_vector_pow_v3f64: 1058; CHECK: # %bb.0: # %entry 1059; CHECK-NEXT: subq $24, %rsp 1060; CHECK-NEXT: .cfi_def_cfa_offset 32 1061; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0] 1062; CHECK-NEXT: movsd {{.*#+}} xmm1 = [3.0E+0,0.0E+0] 1063; CHECK-NEXT: callq pow@PLT 1064; CHECK-NEXT: movsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill 1065; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0] 1066; CHECK-NEXT: movsd {{.*#+}} xmm1 = [3.0E+0,0.0E+0] 1067; CHECK-NEXT: callq pow@PLT 1068; CHECK-NEXT: movsd %xmm0, (%rsp) # 8-byte Spill 1069; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2200000000000003E+1,0.0E+0] 1070; CHECK-NEXT: movsd {{.*#+}} xmm1 = [3.0E+0,0.0E+0] 1071; CHECK-NEXT: callq pow@PLT 1072; CHECK-NEXT: movsd %xmm0, {{[0-9]+}}(%rsp) 1073; CHECK-NEXT: fldl {{[0-9]+}}(%rsp) 1074; CHECK-NEXT: wait 
1075; CHECK-NEXT: movsd (%rsp), %xmm0 # 8-byte Reload 1076; CHECK-NEXT: # xmm0 = mem[0],zero 1077; CHECK-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 8-byte Reload 1078; CHECK-NEXT: # xmm1 = mem[0],zero 1079; CHECK-NEXT: addq $24, %rsp 1080; CHECK-NEXT: .cfi_def_cfa_offset 8 1081; CHECK-NEXT: retq 1082; 1083; AVX-LABEL: constrained_vector_pow_v3f64: 1084; AVX: # %bb.0: # %entry 1085; AVX-NEXT: subq $40, %rsp 1086; AVX-NEXT: .cfi_def_cfa_offset 48 1087; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0] 1088; AVX-NEXT: vmovsd {{.*#+}} xmm1 = [3.0E+0,0.0E+0] 1089; AVX-NEXT: callq pow@PLT 1090; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill 1091; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0] 1092; AVX-NEXT: vmovsd {{.*#+}} xmm1 = [3.0E+0,0.0E+0] 1093; AVX-NEXT: callq pow@PLT 1094; AVX-NEXT: vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload 1095; AVX-NEXT: # xmm0 = xmm0[0],mem[0] 1096; AVX-NEXT: vmovups %ymm0, (%rsp) # 32-byte Spill 1097; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2200000000000003E+1,0.0E+0] 1098; AVX-NEXT: vmovsd {{.*#+}} xmm1 = [3.0E+0,0.0E+0] 1099; AVX-NEXT: vzeroupper 1100; AVX-NEXT: callq pow@PLT 1101; AVX-NEXT: vmovups (%rsp), %ymm1 # 32-byte Reload 1102; AVX-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 1103; AVX-NEXT: addq $40, %rsp 1104; AVX-NEXT: .cfi_def_cfa_offset 8 1105; AVX-NEXT: retq 1106entry: 1107 %pow = call <3 x double> @llvm.experimental.constrained.pow.v3f64( 1108 <3 x double> <double 42.0, double 42.1, double 42.2>, 1109 <3 x double> <double 3.0, double 3.0, double 3.0>, 1110 metadata !"round.dynamic", 1111 metadata !"fpexcept.strict") #0 1112 ret <3 x double> %pow 1113} 1114 1115define <4 x double> @constrained_vector_pow_v4f64() #0 { 1116; CHECK-LABEL: constrained_vector_pow_v4f64: 1117; CHECK: # %bb.0: # %entry 1118; CHECK-NEXT: subq $40, %rsp 1119; CHECK-NEXT: .cfi_def_cfa_offset 48 1120; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2200000000000003E+1,0.0E+0] 1121; CHECK-NEXT: movsd {{.*#+}} xmm1 = [3.0E+0,0.0E+0] 1122; 
CHECK-NEXT: callq pow@PLT 1123; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill 1124; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0] 1125; CHECK-NEXT: movsd {{.*#+}} xmm1 = [3.0E+0,0.0E+0] 1126; CHECK-NEXT: callq pow@PLT 1127; CHECK-NEXT: unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload 1128; CHECK-NEXT: # xmm0 = xmm0[0],mem[0] 1129; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill 1130; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2399999999999999E+1,0.0E+0] 1131; CHECK-NEXT: movsd {{.*#+}} xmm1 = [3.0E+0,0.0E+0] 1132; CHECK-NEXT: callq pow@PLT 1133; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 1134; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2299999999999997E+1,0.0E+0] 1135; CHECK-NEXT: movsd {{.*#+}} xmm1 = [3.0E+0,0.0E+0] 1136; CHECK-NEXT: callq pow@PLT 1137; CHECK-NEXT: movaps %xmm0, %xmm1 1138; CHECK-NEXT: unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload 1139; CHECK-NEXT: # xmm1 = xmm1[0],mem[0] 1140; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload 1141; CHECK-NEXT: addq $40, %rsp 1142; CHECK-NEXT: .cfi_def_cfa_offset 8 1143; CHECK-NEXT: retq 1144; 1145; AVX-LABEL: constrained_vector_pow_v4f64: 1146; AVX: # %bb.0: # %entry 1147; AVX-NEXT: subq $40, %rsp 1148; AVX-NEXT: .cfi_def_cfa_offset 48 1149; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2399999999999999E+1,0.0E+0] 1150; AVX-NEXT: vmovsd {{.*#+}} xmm1 = [3.0E+0,0.0E+0] 1151; AVX-NEXT: callq pow@PLT 1152; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill 1153; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2299999999999997E+1,0.0E+0] 1154; AVX-NEXT: vmovsd {{.*#+}} xmm1 = [3.0E+0,0.0E+0] 1155; AVX-NEXT: callq pow@PLT 1156; AVX-NEXT: vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload 1157; AVX-NEXT: # xmm0 = xmm0[0],mem[0] 1158; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill 1159; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2200000000000003E+1,0.0E+0] 1160; AVX-NEXT: vmovsd {{.*#+}} xmm1 = [3.0E+0,0.0E+0] 1161; AVX-NEXT: callq pow@PLT 1162; AVX-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) 
# 16-byte Spill 1163; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0] 1164; AVX-NEXT: vmovsd {{.*#+}} xmm1 = [3.0E+0,0.0E+0] 1165; AVX-NEXT: callq pow@PLT 1166; AVX-NEXT: vunpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload 1167; AVX-NEXT: # xmm0 = xmm0[0],mem[0] 1168; AVX-NEXT: vinsertf128 $1, (%rsp), %ymm0, %ymm0 # 16-byte Folded Reload 1169; AVX-NEXT: addq $40, %rsp 1170; AVX-NEXT: .cfi_def_cfa_offset 8 1171; AVX-NEXT: retq 1172entry: 1173 %pow = call <4 x double> @llvm.experimental.constrained.pow.v4f64( 1174 <4 x double> <double 42.1, double 42.2, 1175 double 42.3, double 42.4>, 1176 <4 x double> <double 3.0, double 3.0, 1177 double 3.0, double 3.0>, 1178 metadata !"round.dynamic", 1179 metadata !"fpexcept.strict") #0 1180 ret <4 x double> %pow 1181} 1182 1183define <1 x float> @constrained_vector_powi_v1f32() #0 { 1184; CHECK-LABEL: constrained_vector_powi_v1f32: 1185; CHECK: # %bb.0: # %entry 1186; CHECK-NEXT: pushq %rax 1187; CHECK-NEXT: .cfi_def_cfa_offset 16 1188; CHECK-NEXT: movss {{.*#+}} xmm0 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0] 1189; CHECK-NEXT: movl $3, %edi 1190; CHECK-NEXT: callq __powisf2@PLT 1191; CHECK-NEXT: popq %rax 1192; CHECK-NEXT: .cfi_def_cfa_offset 8 1193; CHECK-NEXT: retq 1194; 1195; AVX-LABEL: constrained_vector_powi_v1f32: 1196; AVX: # %bb.0: # %entry 1197; AVX-NEXT: pushq %rax 1198; AVX-NEXT: .cfi_def_cfa_offset 16 1199; AVX-NEXT: vmovss {{.*#+}} xmm0 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0] 1200; AVX-NEXT: movl $3, %edi 1201; AVX-NEXT: callq __powisf2@PLT 1202; AVX-NEXT: popq %rax 1203; AVX-NEXT: .cfi_def_cfa_offset 8 1204; AVX-NEXT: retq 1205entry: 1206 %powi = call <1 x float> @llvm.experimental.constrained.powi.v1f32( 1207 <1 x float> <float 42.0>, 1208 i32 3, 1209 metadata !"round.dynamic", 1210 metadata !"fpexcept.strict") #0 1211 ret <1 x float> %powi 1212} 1213 1214define <2 x double> @constrained_vector_powi_v2f64() #0 { 1215; CHECK-LABEL: constrained_vector_powi_v2f64: 1216; CHECK: # %bb.0: # %entry 
1217; CHECK-NEXT: subq $24, %rsp 1218; CHECK-NEXT: .cfi_def_cfa_offset 32 1219; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2200000000000003E+1,0.0E+0] 1220; CHECK-NEXT: movl $3, %edi 1221; CHECK-NEXT: callq __powidf2@PLT 1222; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill 1223; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0] 1224; CHECK-NEXT: movl $3, %edi 1225; CHECK-NEXT: callq __powidf2@PLT 1226; CHECK-NEXT: unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload 1227; CHECK-NEXT: # xmm0 = xmm0[0],mem[0] 1228; CHECK-NEXT: addq $24, %rsp 1229; CHECK-NEXT: .cfi_def_cfa_offset 8 1230; CHECK-NEXT: retq 1231; 1232; AVX-LABEL: constrained_vector_powi_v2f64: 1233; AVX: # %bb.0: # %entry 1234; AVX-NEXT: subq $24, %rsp 1235; AVX-NEXT: .cfi_def_cfa_offset 32 1236; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2200000000000003E+1,0.0E+0] 1237; AVX-NEXT: movl $3, %edi 1238; AVX-NEXT: callq __powidf2@PLT 1239; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill 1240; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0] 1241; AVX-NEXT: movl $3, %edi 1242; AVX-NEXT: callq __powidf2@PLT 1243; AVX-NEXT: vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload 1244; AVX-NEXT: # xmm0 = xmm0[0],mem[0] 1245; AVX-NEXT: addq $24, %rsp 1246; AVX-NEXT: .cfi_def_cfa_offset 8 1247; AVX-NEXT: retq 1248entry: 1249 %powi = call <2 x double> @llvm.experimental.constrained.powi.v2f64( 1250 <2 x double> <double 42.1, double 42.2>, 1251 i32 3, 1252 metadata !"round.dynamic", 1253 metadata !"fpexcept.strict") #0 1254 ret <2 x double> %powi 1255} 1256 1257define <3 x float> @constrained_vector_powi_v3f32() #0 { 1258; CHECK-LABEL: constrained_vector_powi_v3f32: 1259; CHECK: # %bb.0: # %entry 1260; CHECK-NEXT: subq $40, %rsp 1261; CHECK-NEXT: .cfi_def_cfa_offset 48 1262; CHECK-NEXT: movss {{.*#+}} xmm0 = [4.4E+1,0.0E+0,0.0E+0,0.0E+0] 1263; CHECK-NEXT: movl $3, %edi 1264; CHECK-NEXT: callq __powisf2@PLT 1265; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 1266; CHECK-NEXT: movss 
{{.*#+}} xmm0 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0] 1267; CHECK-NEXT: movl $3, %edi 1268; CHECK-NEXT: callq __powisf2@PLT 1269; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill 1270; CHECK-NEXT: movss {{.*#+}} xmm0 = [4.3E+1,0.0E+0,0.0E+0,0.0E+0] 1271; CHECK-NEXT: movl $3, %edi 1272; CHECK-NEXT: callq __powisf2@PLT 1273; CHECK-NEXT: movaps (%rsp), %xmm1 # 16-byte Reload 1274; CHECK-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] 1275; CHECK-NEXT: unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload 1276; CHECK-NEXT: # xmm1 = xmm1[0],mem[0] 1277; CHECK-NEXT: movaps %xmm1, %xmm0 1278; CHECK-NEXT: addq $40, %rsp 1279; CHECK-NEXT: .cfi_def_cfa_offset 8 1280; CHECK-NEXT: retq 1281; 1282; AVX-LABEL: constrained_vector_powi_v3f32: 1283; AVX: # %bb.0: # %entry 1284; AVX-NEXT: subq $40, %rsp 1285; AVX-NEXT: .cfi_def_cfa_offset 48 1286; AVX-NEXT: vmovss {{.*#+}} xmm0 = [4.4E+1,0.0E+0,0.0E+0,0.0E+0] 1287; AVX-NEXT: movl $3, %edi 1288; AVX-NEXT: callq __powisf2@PLT 1289; AVX-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 1290; AVX-NEXT: vmovss {{.*#+}} xmm0 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0] 1291; AVX-NEXT: movl $3, %edi 1292; AVX-NEXT: callq __powisf2@PLT 1293; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill 1294; AVX-NEXT: vmovss {{.*#+}} xmm0 = [4.3E+1,0.0E+0,0.0E+0,0.0E+0] 1295; AVX-NEXT: movl $3, %edi 1296; AVX-NEXT: callq __powisf2@PLT 1297; AVX-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload 1298; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3] 1299; AVX-NEXT: vinsertps $32, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload 1300; AVX-NEXT: # xmm0 = xmm0[0,1],mem[0],xmm0[3] 1301; AVX-NEXT: addq $40, %rsp 1302; AVX-NEXT: .cfi_def_cfa_offset 8 1303; AVX-NEXT: retq 1304entry: 1305 %powi = call <3 x float> @llvm.experimental.constrained.powi.v3f32( 1306 <3 x float> <float 42.0, float 43.0, float 44.0>, 1307 i32 3, 1308 metadata !"round.dynamic", 1309 metadata !"fpexcept.strict") #0 1310 ret <3 x float> %powi 1311} 1312 
1313define <3 x double> @constrained_vector_powi_v3f64() #0 { 1314; CHECK-LABEL: constrained_vector_powi_v3f64: 1315; CHECK: # %bb.0: # %entry 1316; CHECK-NEXT: subq $24, %rsp 1317; CHECK-NEXT: .cfi_def_cfa_offset 32 1318; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0] 1319; CHECK-NEXT: movl $3, %edi 1320; CHECK-NEXT: callq __powidf2@PLT 1321; CHECK-NEXT: movsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill 1322; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0] 1323; CHECK-NEXT: movl $3, %edi 1324; CHECK-NEXT: callq __powidf2@PLT 1325; CHECK-NEXT: movsd %xmm0, (%rsp) # 8-byte Spill 1326; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2200000000000003E+1,0.0E+0] 1327; CHECK-NEXT: movl $3, %edi 1328; CHECK-NEXT: callq __powidf2@PLT 1329; CHECK-NEXT: movsd %xmm0, {{[0-9]+}}(%rsp) 1330; CHECK-NEXT: fldl {{[0-9]+}}(%rsp) 1331; CHECK-NEXT: wait 1332; CHECK-NEXT: movsd (%rsp), %xmm0 # 8-byte Reload 1333; CHECK-NEXT: # xmm0 = mem[0],zero 1334; CHECK-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 8-byte Reload 1335; CHECK-NEXT: # xmm1 = mem[0],zero 1336; CHECK-NEXT: addq $24, %rsp 1337; CHECK-NEXT: .cfi_def_cfa_offset 8 1338; CHECK-NEXT: retq 1339; 1340; AVX-LABEL: constrained_vector_powi_v3f64: 1341; AVX: # %bb.0: # %entry 1342; AVX-NEXT: subq $40, %rsp 1343; AVX-NEXT: .cfi_def_cfa_offset 48 1344; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0] 1345; AVX-NEXT: movl $3, %edi 1346; AVX-NEXT: callq __powidf2@PLT 1347; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill 1348; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0] 1349; AVX-NEXT: movl $3, %edi 1350; AVX-NEXT: callq __powidf2@PLT 1351; AVX-NEXT: vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload 1352; AVX-NEXT: # xmm0 = xmm0[0],mem[0] 1353; AVX-NEXT: vmovups %ymm0, (%rsp) # 32-byte Spill 1354; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2200000000000003E+1,0.0E+0] 1355; AVX-NEXT: movl $3, %edi 1356; AVX-NEXT: vzeroupper 1357; AVX-NEXT: callq __powidf2@PLT 1358; AVX-NEXT: vmovups (%rsp), %ymm1 # 32-byte 
Reload 1359; AVX-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 1360; AVX-NEXT: addq $40, %rsp 1361; AVX-NEXT: .cfi_def_cfa_offset 8 1362; AVX-NEXT: retq 1363entry: 1364 %powi = call <3 x double> @llvm.experimental.constrained.powi.v3f64( 1365 <3 x double> <double 42.0, double 42.1, double 42.2>, 1366 i32 3, 1367 metadata !"round.dynamic", 1368 metadata !"fpexcept.strict") #0 1369 ret <3 x double> %powi 1370} 1371 1372define <4 x double> @constrained_vector_powi_v4f64() #0 { 1373; CHECK-LABEL: constrained_vector_powi_v4f64: 1374; CHECK: # %bb.0: # %entry 1375; CHECK-NEXT: subq $40, %rsp 1376; CHECK-NEXT: .cfi_def_cfa_offset 48 1377; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2200000000000003E+1,0.0E+0] 1378; CHECK-NEXT: movl $3, %edi 1379; CHECK-NEXT: callq __powidf2@PLT 1380; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill 1381; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0] 1382; CHECK-NEXT: movl $3, %edi 1383; CHECK-NEXT: callq __powidf2@PLT 1384; CHECK-NEXT: unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload 1385; CHECK-NEXT: # xmm0 = xmm0[0],mem[0] 1386; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill 1387; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2399999999999999E+1,0.0E+0] 1388; CHECK-NEXT: movl $3, %edi 1389; CHECK-NEXT: callq __powidf2@PLT 1390; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 1391; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2299999999999997E+1,0.0E+0] 1392; CHECK-NEXT: movl $3, %edi 1393; CHECK-NEXT: callq __powidf2@PLT 1394; CHECK-NEXT: movaps %xmm0, %xmm1 1395; CHECK-NEXT: unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload 1396; CHECK-NEXT: # xmm1 = xmm1[0],mem[0] 1397; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload 1398; CHECK-NEXT: addq $40, %rsp 1399; CHECK-NEXT: .cfi_def_cfa_offset 8 1400; CHECK-NEXT: retq 1401; 1402; AVX-LABEL: constrained_vector_powi_v4f64: 1403; AVX: # %bb.0: # %entry 1404; AVX-NEXT: subq $40, %rsp 1405; AVX-NEXT: .cfi_def_cfa_offset 48 1406; AVX-NEXT: vmovsd {{.*#+}} xmm0 = 
[4.2399999999999999E+1,0.0E+0] 1407; AVX-NEXT: movl $3, %edi 1408; AVX-NEXT: callq __powidf2@PLT 1409; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill 1410; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2299999999999997E+1,0.0E+0] 1411; AVX-NEXT: movl $3, %edi 1412; AVX-NEXT: callq __powidf2@PLT 1413; AVX-NEXT: vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload 1414; AVX-NEXT: # xmm0 = xmm0[0],mem[0] 1415; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill 1416; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2200000000000003E+1,0.0E+0] 1417; AVX-NEXT: movl $3, %edi 1418; AVX-NEXT: callq __powidf2@PLT 1419; AVX-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 1420; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0] 1421; AVX-NEXT: movl $3, %edi 1422; AVX-NEXT: callq __powidf2@PLT 1423; AVX-NEXT: vunpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload 1424; AVX-NEXT: # xmm0 = xmm0[0],mem[0] 1425; AVX-NEXT: vinsertf128 $1, (%rsp), %ymm0, %ymm0 # 16-byte Folded Reload 1426; AVX-NEXT: addq $40, %rsp 1427; AVX-NEXT: .cfi_def_cfa_offset 8 1428; AVX-NEXT: retq 1429entry: 1430 %powi = call <4 x double> @llvm.experimental.constrained.powi.v4f64( 1431 <4 x double> <double 42.1, double 42.2, 1432 double 42.3, double 42.4>, 1433 i32 3, 1434 metadata !"round.dynamic", 1435 metadata !"fpexcept.strict") #0 1436 ret <4 x double> %powi 1437} 1438 1439define <1 x float> @constrained_vector_sin_v1f32() #0 { 1440; CHECK-LABEL: constrained_vector_sin_v1f32: 1441; CHECK: # %bb.0: # %entry 1442; CHECK-NEXT: pushq %rax 1443; CHECK-NEXT: .cfi_def_cfa_offset 16 1444; CHECK-NEXT: movss {{.*#+}} xmm0 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0] 1445; CHECK-NEXT: callq sinf@PLT 1446; CHECK-NEXT: popq %rax 1447; CHECK-NEXT: .cfi_def_cfa_offset 8 1448; CHECK-NEXT: retq 1449; 1450; AVX-LABEL: constrained_vector_sin_v1f32: 1451; AVX: # %bb.0: # %entry 1452; AVX-NEXT: pushq %rax 1453; AVX-NEXT: .cfi_def_cfa_offset 16 1454; AVX-NEXT: vmovss {{.*#+}} xmm0 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0] 
1455; AVX-NEXT: callq sinf@PLT 1456; AVX-NEXT: popq %rax 1457; AVX-NEXT: .cfi_def_cfa_offset 8 1458; AVX-NEXT: retq 1459entry: 1460 %sin = call <1 x float> @llvm.experimental.constrained.sin.v1f32( 1461 <1 x float> <float 42.0>, 1462 metadata !"round.dynamic", 1463 metadata !"fpexcept.strict") #0 1464 ret <1 x float> %sin 1465} 1466 1467define <2 x double> @constrained_vector_sin_v2f64() #0 { 1468; CHECK-LABEL: constrained_vector_sin_v2f64: 1469; CHECK: # %bb.0: # %entry 1470; CHECK-NEXT: subq $24, %rsp 1471; CHECK-NEXT: .cfi_def_cfa_offset 32 1472; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0] 1473; CHECK-NEXT: callq sin@PLT 1474; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill 1475; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0] 1476; CHECK-NEXT: callq sin@PLT 1477; CHECK-NEXT: unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload 1478; CHECK-NEXT: # xmm0 = xmm0[0],mem[0] 1479; CHECK-NEXT: addq $24, %rsp 1480; CHECK-NEXT: .cfi_def_cfa_offset 8 1481; CHECK-NEXT: retq 1482; 1483; AVX-LABEL: constrained_vector_sin_v2f64: 1484; AVX: # %bb.0: # %entry 1485; AVX-NEXT: subq $24, %rsp 1486; AVX-NEXT: .cfi_def_cfa_offset 32 1487; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0] 1488; AVX-NEXT: callq sin@PLT 1489; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill 1490; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0] 1491; AVX-NEXT: callq sin@PLT 1492; AVX-NEXT: vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload 1493; AVX-NEXT: # xmm0 = xmm0[0],mem[0] 1494; AVX-NEXT: addq $24, %rsp 1495; AVX-NEXT: .cfi_def_cfa_offset 8 1496; AVX-NEXT: retq 1497entry: 1498 %sin = call <2 x double> @llvm.experimental.constrained.sin.v2f64( 1499 <2 x double> <double 42.0, double 42.1>, 1500 metadata !"round.dynamic", 1501 metadata !"fpexcept.strict") #0 1502 ret <2 x double> %sin 1503} 1504 1505define <3 x float> @constrained_vector_sin_v3f32() #0 { 1506; CHECK-LABEL: constrained_vector_sin_v3f32: 1507; CHECK: # %bb.0: # %entry 1508; CHECK-NEXT: subq $40, %rsp 
1509; CHECK-NEXT: .cfi_def_cfa_offset 48 1510; CHECK-NEXT: movss {{.*#+}} xmm0 = [4.4E+1,0.0E+0,0.0E+0,0.0E+0] 1511; CHECK-NEXT: callq sinf@PLT 1512; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 1513; CHECK-NEXT: movss {{.*#+}} xmm0 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0] 1514; CHECK-NEXT: callq sinf@PLT 1515; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill 1516; CHECK-NEXT: movss {{.*#+}} xmm0 = [4.3E+1,0.0E+0,0.0E+0,0.0E+0] 1517; CHECK-NEXT: callq sinf@PLT 1518; CHECK-NEXT: movaps (%rsp), %xmm1 # 16-byte Reload 1519; CHECK-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] 1520; CHECK-NEXT: unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload 1521; CHECK-NEXT: # xmm1 = xmm1[0],mem[0] 1522; CHECK-NEXT: movaps %xmm1, %xmm0 1523; CHECK-NEXT: addq $40, %rsp 1524; CHECK-NEXT: .cfi_def_cfa_offset 8 1525; CHECK-NEXT: retq 1526; 1527; AVX-LABEL: constrained_vector_sin_v3f32: 1528; AVX: # %bb.0: # %entry 1529; AVX-NEXT: subq $40, %rsp 1530; AVX-NEXT: .cfi_def_cfa_offset 48 1531; AVX-NEXT: vmovss {{.*#+}} xmm0 = [4.4E+1,0.0E+0,0.0E+0,0.0E+0] 1532; AVX-NEXT: callq sinf@PLT 1533; AVX-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 1534; AVX-NEXT: vmovss {{.*#+}} xmm0 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0] 1535; AVX-NEXT: callq sinf@PLT 1536; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill 1537; AVX-NEXT: vmovss {{.*#+}} xmm0 = [4.3E+1,0.0E+0,0.0E+0,0.0E+0] 1538; AVX-NEXT: callq sinf@PLT 1539; AVX-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload 1540; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3] 1541; AVX-NEXT: vinsertps $32, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload 1542; AVX-NEXT: # xmm0 = xmm0[0,1],mem[0],xmm0[3] 1543; AVX-NEXT: addq $40, %rsp 1544; AVX-NEXT: .cfi_def_cfa_offset 8 1545; AVX-NEXT: retq 1546entry: 1547 %sin = call <3 x float> @llvm.experimental.constrained.sin.v3f32( 1548 <3 x float> <float 42.0, float 43.0, float 44.0>, 1549 metadata !"round.dynamic", 1550 metadata 
!"fpexcept.strict") #0 1551 ret <3 x float> %sin 1552} 1553 1554define <3 x double> @constrained_vector_sin_v3f64() #0 { 1555; CHECK-LABEL: constrained_vector_sin_v3f64: 1556; CHECK: # %bb.0: # %entry 1557; CHECK-NEXT: subq $24, %rsp 1558; CHECK-NEXT: .cfi_def_cfa_offset 32 1559; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0] 1560; CHECK-NEXT: callq sin@PLT 1561; CHECK-NEXT: movsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill 1562; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0] 1563; CHECK-NEXT: callq sin@PLT 1564; CHECK-NEXT: movsd %xmm0, (%rsp) # 8-byte Spill 1565; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2200000000000003E+1,0.0E+0] 1566; CHECK-NEXT: callq sin@PLT 1567; CHECK-NEXT: movsd %xmm0, {{[0-9]+}}(%rsp) 1568; CHECK-NEXT: fldl {{[0-9]+}}(%rsp) 1569; CHECK-NEXT: wait 1570; CHECK-NEXT: movsd (%rsp), %xmm0 # 8-byte Reload 1571; CHECK-NEXT: # xmm0 = mem[0],zero 1572; CHECK-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 8-byte Reload 1573; CHECK-NEXT: # xmm1 = mem[0],zero 1574; CHECK-NEXT: addq $24, %rsp 1575; CHECK-NEXT: .cfi_def_cfa_offset 8 1576; CHECK-NEXT: retq 1577; 1578; AVX-LABEL: constrained_vector_sin_v3f64: 1579; AVX: # %bb.0: # %entry 1580; AVX-NEXT: subq $40, %rsp 1581; AVX-NEXT: .cfi_def_cfa_offset 48 1582; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0] 1583; AVX-NEXT: callq sin@PLT 1584; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill 1585; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0] 1586; AVX-NEXT: callq sin@PLT 1587; AVX-NEXT: vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload 1588; AVX-NEXT: # xmm0 = xmm0[0],mem[0] 1589; AVX-NEXT: vmovups %ymm0, (%rsp) # 32-byte Spill 1590; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2200000000000003E+1,0.0E+0] 1591; AVX-NEXT: vzeroupper 1592; AVX-NEXT: callq sin@PLT 1593; AVX-NEXT: vmovups (%rsp), %ymm1 # 32-byte Reload 1594; AVX-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 1595; AVX-NEXT: addq $40, %rsp 1596; AVX-NEXT: .cfi_def_cfa_offset 8 1597; AVX-NEXT: retq 1598entry: 1599 
%sin = call <3 x double> @llvm.experimental.constrained.sin.v3f64( 1600 <3 x double> <double 42.0, double 42.1, double 42.2>, 1601 metadata !"round.dynamic", 1602 metadata !"fpexcept.strict") #0 1603 ret <3 x double> %sin 1604} 1605 1606define <4 x double> @constrained_vector_sin_v4f64() #0 { 1607; CHECK-LABEL: constrained_vector_sin_v4f64: 1608; CHECK: # %bb.0: # %entry 1609; CHECK-NEXT: subq $40, %rsp 1610; CHECK-NEXT: .cfi_def_cfa_offset 48 1611; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0] 1612; CHECK-NEXT: callq sin@PLT 1613; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill 1614; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0] 1615; CHECK-NEXT: callq sin@PLT 1616; CHECK-NEXT: unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload 1617; CHECK-NEXT: # xmm0 = xmm0[0],mem[0] 1618; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill 1619; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2299999999999997E+1,0.0E+0] 1620; CHECK-NEXT: callq sin@PLT 1621; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 1622; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2200000000000003E+1,0.0E+0] 1623; CHECK-NEXT: callq sin@PLT 1624; CHECK-NEXT: movaps %xmm0, %xmm1 1625; CHECK-NEXT: unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload 1626; CHECK-NEXT: # xmm1 = xmm1[0],mem[0] 1627; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload 1628; CHECK-NEXT: addq $40, %rsp 1629; CHECK-NEXT: .cfi_def_cfa_offset 8 1630; CHECK-NEXT: retq 1631; 1632; AVX-LABEL: constrained_vector_sin_v4f64: 1633; AVX: # %bb.0: # %entry 1634; AVX-NEXT: subq $40, %rsp 1635; AVX-NEXT: .cfi_def_cfa_offset 48 1636; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2299999999999997E+1,0.0E+0] 1637; AVX-NEXT: callq sin@PLT 1638; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill 1639; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2200000000000003E+1,0.0E+0] 1640; AVX-NEXT: callq sin@PLT 1641; AVX-NEXT: vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload 1642; AVX-NEXT: # xmm0 = xmm0[0],mem[0] 1643; AVX-NEXT: vmovaps %xmm0, 
(%rsp) # 16-byte Spill 1644; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0] 1645; AVX-NEXT: callq sin@PLT 1646; AVX-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 1647; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0] 1648; AVX-NEXT: callq sin@PLT 1649; AVX-NEXT: vunpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload 1650; AVX-NEXT: # xmm0 = xmm0[0],mem[0] 1651; AVX-NEXT: vinsertf128 $1, (%rsp), %ymm0, %ymm0 # 16-byte Folded Reload 1652; AVX-NEXT: addq $40, %rsp 1653; AVX-NEXT: .cfi_def_cfa_offset 8 1654; AVX-NEXT: retq 1655entry: 1656 %sin = call <4 x double> @llvm.experimental.constrained.sin.v4f64( 1657 <4 x double> <double 42.0, double 42.1, 1658 double 42.2, double 42.3>, 1659 metadata !"round.dynamic", 1660 metadata !"fpexcept.strict") #0 1661 ret <4 x double> %sin 1662} 1663 1664define <1 x float> @constrained_vector_cos_v1f32() #0 { 1665; CHECK-LABEL: constrained_vector_cos_v1f32: 1666; CHECK: # %bb.0: # %entry 1667; CHECK-NEXT: pushq %rax 1668; CHECK-NEXT: .cfi_def_cfa_offset 16 1669; CHECK-NEXT: movss {{.*#+}} xmm0 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0] 1670; CHECK-NEXT: callq cosf@PLT 1671; CHECK-NEXT: popq %rax 1672; CHECK-NEXT: .cfi_def_cfa_offset 8 1673; CHECK-NEXT: retq 1674; 1675; AVX-LABEL: constrained_vector_cos_v1f32: 1676; AVX: # %bb.0: # %entry 1677; AVX-NEXT: pushq %rax 1678; AVX-NEXT: .cfi_def_cfa_offset 16 1679; AVX-NEXT: vmovss {{.*#+}} xmm0 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0] 1680; AVX-NEXT: callq cosf@PLT 1681; AVX-NEXT: popq %rax 1682; AVX-NEXT: .cfi_def_cfa_offset 8 1683; AVX-NEXT: retq 1684entry: 1685 %cos = call <1 x float> @llvm.experimental.constrained.cos.v1f32( 1686 <1 x float> <float 42.0>, 1687 metadata !"round.dynamic", 1688 metadata !"fpexcept.strict") #0 1689 ret <1 x float> %cos 1690} 1691 1692define <2 x double> @constrained_vector_cos_v2f64() #0 { 1693; CHECK-LABEL: constrained_vector_cos_v2f64: 1694; CHECK: # %bb.0: # %entry 1695; CHECK-NEXT: subq $24, %rsp 1696; CHECK-NEXT: 
.cfi_def_cfa_offset 32 1697; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0] 1698; CHECK-NEXT: callq cos@PLT 1699; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill 1700; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0] 1701; CHECK-NEXT: callq cos@PLT 1702; CHECK-NEXT: unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload 1703; CHECK-NEXT: # xmm0 = xmm0[0],mem[0] 1704; CHECK-NEXT: addq $24, %rsp 1705; CHECK-NEXT: .cfi_def_cfa_offset 8 1706; CHECK-NEXT: retq 1707; 1708; AVX-LABEL: constrained_vector_cos_v2f64: 1709; AVX: # %bb.0: # %entry 1710; AVX-NEXT: subq $24, %rsp 1711; AVX-NEXT: .cfi_def_cfa_offset 32 1712; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0] 1713; AVX-NEXT: callq cos@PLT 1714; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill 1715; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0] 1716; AVX-NEXT: callq cos@PLT 1717; AVX-NEXT: vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload 1718; AVX-NEXT: # xmm0 = xmm0[0],mem[0] 1719; AVX-NEXT: addq $24, %rsp 1720; AVX-NEXT: .cfi_def_cfa_offset 8 1721; AVX-NEXT: retq 1722entry: 1723 %cos = call <2 x double> @llvm.experimental.constrained.cos.v2f64( 1724 <2 x double> <double 42.0, double 42.1>, 1725 metadata !"round.dynamic", 1726 metadata !"fpexcept.strict") #0 1727 ret <2 x double> %cos 1728} 1729 1730define <3 x float> @constrained_vector_cos_v3f32() #0 { 1731; CHECK-LABEL: constrained_vector_cos_v3f32: 1732; CHECK: # %bb.0: # %entry 1733; CHECK-NEXT: subq $40, %rsp 1734; CHECK-NEXT: .cfi_def_cfa_offset 48 1735; CHECK-NEXT: movss {{.*#+}} xmm0 = [4.4E+1,0.0E+0,0.0E+0,0.0E+0] 1736; CHECK-NEXT: callq cosf@PLT 1737; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 1738; CHECK-NEXT: movss {{.*#+}} xmm0 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0] 1739; CHECK-NEXT: callq cosf@PLT 1740; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill 1741; CHECK-NEXT: movss {{.*#+}} xmm0 = [4.3E+1,0.0E+0,0.0E+0,0.0E+0] 1742; CHECK-NEXT: callq cosf@PLT 1743; CHECK-NEXT: movaps (%rsp), %xmm1 # 16-byte 
Reload 1744; CHECK-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] 1745; CHECK-NEXT: unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload 1746; CHECK-NEXT: # xmm1 = xmm1[0],mem[0] 1747; CHECK-NEXT: movaps %xmm1, %xmm0 1748; CHECK-NEXT: addq $40, %rsp 1749; CHECK-NEXT: .cfi_def_cfa_offset 8 1750; CHECK-NEXT: retq 1751; 1752; AVX-LABEL: constrained_vector_cos_v3f32: 1753; AVX: # %bb.0: # %entry 1754; AVX-NEXT: subq $40, %rsp 1755; AVX-NEXT: .cfi_def_cfa_offset 48 1756; AVX-NEXT: vmovss {{.*#+}} xmm0 = [4.4E+1,0.0E+0,0.0E+0,0.0E+0] 1757; AVX-NEXT: callq cosf@PLT 1758; AVX-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 1759; AVX-NEXT: vmovss {{.*#+}} xmm0 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0] 1760; AVX-NEXT: callq cosf@PLT 1761; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill 1762; AVX-NEXT: vmovss {{.*#+}} xmm0 = [4.3E+1,0.0E+0,0.0E+0,0.0E+0] 1763; AVX-NEXT: callq cosf@PLT 1764; AVX-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload 1765; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3] 1766; AVX-NEXT: vinsertps $32, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload 1767; AVX-NEXT: # xmm0 = xmm0[0,1],mem[0],xmm0[3] 1768; AVX-NEXT: addq $40, %rsp 1769; AVX-NEXT: .cfi_def_cfa_offset 8 1770; AVX-NEXT: retq 1771entry: 1772 %cos = call <3 x float> @llvm.experimental.constrained.cos.v3f32( 1773 <3 x float> <float 42.0, float 43.0, float 44.0>, 1774 metadata !"round.dynamic", 1775 metadata !"fpexcept.strict") #0 1776 ret <3 x float> %cos 1777} 1778 1779define <3 x double> @constrained_vector_cos_v3f64() #0 { 1780; CHECK-LABEL: constrained_vector_cos_v3f64: 1781; CHECK: # %bb.0: # %entry 1782; CHECK-NEXT: subq $24, %rsp 1783; CHECK-NEXT: .cfi_def_cfa_offset 32 1784; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0] 1785; CHECK-NEXT: callq cos@PLT 1786; CHECK-NEXT: movsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill 1787; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0] 1788; CHECK-NEXT: callq cos@PLT 
1789; CHECK-NEXT: movsd %xmm0, (%rsp) # 8-byte Spill 1790; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2200000000000003E+1,0.0E+0] 1791; CHECK-NEXT: callq cos@PLT 1792; CHECK-NEXT: movsd %xmm0, {{[0-9]+}}(%rsp) 1793; CHECK-NEXT: fldl {{[0-9]+}}(%rsp) 1794; CHECK-NEXT: wait 1795; CHECK-NEXT: movsd (%rsp), %xmm0 # 8-byte Reload 1796; CHECK-NEXT: # xmm0 = mem[0],zero 1797; CHECK-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 8-byte Reload 1798; CHECK-NEXT: # xmm1 = mem[0],zero 1799; CHECK-NEXT: addq $24, %rsp 1800; CHECK-NEXT: .cfi_def_cfa_offset 8 1801; CHECK-NEXT: retq 1802; 1803; AVX-LABEL: constrained_vector_cos_v3f64: 1804; AVX: # %bb.0: # %entry 1805; AVX-NEXT: subq $40, %rsp 1806; AVX-NEXT: .cfi_def_cfa_offset 48 1807; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0] 1808; AVX-NEXT: callq cos@PLT 1809; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill 1810; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0] 1811; AVX-NEXT: callq cos@PLT 1812; AVX-NEXT: vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload 1813; AVX-NEXT: # xmm0 = xmm0[0],mem[0] 1814; AVX-NEXT: vmovups %ymm0, (%rsp) # 32-byte Spill 1815; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2200000000000003E+1,0.0E+0] 1816; AVX-NEXT: vzeroupper 1817; AVX-NEXT: callq cos@PLT 1818; AVX-NEXT: vmovups (%rsp), %ymm1 # 32-byte Reload 1819; AVX-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 1820; AVX-NEXT: addq $40, %rsp 1821; AVX-NEXT: .cfi_def_cfa_offset 8 1822; AVX-NEXT: retq 1823entry: 1824 %cos = call <3 x double> @llvm.experimental.constrained.cos.v3f64( 1825 <3 x double> <double 42.0, double 42.1, double 42.2>, 1826 metadata !"round.dynamic", 1827 metadata !"fpexcept.strict") #0 1828 ret <3 x double> %cos 1829} 1830 1831define <4 x double> @constrained_vector_cos_v4f64() #0 { 1832; CHECK-LABEL: constrained_vector_cos_v4f64: 1833; CHECK: # %bb.0: # %entry 1834; CHECK-NEXT: subq $40, %rsp 1835; CHECK-NEXT: .cfi_def_cfa_offset 48 1836; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0] 1837; CHECK-NEXT: 
callq cos@PLT 1838; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill 1839; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0] 1840; CHECK-NEXT: callq cos@PLT 1841; CHECK-NEXT: unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload 1842; CHECK-NEXT: # xmm0 = xmm0[0],mem[0] 1843; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill 1844; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2299999999999997E+1,0.0E+0] 1845; CHECK-NEXT: callq cos@PLT 1846; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 1847; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2200000000000003E+1,0.0E+0] 1848; CHECK-NEXT: callq cos@PLT 1849; CHECK-NEXT: movaps %xmm0, %xmm1 1850; CHECK-NEXT: unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload 1851; CHECK-NEXT: # xmm1 = xmm1[0],mem[0] 1852; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload 1853; CHECK-NEXT: addq $40, %rsp 1854; CHECK-NEXT: .cfi_def_cfa_offset 8 1855; CHECK-NEXT: retq 1856; 1857; AVX-LABEL: constrained_vector_cos_v4f64: 1858; AVX: # %bb.0: # %entry 1859; AVX-NEXT: subq $40, %rsp 1860; AVX-NEXT: .cfi_def_cfa_offset 48 1861; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2299999999999997E+1,0.0E+0] 1862; AVX-NEXT: callq cos@PLT 1863; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill 1864; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2200000000000003E+1,0.0E+0] 1865; AVX-NEXT: callq cos@PLT 1866; AVX-NEXT: vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload 1867; AVX-NEXT: # xmm0 = xmm0[0],mem[0] 1868; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill 1869; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0] 1870; AVX-NEXT: callq cos@PLT 1871; AVX-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 1872; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0] 1873; AVX-NEXT: callq cos@PLT 1874; AVX-NEXT: vunpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload 1875; AVX-NEXT: # xmm0 = xmm0[0],mem[0] 1876; AVX-NEXT: vinsertf128 $1, (%rsp), %ymm0, %ymm0 # 16-byte Folded Reload 1877; AVX-NEXT: addq $40, %rsp 1878; AVX-NEXT: 
.cfi_def_cfa_offset 8 1879; AVX-NEXT: retq 1880entry: 1881 %cos = call <4 x double> @llvm.experimental.constrained.cos.v4f64( 1882 <4 x double> <double 42.0, double 42.1, 1883 double 42.2, double 42.3>, 1884 metadata !"round.dynamic", 1885 metadata !"fpexcept.strict") #0 1886 ret <4 x double> %cos 1887} 1888 1889define <1 x float> @constrained_vector_exp_v1f32() #0 { 1890; CHECK-LABEL: constrained_vector_exp_v1f32: 1891; CHECK: # %bb.0: # %entry 1892; CHECK-NEXT: pushq %rax 1893; CHECK-NEXT: .cfi_def_cfa_offset 16 1894; CHECK-NEXT: movss {{.*#+}} xmm0 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0] 1895; CHECK-NEXT: callq expf@PLT 1896; CHECK-NEXT: popq %rax 1897; CHECK-NEXT: .cfi_def_cfa_offset 8 1898; CHECK-NEXT: retq 1899; 1900; AVX-LABEL: constrained_vector_exp_v1f32: 1901; AVX: # %bb.0: # %entry 1902; AVX-NEXT: pushq %rax 1903; AVX-NEXT: .cfi_def_cfa_offset 16 1904; AVX-NEXT: vmovss {{.*#+}} xmm0 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0] 1905; AVX-NEXT: callq expf@PLT 1906; AVX-NEXT: popq %rax 1907; AVX-NEXT: .cfi_def_cfa_offset 8 1908; AVX-NEXT: retq 1909entry: 1910 %exp = call <1 x float> @llvm.experimental.constrained.exp.v1f32( 1911 <1 x float> <float 42.0>, 1912 metadata !"round.dynamic", 1913 metadata !"fpexcept.strict") #0 1914 ret <1 x float> %exp 1915} 1916 1917define <2 x double> @constrained_vector_exp_v2f64() #0 { 1918; CHECK-LABEL: constrained_vector_exp_v2f64: 1919; CHECK: # %bb.0: # %entry 1920; CHECK-NEXT: subq $24, %rsp 1921; CHECK-NEXT: .cfi_def_cfa_offset 32 1922; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0] 1923; CHECK-NEXT: callq exp@PLT 1924; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill 1925; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0] 1926; CHECK-NEXT: callq exp@PLT 1927; CHECK-NEXT: unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload 1928; CHECK-NEXT: # xmm0 = xmm0[0],mem[0] 1929; CHECK-NEXT: addq $24, %rsp 1930; CHECK-NEXT: .cfi_def_cfa_offset 8 1931; CHECK-NEXT: retq 1932; 1933; AVX-LABEL: constrained_vector_exp_v2f64: 1934; AVX: # 
%bb.0: # %entry 1935; AVX-NEXT: subq $24, %rsp 1936; AVX-NEXT: .cfi_def_cfa_offset 32 1937; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0] 1938; AVX-NEXT: callq exp@PLT 1939; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill 1940; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0] 1941; AVX-NEXT: callq exp@PLT 1942; AVX-NEXT: vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload 1943; AVX-NEXT: # xmm0 = xmm0[0],mem[0] 1944; AVX-NEXT: addq $24, %rsp 1945; AVX-NEXT: .cfi_def_cfa_offset 8 1946; AVX-NEXT: retq 1947entry: 1948 %exp = call <2 x double> @llvm.experimental.constrained.exp.v2f64( 1949 <2 x double> <double 42.0, double 42.1>, 1950 metadata !"round.dynamic", 1951 metadata !"fpexcept.strict") #0 1952 ret <2 x double> %exp 1953} 1954 1955define <3 x float> @constrained_vector_exp_v3f32() #0 { 1956; CHECK-LABEL: constrained_vector_exp_v3f32: 1957; CHECK: # %bb.0: # %entry 1958; CHECK-NEXT: subq $40, %rsp 1959; CHECK-NEXT: .cfi_def_cfa_offset 48 1960; CHECK-NEXT: movss {{.*#+}} xmm0 = [4.4E+1,0.0E+0,0.0E+0,0.0E+0] 1961; CHECK-NEXT: callq expf@PLT 1962; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 1963; CHECK-NEXT: movss {{.*#+}} xmm0 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0] 1964; CHECK-NEXT: callq expf@PLT 1965; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill 1966; CHECK-NEXT: movss {{.*#+}} xmm0 = [4.3E+1,0.0E+0,0.0E+0,0.0E+0] 1967; CHECK-NEXT: callq expf@PLT 1968; CHECK-NEXT: movaps (%rsp), %xmm1 # 16-byte Reload 1969; CHECK-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] 1970; CHECK-NEXT: unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload 1971; CHECK-NEXT: # xmm1 = xmm1[0],mem[0] 1972; CHECK-NEXT: movaps %xmm1, %xmm0 1973; CHECK-NEXT: addq $40, %rsp 1974; CHECK-NEXT: .cfi_def_cfa_offset 8 1975; CHECK-NEXT: retq 1976; 1977; AVX-LABEL: constrained_vector_exp_v3f32: 1978; AVX: # %bb.0: # %entry 1979; AVX-NEXT: subq $40, %rsp 1980; AVX-NEXT: .cfi_def_cfa_offset 48 1981; AVX-NEXT: vmovss {{.*#+}} xmm0 = 
[4.4E+1,0.0E+0,0.0E+0,0.0E+0] 1982; AVX-NEXT: callq expf@PLT 1983; AVX-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 1984; AVX-NEXT: vmovss {{.*#+}} xmm0 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0] 1985; AVX-NEXT: callq expf@PLT 1986; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill 1987; AVX-NEXT: vmovss {{.*#+}} xmm0 = [4.3E+1,0.0E+0,0.0E+0,0.0E+0] 1988; AVX-NEXT: callq expf@PLT 1989; AVX-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload 1990; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3] 1991; AVX-NEXT: vinsertps $32, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload 1992; AVX-NEXT: # xmm0 = xmm0[0,1],mem[0],xmm0[3] 1993; AVX-NEXT: addq $40, %rsp 1994; AVX-NEXT: .cfi_def_cfa_offset 8 1995; AVX-NEXT: retq 1996entry: 1997 %exp = call <3 x float> @llvm.experimental.constrained.exp.v3f32( 1998 <3 x float> <float 42.0, float 43.0, float 44.0>, 1999 metadata !"round.dynamic", 2000 metadata !"fpexcept.strict") #0 2001 ret <3 x float> %exp 2002} 2003 2004define <3 x double> @constrained_vector_exp_v3f64() #0 { 2005; CHECK-LABEL: constrained_vector_exp_v3f64: 2006; CHECK: # %bb.0: # %entry 2007; CHECK-NEXT: subq $24, %rsp 2008; CHECK-NEXT: .cfi_def_cfa_offset 32 2009; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0] 2010; CHECK-NEXT: callq exp@PLT 2011; CHECK-NEXT: movsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill 2012; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0] 2013; CHECK-NEXT: callq exp@PLT 2014; CHECK-NEXT: movsd %xmm0, (%rsp) # 8-byte Spill 2015; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2200000000000003E+1,0.0E+0] 2016; CHECK-NEXT: callq exp@PLT 2017; CHECK-NEXT: movsd %xmm0, {{[0-9]+}}(%rsp) 2018; CHECK-NEXT: fldl {{[0-9]+}}(%rsp) 2019; CHECK-NEXT: wait 2020; CHECK-NEXT: movsd (%rsp), %xmm0 # 8-byte Reload 2021; CHECK-NEXT: # xmm0 = mem[0],zero 2022; CHECK-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 8-byte Reload 2023; CHECK-NEXT: # xmm1 = mem[0],zero 2024; CHECK-NEXT: addq $24, %rsp 2025; CHECK-NEXT: 
.cfi_def_cfa_offset 8 2026; CHECK-NEXT: retq 2027; 2028; AVX-LABEL: constrained_vector_exp_v3f64: 2029; AVX: # %bb.0: # %entry 2030; AVX-NEXT: subq $40, %rsp 2031; AVX-NEXT: .cfi_def_cfa_offset 48 2032; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0] 2033; AVX-NEXT: callq exp@PLT 2034; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill 2035; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0] 2036; AVX-NEXT: callq exp@PLT 2037; AVX-NEXT: vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload 2038; AVX-NEXT: # xmm0 = xmm0[0],mem[0] 2039; AVX-NEXT: vmovups %ymm0, (%rsp) # 32-byte Spill 2040; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2200000000000003E+1,0.0E+0] 2041; AVX-NEXT: vzeroupper 2042; AVX-NEXT: callq exp@PLT 2043; AVX-NEXT: vmovups (%rsp), %ymm1 # 32-byte Reload 2044; AVX-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 2045; AVX-NEXT: addq $40, %rsp 2046; AVX-NEXT: .cfi_def_cfa_offset 8 2047; AVX-NEXT: retq 2048entry: 2049 %exp = call <3 x double> @llvm.experimental.constrained.exp.v3f64( 2050 <3 x double> <double 42.0, double 42.1, double 42.2>, 2051 metadata !"round.dynamic", 2052 metadata !"fpexcept.strict") #0 2053 ret <3 x double> %exp 2054} 2055 2056define <4 x double> @constrained_vector_exp_v4f64() #0 { 2057; CHECK-LABEL: constrained_vector_exp_v4f64: 2058; CHECK: # %bb.0: # %entry 2059; CHECK-NEXT: subq $40, %rsp 2060; CHECK-NEXT: .cfi_def_cfa_offset 48 2061; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0] 2062; CHECK-NEXT: callq exp@PLT 2063; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill 2064; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0] 2065; CHECK-NEXT: callq exp@PLT 2066; CHECK-NEXT: unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload 2067; CHECK-NEXT: # xmm0 = xmm0[0],mem[0] 2068; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill 2069; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2299999999999997E+1,0.0E+0] 2070; CHECK-NEXT: callq exp@PLT 2071; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 2072; CHECK-NEXT: movsd 
{{.*#+}} xmm0 = [4.2200000000000003E+1,0.0E+0] 2073; CHECK-NEXT: callq exp@PLT 2074; CHECK-NEXT: movaps %xmm0, %xmm1 2075; CHECK-NEXT: unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload 2076; CHECK-NEXT: # xmm1 = xmm1[0],mem[0] 2077; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload 2078; CHECK-NEXT: addq $40, %rsp 2079; CHECK-NEXT: .cfi_def_cfa_offset 8 2080; CHECK-NEXT: retq 2081; 2082; AVX-LABEL: constrained_vector_exp_v4f64: 2083; AVX: # %bb.0: # %entry 2084; AVX-NEXT: subq $40, %rsp 2085; AVX-NEXT: .cfi_def_cfa_offset 48 2086; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2299999999999997E+1,0.0E+0] 2087; AVX-NEXT: callq exp@PLT 2088; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill 2089; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2200000000000003E+1,0.0E+0] 2090; AVX-NEXT: callq exp@PLT 2091; AVX-NEXT: vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload 2092; AVX-NEXT: # xmm0 = xmm0[0],mem[0] 2093; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill 2094; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0] 2095; AVX-NEXT: callq exp@PLT 2096; AVX-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 2097; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0] 2098; AVX-NEXT: callq exp@PLT 2099; AVX-NEXT: vunpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload 2100; AVX-NEXT: # xmm0 = xmm0[0],mem[0] 2101; AVX-NEXT: vinsertf128 $1, (%rsp), %ymm0, %ymm0 # 16-byte Folded Reload 2102; AVX-NEXT: addq $40, %rsp 2103; AVX-NEXT: .cfi_def_cfa_offset 8 2104; AVX-NEXT: retq 2105entry: 2106 %exp = call <4 x double> @llvm.experimental.constrained.exp.v4f64( 2107 <4 x double> <double 42.0, double 42.1, 2108 double 42.2, double 42.3>, 2109 metadata !"round.dynamic", 2110 metadata !"fpexcept.strict") #0 2111 ret <4 x double> %exp 2112} 2113 2114define <1 x float> @constrained_vector_exp2_v1f32() #0 { 2115; CHECK-LABEL: constrained_vector_exp2_v1f32: 2116; CHECK: # %bb.0: # %entry 2117; CHECK-NEXT: pushq %rax 2118; CHECK-NEXT: .cfi_def_cfa_offset 16 
2119; CHECK-NEXT: movss {{.*#+}} xmm0 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0] 2120; CHECK-NEXT: callq exp2f@PLT 2121; CHECK-NEXT: popq %rax 2122; CHECK-NEXT: .cfi_def_cfa_offset 8 2123; CHECK-NEXT: retq 2124; 2125; AVX-LABEL: constrained_vector_exp2_v1f32: 2126; AVX: # %bb.0: # %entry 2127; AVX-NEXT: pushq %rax 2128; AVX-NEXT: .cfi_def_cfa_offset 16 2129; AVX-NEXT: vmovss {{.*#+}} xmm0 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0] 2130; AVX-NEXT: callq exp2f@PLT 2131; AVX-NEXT: popq %rax 2132; AVX-NEXT: .cfi_def_cfa_offset 8 2133; AVX-NEXT: retq 2134entry: 2135 %exp2 = call <1 x float> @llvm.experimental.constrained.exp2.v1f32( 2136 <1 x float> <float 42.0>, 2137 metadata !"round.dynamic", 2138 metadata !"fpexcept.strict") #0 2139 ret <1 x float> %exp2 2140} 2141 2142define <2 x double> @constrained_vector_exp2_v2f64() #0 { 2143; CHECK-LABEL: constrained_vector_exp2_v2f64: 2144; CHECK: # %bb.0: # %entry 2145; CHECK-NEXT: subq $24, %rsp 2146; CHECK-NEXT: .cfi_def_cfa_offset 32 2147; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0] 2148; CHECK-NEXT: callq exp2@PLT 2149; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill 2150; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0] 2151; CHECK-NEXT: callq exp2@PLT 2152; CHECK-NEXT: unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload 2153; CHECK-NEXT: # xmm0 = xmm0[0],mem[0] 2154; CHECK-NEXT: addq $24, %rsp 2155; CHECK-NEXT: .cfi_def_cfa_offset 8 2156; CHECK-NEXT: retq 2157; 2158; AVX-LABEL: constrained_vector_exp2_v2f64: 2159; AVX: # %bb.0: # %entry 2160; AVX-NEXT: subq $24, %rsp 2161; AVX-NEXT: .cfi_def_cfa_offset 32 2162; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0] 2163; AVX-NEXT: callq exp2@PLT 2164; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill 2165; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0] 2166; AVX-NEXT: callq exp2@PLT 2167; AVX-NEXT: vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload 2168; AVX-NEXT: # xmm0 = xmm0[0],mem[0] 2169; AVX-NEXT: addq $24, %rsp 2170; AVX-NEXT: .cfi_def_cfa_offset 8 2171; 
AVX-NEXT: retq 2172entry: 2173 %exp2 = call <2 x double> @llvm.experimental.constrained.exp2.v2f64( 2174 <2 x double> <double 42.1, double 42.0>, 2175 metadata !"round.dynamic", 2176 metadata !"fpexcept.strict") #0 2177 ret <2 x double> %exp2 2178} 2179 2180define <3 x float> @constrained_vector_exp2_v3f32() #0 { 2181; CHECK-LABEL: constrained_vector_exp2_v3f32: 2182; CHECK: # %bb.0: # %entry 2183; CHECK-NEXT: subq $40, %rsp 2184; CHECK-NEXT: .cfi_def_cfa_offset 48 2185; CHECK-NEXT: movss {{.*#+}} xmm0 = [4.4E+1,0.0E+0,0.0E+0,0.0E+0] 2186; CHECK-NEXT: callq exp2f@PLT 2187; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 2188; CHECK-NEXT: movss {{.*#+}} xmm0 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0] 2189; CHECK-NEXT: callq exp2f@PLT 2190; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill 2191; CHECK-NEXT: movss {{.*#+}} xmm0 = [4.3E+1,0.0E+0,0.0E+0,0.0E+0] 2192; CHECK-NEXT: callq exp2f@PLT 2193; CHECK-NEXT: movaps (%rsp), %xmm1 # 16-byte Reload 2194; CHECK-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] 2195; CHECK-NEXT: unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload 2196; CHECK-NEXT: # xmm1 = xmm1[0],mem[0] 2197; CHECK-NEXT: movaps %xmm1, %xmm0 2198; CHECK-NEXT: addq $40, %rsp 2199; CHECK-NEXT: .cfi_def_cfa_offset 8 2200; CHECK-NEXT: retq 2201; 2202; AVX-LABEL: constrained_vector_exp2_v3f32: 2203; AVX: # %bb.0: # %entry 2204; AVX-NEXT: subq $40, %rsp 2205; AVX-NEXT: .cfi_def_cfa_offset 48 2206; AVX-NEXT: vmovss {{.*#+}} xmm0 = [4.4E+1,0.0E+0,0.0E+0,0.0E+0] 2207; AVX-NEXT: callq exp2f@PLT 2208; AVX-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 2209; AVX-NEXT: vmovss {{.*#+}} xmm0 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0] 2210; AVX-NEXT: callq exp2f@PLT 2211; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill 2212; AVX-NEXT: vmovss {{.*#+}} xmm0 = [4.3E+1,0.0E+0,0.0E+0,0.0E+0] 2213; AVX-NEXT: callq exp2f@PLT 2214; AVX-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload 2215; AVX-NEXT: vinsertps {{.*#+}} xmm0 = 
xmm1[0],xmm0[0],xmm1[2,3] 2216; AVX-NEXT: vinsertps $32, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload 2217; AVX-NEXT: # xmm0 = xmm0[0,1],mem[0],xmm0[3] 2218; AVX-NEXT: addq $40, %rsp 2219; AVX-NEXT: .cfi_def_cfa_offset 8 2220; AVX-NEXT: retq 2221entry: 2222 %exp2 = call <3 x float> @llvm.experimental.constrained.exp2.v3f32( 2223 <3 x float> <float 42.0, float 43.0, float 44.0>, 2224 metadata !"round.dynamic", 2225 metadata !"fpexcept.strict") #0 2226 ret <3 x float> %exp2 2227} 2228 2229define <3 x double> @constrained_vector_exp2_v3f64() #0 { 2230; CHECK-LABEL: constrained_vector_exp2_v3f64: 2231; CHECK: # %bb.0: # %entry 2232; CHECK-NEXT: subq $24, %rsp 2233; CHECK-NEXT: .cfi_def_cfa_offset 32 2234; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0] 2235; CHECK-NEXT: callq exp2@PLT 2236; CHECK-NEXT: movsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill 2237; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0] 2238; CHECK-NEXT: callq exp2@PLT 2239; CHECK-NEXT: movsd %xmm0, (%rsp) # 8-byte Spill 2240; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2200000000000003E+1,0.0E+0] 2241; CHECK-NEXT: callq exp2@PLT 2242; CHECK-NEXT: movsd %xmm0, {{[0-9]+}}(%rsp) 2243; CHECK-NEXT: fldl {{[0-9]+}}(%rsp) 2244; CHECK-NEXT: wait 2245; CHECK-NEXT: movsd (%rsp), %xmm0 # 8-byte Reload 2246; CHECK-NEXT: # xmm0 = mem[0],zero 2247; CHECK-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 8-byte Reload 2248; CHECK-NEXT: # xmm1 = mem[0],zero 2249; CHECK-NEXT: addq $24, %rsp 2250; CHECK-NEXT: .cfi_def_cfa_offset 8 2251; CHECK-NEXT: retq 2252; 2253; AVX-LABEL: constrained_vector_exp2_v3f64: 2254; AVX: # %bb.0: # %entry 2255; AVX-NEXT: subq $40, %rsp 2256; AVX-NEXT: .cfi_def_cfa_offset 48 2257; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0] 2258; AVX-NEXT: callq exp2@PLT 2259; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill 2260; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0] 2261; AVX-NEXT: callq exp2@PLT 2262; AVX-NEXT: vunpcklpd (%rsp), %xmm0, %xmm0 # 
16-byte Folded Reload 2263; AVX-NEXT: # xmm0 = xmm0[0],mem[0] 2264; AVX-NEXT: vmovups %ymm0, (%rsp) # 32-byte Spill 2265; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2200000000000003E+1,0.0E+0] 2266; AVX-NEXT: vzeroupper 2267; AVX-NEXT: callq exp2@PLT 2268; AVX-NEXT: vmovups (%rsp), %ymm1 # 32-byte Reload 2269; AVX-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 2270; AVX-NEXT: addq $40, %rsp 2271; AVX-NEXT: .cfi_def_cfa_offset 8 2272; AVX-NEXT: retq 2273entry: 2274 %exp2 = call <3 x double> @llvm.experimental.constrained.exp2.v3f64( 2275 <3 x double> <double 42.0, double 42.1, double 42.2>, 2276 metadata !"round.dynamic", 2277 metadata !"fpexcept.strict") #0 2278 ret <3 x double> %exp2 2279} 2280 2281define <4 x double> @constrained_vector_exp2_v4f64() #0 { 2282; CHECK-LABEL: constrained_vector_exp2_v4f64: 2283; CHECK: # %bb.0: # %entry 2284; CHECK-NEXT: subq $40, %rsp 2285; CHECK-NEXT: .cfi_def_cfa_offset 48 2286; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2200000000000003E+1,0.0E+0] 2287; CHECK-NEXT: callq exp2@PLT 2288; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill 2289; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0] 2290; CHECK-NEXT: callq exp2@PLT 2291; CHECK-NEXT: unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload 2292; CHECK-NEXT: # xmm0 = xmm0[0],mem[0] 2293; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill 2294; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2399999999999999E+1,0.0E+0] 2295; CHECK-NEXT: callq exp2@PLT 2296; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 2297; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2299999999999997E+1,0.0E+0] 2298; CHECK-NEXT: callq exp2@PLT 2299; CHECK-NEXT: movaps %xmm0, %xmm1 2300; CHECK-NEXT: unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload 2301; CHECK-NEXT: # xmm1 = xmm1[0],mem[0] 2302; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload 2303; CHECK-NEXT: addq $40, %rsp 2304; CHECK-NEXT: .cfi_def_cfa_offset 8 2305; CHECK-NEXT: retq 2306; 2307; AVX-LABEL: constrained_vector_exp2_v4f64: 2308; AVX: # 
%bb.0: # %entry 2309; AVX-NEXT: subq $40, %rsp 2310; AVX-NEXT: .cfi_def_cfa_offset 48 2311; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2399999999999999E+1,0.0E+0] 2312; AVX-NEXT: callq exp2@PLT 2313; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill 2314; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2299999999999997E+1,0.0E+0] 2315; AVX-NEXT: callq exp2@PLT 2316; AVX-NEXT: vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload 2317; AVX-NEXT: # xmm0 = xmm0[0],mem[0] 2318; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill 2319; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2200000000000003E+1,0.0E+0] 2320; AVX-NEXT: callq exp2@PLT 2321; AVX-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 2322; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0] 2323; AVX-NEXT: callq exp2@PLT 2324; AVX-NEXT: vunpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload 2325; AVX-NEXT: # xmm0 = xmm0[0],mem[0] 2326; AVX-NEXT: vinsertf128 $1, (%rsp), %ymm0, %ymm0 # 16-byte Folded Reload 2327; AVX-NEXT: addq $40, %rsp 2328; AVX-NEXT: .cfi_def_cfa_offset 8 2329; AVX-NEXT: retq 2330entry: 2331 %exp2 = call <4 x double> @llvm.experimental.constrained.exp2.v4f64( 2332 <4 x double> <double 42.1, double 42.2, 2333 double 42.3, double 42.4>, 2334 metadata !"round.dynamic", 2335 metadata !"fpexcept.strict") #0 2336 ret <4 x double> %exp2 2337} 2338 2339define <1 x float> @constrained_vector_log_v1f32() #0 { 2340; CHECK-LABEL: constrained_vector_log_v1f32: 2341; CHECK: # %bb.0: # %entry 2342; CHECK-NEXT: pushq %rax 2343; CHECK-NEXT: .cfi_def_cfa_offset 16 2344; CHECK-NEXT: movss {{.*#+}} xmm0 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0] 2345; CHECK-NEXT: callq logf@PLT 2346; CHECK-NEXT: popq %rax 2347; CHECK-NEXT: .cfi_def_cfa_offset 8 2348; CHECK-NEXT: retq 2349; 2350; AVX-LABEL: constrained_vector_log_v1f32: 2351; AVX: # %bb.0: # %entry 2352; AVX-NEXT: pushq %rax 2353; AVX-NEXT: .cfi_def_cfa_offset 16 2354; AVX-NEXT: vmovss {{.*#+}} xmm0 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0] 2355; AVX-NEXT: callq logf@PLT 
2356; AVX-NEXT: popq %rax 2357; AVX-NEXT: .cfi_def_cfa_offset 8 2358; AVX-NEXT: retq 2359entry: 2360 %log = call <1 x float> @llvm.experimental.constrained.log.v1f32( 2361 <1 x float> <float 42.0>, 2362 metadata !"round.dynamic", 2363 metadata !"fpexcept.strict") #0 2364 ret <1 x float> %log 2365} 2366 2367define <2 x double> @constrained_vector_log_v2f64() #0 { 2368; CHECK-LABEL: constrained_vector_log_v2f64: 2369; CHECK: # %bb.0: # %entry 2370; CHECK-NEXT: subq $24, %rsp 2371; CHECK-NEXT: .cfi_def_cfa_offset 32 2372; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0] 2373; CHECK-NEXT: callq log@PLT 2374; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill 2375; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0] 2376; CHECK-NEXT: callq log@PLT 2377; CHECK-NEXT: unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload 2378; CHECK-NEXT: # xmm0 = xmm0[0],mem[0] 2379; CHECK-NEXT: addq $24, %rsp 2380; CHECK-NEXT: .cfi_def_cfa_offset 8 2381; CHECK-NEXT: retq 2382; 2383; AVX-LABEL: constrained_vector_log_v2f64: 2384; AVX: # %bb.0: # %entry 2385; AVX-NEXT: subq $24, %rsp 2386; AVX-NEXT: .cfi_def_cfa_offset 32 2387; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0] 2388; AVX-NEXT: callq log@PLT 2389; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill 2390; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0] 2391; AVX-NEXT: callq log@PLT 2392; AVX-NEXT: vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload 2393; AVX-NEXT: # xmm0 = xmm0[0],mem[0] 2394; AVX-NEXT: addq $24, %rsp 2395; AVX-NEXT: .cfi_def_cfa_offset 8 2396; AVX-NEXT: retq 2397entry: 2398 %log = call <2 x double> @llvm.experimental.constrained.log.v2f64( 2399 <2 x double> <double 42.0, double 42.1>, 2400 metadata !"round.dynamic", 2401 metadata !"fpexcept.strict") #0 2402 ret <2 x double> %log 2403} 2404 2405define <3 x float> @constrained_vector_log_v3f32() #0 { 2406; CHECK-LABEL: constrained_vector_log_v3f32: 2407; CHECK: # %bb.0: # %entry 2408; CHECK-NEXT: subq $40, %rsp 2409; CHECK-NEXT: 
.cfi_def_cfa_offset 48 2410; CHECK-NEXT: movss {{.*#+}} xmm0 = [4.4E+1,0.0E+0,0.0E+0,0.0E+0] 2411; CHECK-NEXT: callq logf@PLT 2412; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 2413; CHECK-NEXT: movss {{.*#+}} xmm0 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0] 2414; CHECK-NEXT: callq logf@PLT 2415; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill 2416; CHECK-NEXT: movss {{.*#+}} xmm0 = [4.3E+1,0.0E+0,0.0E+0,0.0E+0] 2417; CHECK-NEXT: callq logf@PLT 2418; CHECK-NEXT: movaps (%rsp), %xmm1 # 16-byte Reload 2419; CHECK-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] 2420; CHECK-NEXT: unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload 2421; CHECK-NEXT: # xmm1 = xmm1[0],mem[0] 2422; CHECK-NEXT: movaps %xmm1, %xmm0 2423; CHECK-NEXT: addq $40, %rsp 2424; CHECK-NEXT: .cfi_def_cfa_offset 8 2425; CHECK-NEXT: retq 2426; 2427; AVX-LABEL: constrained_vector_log_v3f32: 2428; AVX: # %bb.0: # %entry 2429; AVX-NEXT: subq $40, %rsp 2430; AVX-NEXT: .cfi_def_cfa_offset 48 2431; AVX-NEXT: vmovss {{.*#+}} xmm0 = [4.4E+1,0.0E+0,0.0E+0,0.0E+0] 2432; AVX-NEXT: callq logf@PLT 2433; AVX-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 2434; AVX-NEXT: vmovss {{.*#+}} xmm0 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0] 2435; AVX-NEXT: callq logf@PLT 2436; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill 2437; AVX-NEXT: vmovss {{.*#+}} xmm0 = [4.3E+1,0.0E+0,0.0E+0,0.0E+0] 2438; AVX-NEXT: callq logf@PLT 2439; AVX-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload 2440; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3] 2441; AVX-NEXT: vinsertps $32, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload 2442; AVX-NEXT: # xmm0 = xmm0[0,1],mem[0],xmm0[3] 2443; AVX-NEXT: addq $40, %rsp 2444; AVX-NEXT: .cfi_def_cfa_offset 8 2445; AVX-NEXT: retq 2446entry: 2447 %log = call <3 x float> @llvm.experimental.constrained.log.v3f32( 2448 <3 x float> <float 42.0, float 43.0, float 44.0>, 2449 metadata !"round.dynamic", 2450 metadata !"fpexcept.strict") #0 2451 ret 
<3 x float> %log 2452} 2453 2454define <3 x double> @constrained_vector_log_v3f64() #0 { 2455; CHECK-LABEL: constrained_vector_log_v3f64: 2456; CHECK: # %bb.0: # %entry 2457; CHECK-NEXT: subq $24, %rsp 2458; CHECK-NEXT: .cfi_def_cfa_offset 32 2459; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0] 2460; CHECK-NEXT: callq log@PLT 2461; CHECK-NEXT: movsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill 2462; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0] 2463; CHECK-NEXT: callq log@PLT 2464; CHECK-NEXT: movsd %xmm0, (%rsp) # 8-byte Spill 2465; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2200000000000003E+1,0.0E+0] 2466; CHECK-NEXT: callq log@PLT 2467; CHECK-NEXT: movsd %xmm0, {{[0-9]+}}(%rsp) 2468; CHECK-NEXT: fldl {{[0-9]+}}(%rsp) 2469; CHECK-NEXT: wait 2470; CHECK-NEXT: movsd (%rsp), %xmm0 # 8-byte Reload 2471; CHECK-NEXT: # xmm0 = mem[0],zero 2472; CHECK-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 8-byte Reload 2473; CHECK-NEXT: # xmm1 = mem[0],zero 2474; CHECK-NEXT: addq $24, %rsp 2475; CHECK-NEXT: .cfi_def_cfa_offset 8 2476; CHECK-NEXT: retq 2477; 2478; AVX-LABEL: constrained_vector_log_v3f64: 2479; AVX: # %bb.0: # %entry 2480; AVX-NEXT: subq $40, %rsp 2481; AVX-NEXT: .cfi_def_cfa_offset 48 2482; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0] 2483; AVX-NEXT: callq log@PLT 2484; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill 2485; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0] 2486; AVX-NEXT: callq log@PLT 2487; AVX-NEXT: vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload 2488; AVX-NEXT: # xmm0 = xmm0[0],mem[0] 2489; AVX-NEXT: vmovups %ymm0, (%rsp) # 32-byte Spill 2490; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2200000000000003E+1,0.0E+0] 2491; AVX-NEXT: vzeroupper 2492; AVX-NEXT: callq log@PLT 2493; AVX-NEXT: vmovups (%rsp), %ymm1 # 32-byte Reload 2494; AVX-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 2495; AVX-NEXT: addq $40, %rsp 2496; AVX-NEXT: .cfi_def_cfa_offset 8 2497; AVX-NEXT: retq 2498entry: 2499 %log = call <3 x double> 
@llvm.experimental.constrained.log.v3f64( 2500 <3 x double> <double 42.0, double 42.1, double 42.2>, 2501 metadata !"round.dynamic", 2502 metadata !"fpexcept.strict") #0 2503 ret <3 x double> %log 2504} 2505 2506define <4 x double> @constrained_vector_log_v4f64() #0 { 2507; CHECK-LABEL: constrained_vector_log_v4f64: 2508; CHECK: # %bb.0: # %entry 2509; CHECK-NEXT: subq $40, %rsp 2510; CHECK-NEXT: .cfi_def_cfa_offset 48 2511; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0] 2512; CHECK-NEXT: callq log@PLT 2513; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill 2514; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0] 2515; CHECK-NEXT: callq log@PLT 2516; CHECK-NEXT: unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload 2517; CHECK-NEXT: # xmm0 = xmm0[0],mem[0] 2518; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill 2519; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2299999999999997E+1,0.0E+0] 2520; CHECK-NEXT: callq log@PLT 2521; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 2522; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2200000000000003E+1,0.0E+0] 2523; CHECK-NEXT: callq log@PLT 2524; CHECK-NEXT: movaps %xmm0, %xmm1 2525; CHECK-NEXT: unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload 2526; CHECK-NEXT: # xmm1 = xmm1[0],mem[0] 2527; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload 2528; CHECK-NEXT: addq $40, %rsp 2529; CHECK-NEXT: .cfi_def_cfa_offset 8 2530; CHECK-NEXT: retq 2531; 2532; AVX-LABEL: constrained_vector_log_v4f64: 2533; AVX: # %bb.0: # %entry 2534; AVX-NEXT: subq $40, %rsp 2535; AVX-NEXT: .cfi_def_cfa_offset 48 2536; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2299999999999997E+1,0.0E+0] 2537; AVX-NEXT: callq log@PLT 2538; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill 2539; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2200000000000003E+1,0.0E+0] 2540; AVX-NEXT: callq log@PLT 2541; AVX-NEXT: vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload 2542; AVX-NEXT: # xmm0 = xmm0[0],mem[0] 2543; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill 
2544; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0] 2545; AVX-NEXT: callq log@PLT 2546; AVX-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 2547; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0] 2548; AVX-NEXT: callq log@PLT 2549; AVX-NEXT: vunpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload 2550; AVX-NEXT: # xmm0 = xmm0[0],mem[0] 2551; AVX-NEXT: vinsertf128 $1, (%rsp), %ymm0, %ymm0 # 16-byte Folded Reload 2552; AVX-NEXT: addq $40, %rsp 2553; AVX-NEXT: .cfi_def_cfa_offset 8 2554; AVX-NEXT: retq 2555entry: 2556 %log = call <4 x double> @llvm.experimental.constrained.log.v4f64( 2557 <4 x double> <double 42.0, double 42.1, 2558 double 42.2, double 42.3>, 2559 metadata !"round.dynamic", 2560 metadata !"fpexcept.strict") #0 2561 ret <4 x double> %log 2562} 2563 2564define <1 x float> @constrained_vector_log10_v1f32() #0 { 2565; CHECK-LABEL: constrained_vector_log10_v1f32: 2566; CHECK: # %bb.0: # %entry 2567; CHECK-NEXT: pushq %rax 2568; CHECK-NEXT: .cfi_def_cfa_offset 16 2569; CHECK-NEXT: movss {{.*#+}} xmm0 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0] 2570; CHECK-NEXT: callq log10f@PLT 2571; CHECK-NEXT: popq %rax 2572; CHECK-NEXT: .cfi_def_cfa_offset 8 2573; CHECK-NEXT: retq 2574; 2575; AVX-LABEL: constrained_vector_log10_v1f32: 2576; AVX: # %bb.0: # %entry 2577; AVX-NEXT: pushq %rax 2578; AVX-NEXT: .cfi_def_cfa_offset 16 2579; AVX-NEXT: vmovss {{.*#+}} xmm0 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0] 2580; AVX-NEXT: callq log10f@PLT 2581; AVX-NEXT: popq %rax 2582; AVX-NEXT: .cfi_def_cfa_offset 8 2583; AVX-NEXT: retq 2584entry: 2585 %log10 = call <1 x float> @llvm.experimental.constrained.log10.v1f32( 2586 <1 x float> <float 42.0>, 2587 metadata !"round.dynamic", 2588 metadata !"fpexcept.strict") #0 2589 ret <1 x float> %log10 2590} 2591 2592define <2 x double> @constrained_vector_log10_v2f64() #0 { 2593; CHECK-LABEL: constrained_vector_log10_v2f64: 2594; CHECK: # %bb.0: # %entry 2595; CHECK-NEXT: subq $24, %rsp 2596; CHECK-NEXT: 
.cfi_def_cfa_offset 32 2597; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0] 2598; CHECK-NEXT: callq log10@PLT 2599; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill 2600; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0] 2601; CHECK-NEXT: callq log10@PLT 2602; CHECK-NEXT: unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload 2603; CHECK-NEXT: # xmm0 = xmm0[0],mem[0] 2604; CHECK-NEXT: addq $24, %rsp 2605; CHECK-NEXT: .cfi_def_cfa_offset 8 2606; CHECK-NEXT: retq 2607; 2608; AVX-LABEL: constrained_vector_log10_v2f64: 2609; AVX: # %bb.0: # %entry 2610; AVX-NEXT: subq $24, %rsp 2611; AVX-NEXT: .cfi_def_cfa_offset 32 2612; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0] 2613; AVX-NEXT: callq log10@PLT 2614; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill 2615; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0] 2616; AVX-NEXT: callq log10@PLT 2617; AVX-NEXT: vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload 2618; AVX-NEXT: # xmm0 = xmm0[0],mem[0] 2619; AVX-NEXT: addq $24, %rsp 2620; AVX-NEXT: .cfi_def_cfa_offset 8 2621; AVX-NEXT: retq 2622entry: 2623 %log10 = call <2 x double> @llvm.experimental.constrained.log10.v2f64( 2624 <2 x double> <double 42.0, double 42.1>, 2625 metadata !"round.dynamic", 2626 metadata !"fpexcept.strict") #0 2627 ret <2 x double> %log10 2628} 2629 2630define <3 x float> @constrained_vector_log10_v3f32() #0 { 2631; CHECK-LABEL: constrained_vector_log10_v3f32: 2632; CHECK: # %bb.0: # %entry 2633; CHECK-NEXT: subq $40, %rsp 2634; CHECK-NEXT: .cfi_def_cfa_offset 48 2635; CHECK-NEXT: movss {{.*#+}} xmm0 = [4.4E+1,0.0E+0,0.0E+0,0.0E+0] 2636; CHECK-NEXT: callq log10f@PLT 2637; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 2638; CHECK-NEXT: movss {{.*#+}} xmm0 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0] 2639; CHECK-NEXT: callq log10f@PLT 2640; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill 2641; CHECK-NEXT: movss {{.*#+}} xmm0 = [4.3E+1,0.0E+0,0.0E+0,0.0E+0] 2642; CHECK-NEXT: callq log10f@PLT 2643; CHECK-NEXT: 
movaps (%rsp), %xmm1 # 16-byte Reload 2644; CHECK-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] 2645; CHECK-NEXT: unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload 2646; CHECK-NEXT: # xmm1 = xmm1[0],mem[0] 2647; CHECK-NEXT: movaps %xmm1, %xmm0 2648; CHECK-NEXT: addq $40, %rsp 2649; CHECK-NEXT: .cfi_def_cfa_offset 8 2650; CHECK-NEXT: retq 2651; 2652; AVX-LABEL: constrained_vector_log10_v3f32: 2653; AVX: # %bb.0: # %entry 2654; AVX-NEXT: subq $40, %rsp 2655; AVX-NEXT: .cfi_def_cfa_offset 48 2656; AVX-NEXT: vmovss {{.*#+}} xmm0 = [4.4E+1,0.0E+0,0.0E+0,0.0E+0] 2657; AVX-NEXT: callq log10f@PLT 2658; AVX-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 2659; AVX-NEXT: vmovss {{.*#+}} xmm0 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0] 2660; AVX-NEXT: callq log10f@PLT 2661; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill 2662; AVX-NEXT: vmovss {{.*#+}} xmm0 = [4.3E+1,0.0E+0,0.0E+0,0.0E+0] 2663; AVX-NEXT: callq log10f@PLT 2664; AVX-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload 2665; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3] 2666; AVX-NEXT: vinsertps $32, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload 2667; AVX-NEXT: # xmm0 = xmm0[0,1],mem[0],xmm0[3] 2668; AVX-NEXT: addq $40, %rsp 2669; AVX-NEXT: .cfi_def_cfa_offset 8 2670; AVX-NEXT: retq 2671entry: 2672 %log10 = call <3 x float> @llvm.experimental.constrained.log10.v3f32( 2673 <3 x float> <float 42.0, float 43.0, float 44.0>, 2674 metadata !"round.dynamic", 2675 metadata !"fpexcept.strict") #0 2676 ret <3 x float> %log10 2677} 2678 2679define <3 x double> @constrained_vector_log10_v3f64() #0 { 2680; CHECK-LABEL: constrained_vector_log10_v3f64: 2681; CHECK: # %bb.0: # %entry 2682; CHECK-NEXT: subq $24, %rsp 2683; CHECK-NEXT: .cfi_def_cfa_offset 32 2684; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0] 2685; CHECK-NEXT: callq log10@PLT 2686; CHECK-NEXT: movsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill 2687; CHECK-NEXT: movsd {{.*#+}} xmm0 = 
[4.2E+1,0.0E+0] 2688; CHECK-NEXT: callq log10@PLT 2689; CHECK-NEXT: movsd %xmm0, (%rsp) # 8-byte Spill 2690; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2200000000000003E+1,0.0E+0] 2691; CHECK-NEXT: callq log10@PLT 2692; CHECK-NEXT: movsd %xmm0, {{[0-9]+}}(%rsp) 2693; CHECK-NEXT: fldl {{[0-9]+}}(%rsp) 2694; CHECK-NEXT: wait 2695; CHECK-NEXT: movsd (%rsp), %xmm0 # 8-byte Reload 2696; CHECK-NEXT: # xmm0 = mem[0],zero 2697; CHECK-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 8-byte Reload 2698; CHECK-NEXT: # xmm1 = mem[0],zero 2699; CHECK-NEXT: addq $24, %rsp 2700; CHECK-NEXT: .cfi_def_cfa_offset 8 2701; CHECK-NEXT: retq 2702; 2703; AVX-LABEL: constrained_vector_log10_v3f64: 2704; AVX: # %bb.0: # %entry 2705; AVX-NEXT: subq $40, %rsp 2706; AVX-NEXT: .cfi_def_cfa_offset 48 2707; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0] 2708; AVX-NEXT: callq log10@PLT 2709; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill 2710; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0] 2711; AVX-NEXT: callq log10@PLT 2712; AVX-NEXT: vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload 2713; AVX-NEXT: # xmm0 = xmm0[0],mem[0] 2714; AVX-NEXT: vmovups %ymm0, (%rsp) # 32-byte Spill 2715; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2200000000000003E+1,0.0E+0] 2716; AVX-NEXT: vzeroupper 2717; AVX-NEXT: callq log10@PLT 2718; AVX-NEXT: vmovups (%rsp), %ymm1 # 32-byte Reload 2719; AVX-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 2720; AVX-NEXT: addq $40, %rsp 2721; AVX-NEXT: .cfi_def_cfa_offset 8 2722; AVX-NEXT: retq 2723entry: 2724 %log10 = call <3 x double> @llvm.experimental.constrained.log10.v3f64( 2725 <3 x double> <double 42.0, double 42.1, double 42.2>, 2726 metadata !"round.dynamic", 2727 metadata !"fpexcept.strict") #0 2728 ret <3 x double> %log10 2729} 2730 2731define <4 x double> @constrained_vector_log10_v4f64() #0 { 2732; CHECK-LABEL: constrained_vector_log10_v4f64: 2733; CHECK: # %bb.0: # %entry 2734; CHECK-NEXT: subq $40, %rsp 2735; CHECK-NEXT: .cfi_def_cfa_offset 48 2736; CHECK-NEXT: 
movsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0] 2737; CHECK-NEXT: callq log10@PLT 2738; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill 2739; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0] 2740; CHECK-NEXT: callq log10@PLT 2741; CHECK-NEXT: unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload 2742; CHECK-NEXT: # xmm0 = xmm0[0],mem[0] 2743; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill 2744; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2299999999999997E+1,0.0E+0] 2745; CHECK-NEXT: callq log10@PLT 2746; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 2747; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2200000000000003E+1,0.0E+0] 2748; CHECK-NEXT: callq log10@PLT 2749; CHECK-NEXT: movaps %xmm0, %xmm1 2750; CHECK-NEXT: unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload 2751; CHECK-NEXT: # xmm1 = xmm1[0],mem[0] 2752; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload 2753; CHECK-NEXT: addq $40, %rsp 2754; CHECK-NEXT: .cfi_def_cfa_offset 8 2755; CHECK-NEXT: retq 2756; 2757; AVX-LABEL: constrained_vector_log10_v4f64: 2758; AVX: # %bb.0: # %entry 2759; AVX-NEXT: subq $40, %rsp 2760; AVX-NEXT: .cfi_def_cfa_offset 48 2761; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2299999999999997E+1,0.0E+0] 2762; AVX-NEXT: callq log10@PLT 2763; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill 2764; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2200000000000003E+1,0.0E+0] 2765; AVX-NEXT: callq log10@PLT 2766; AVX-NEXT: vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload 2767; AVX-NEXT: # xmm0 = xmm0[0],mem[0] 2768; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill 2769; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0] 2770; AVX-NEXT: callq log10@PLT 2771; AVX-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 2772; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0] 2773; AVX-NEXT: callq log10@PLT 2774; AVX-NEXT: vunpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload 2775; AVX-NEXT: # xmm0 = xmm0[0],mem[0] 2776; AVX-NEXT: vinsertf128 $1, (%rsp), 
%ymm0, %ymm0 # 16-byte Folded Reload 2777; AVX-NEXT: addq $40, %rsp 2778; AVX-NEXT: .cfi_def_cfa_offset 8 2779; AVX-NEXT: retq 2780entry: 2781 %log10 = call <4 x double> @llvm.experimental.constrained.log10.v4f64( 2782 <4 x double> <double 42.0, double 42.1, 2783 double 42.2, double 42.3>, 2784 metadata !"round.dynamic", 2785 metadata !"fpexcept.strict") #0 2786 ret <4 x double> %log10 2787} 2788 2789define <1 x float> @constrained_vector_log2_v1f32() #0 { 2790; CHECK-LABEL: constrained_vector_log2_v1f32: 2791; CHECK: # %bb.0: # %entry 2792; CHECK-NEXT: pushq %rax 2793; CHECK-NEXT: .cfi_def_cfa_offset 16 2794; CHECK-NEXT: movss {{.*#+}} xmm0 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0] 2795; CHECK-NEXT: callq log2f@PLT 2796; CHECK-NEXT: popq %rax 2797; CHECK-NEXT: .cfi_def_cfa_offset 8 2798; CHECK-NEXT: retq 2799; 2800; AVX-LABEL: constrained_vector_log2_v1f32: 2801; AVX: # %bb.0: # %entry 2802; AVX-NEXT: pushq %rax 2803; AVX-NEXT: .cfi_def_cfa_offset 16 2804; AVX-NEXT: vmovss {{.*#+}} xmm0 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0] 2805; AVX-NEXT: callq log2f@PLT 2806; AVX-NEXT: popq %rax 2807; AVX-NEXT: .cfi_def_cfa_offset 8 2808; AVX-NEXT: retq 2809entry: 2810 %log2 = call <1 x float> @llvm.experimental.constrained.log2.v1f32( 2811 <1 x float> <float 42.0>, 2812 metadata !"round.dynamic", 2813 metadata !"fpexcept.strict") #0 2814 ret <1 x float> %log2 2815} 2816 2817define <2 x double> @constrained_vector_log2_v2f64() #0 { 2818; CHECK-LABEL: constrained_vector_log2_v2f64: 2819; CHECK: # %bb.0: # %entry 2820; CHECK-NEXT: subq $24, %rsp 2821; CHECK-NEXT: .cfi_def_cfa_offset 32 2822; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0] 2823; CHECK-NEXT: callq log2@PLT 2824; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill 2825; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0] 2826; CHECK-NEXT: callq log2@PLT 2827; CHECK-NEXT: unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload 2828; CHECK-NEXT: # xmm0 = xmm0[0],mem[0] 2829; CHECK-NEXT: addq $24, %rsp 2830; CHECK-NEXT: 
.cfi_def_cfa_offset 8 2831; CHECK-NEXT: retq 2832; 2833; AVX-LABEL: constrained_vector_log2_v2f64: 2834; AVX: # %bb.0: # %entry 2835; AVX-NEXT: subq $24, %rsp 2836; AVX-NEXT: .cfi_def_cfa_offset 32 2837; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0] 2838; AVX-NEXT: callq log2@PLT 2839; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill 2840; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0] 2841; AVX-NEXT: callq log2@PLT 2842; AVX-NEXT: vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload 2843; AVX-NEXT: # xmm0 = xmm0[0],mem[0] 2844; AVX-NEXT: addq $24, %rsp 2845; AVX-NEXT: .cfi_def_cfa_offset 8 2846; AVX-NEXT: retq 2847entry: 2848 %log2 = call <2 x double> @llvm.experimental.constrained.log2.v2f64( 2849 <2 x double> <double 42.0, double 42.1>, 2850 metadata !"round.dynamic", 2851 metadata !"fpexcept.strict") #0 2852 ret <2 x double> %log2 2853} 2854 2855define <3 x float> @constrained_vector_log2_v3f32() #0 { 2856; CHECK-LABEL: constrained_vector_log2_v3f32: 2857; CHECK: # %bb.0: # %entry 2858; CHECK-NEXT: subq $40, %rsp 2859; CHECK-NEXT: .cfi_def_cfa_offset 48 2860; CHECK-NEXT: movss {{.*#+}} xmm0 = [4.4E+1,0.0E+0,0.0E+0,0.0E+0] 2861; CHECK-NEXT: callq log2f@PLT 2862; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 2863; CHECK-NEXT: movss {{.*#+}} xmm0 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0] 2864; CHECK-NEXT: callq log2f@PLT 2865; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill 2866; CHECK-NEXT: movss {{.*#+}} xmm0 = [4.3E+1,0.0E+0,0.0E+0,0.0E+0] 2867; CHECK-NEXT: callq log2f@PLT 2868; CHECK-NEXT: movaps (%rsp), %xmm1 # 16-byte Reload 2869; CHECK-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] 2870; CHECK-NEXT: unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload 2871; CHECK-NEXT: # xmm1 = xmm1[0],mem[0] 2872; CHECK-NEXT: movaps %xmm1, %xmm0 2873; CHECK-NEXT: addq $40, %rsp 2874; CHECK-NEXT: .cfi_def_cfa_offset 8 2875; CHECK-NEXT: retq 2876; 2877; AVX-LABEL: constrained_vector_log2_v3f32: 2878; AVX: # %bb.0: 
# %entry 2879; AVX-NEXT: subq $40, %rsp 2880; AVX-NEXT: .cfi_def_cfa_offset 48 2881; AVX-NEXT: vmovss {{.*#+}} xmm0 = [4.4E+1,0.0E+0,0.0E+0,0.0E+0] 2882; AVX-NEXT: callq log2f@PLT 2883; AVX-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 2884; AVX-NEXT: vmovss {{.*#+}} xmm0 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0] 2885; AVX-NEXT: callq log2f@PLT 2886; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill 2887; AVX-NEXT: vmovss {{.*#+}} xmm0 = [4.3E+1,0.0E+0,0.0E+0,0.0E+0] 2888; AVX-NEXT: callq log2f@PLT 2889; AVX-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload 2890; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3] 2891; AVX-NEXT: vinsertps $32, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload 2892; AVX-NEXT: # xmm0 = xmm0[0,1],mem[0],xmm0[3] 2893; AVX-NEXT: addq $40, %rsp 2894; AVX-NEXT: .cfi_def_cfa_offset 8 2895; AVX-NEXT: retq 2896entry: 2897 %log2 = call <3 x float> @llvm.experimental.constrained.log2.v3f32( 2898 <3 x float> <float 42.0, float 43.0, float 44.0>, 2899 metadata !"round.dynamic", 2900 metadata !"fpexcept.strict") #0 2901 ret <3 x float> %log2 2902} 2903 2904define <3 x double> @constrained_vector_log2_v3f64() #0 { 2905; CHECK-LABEL: constrained_vector_log2_v3f64: 2906; CHECK: # %bb.0: # %entry 2907; CHECK-NEXT: subq $24, %rsp 2908; CHECK-NEXT: .cfi_def_cfa_offset 32 2909; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0] 2910; CHECK-NEXT: callq log2@PLT 2911; CHECK-NEXT: movsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill 2912; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0] 2913; CHECK-NEXT: callq log2@PLT 2914; CHECK-NEXT: movsd %xmm0, (%rsp) # 8-byte Spill 2915; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2200000000000003E+1,0.0E+0] 2916; CHECK-NEXT: callq log2@PLT 2917; CHECK-NEXT: movsd %xmm0, {{[0-9]+}}(%rsp) 2918; CHECK-NEXT: fldl {{[0-9]+}}(%rsp) 2919; CHECK-NEXT: wait 2920; CHECK-NEXT: movsd (%rsp), %xmm0 # 8-byte Reload 2921; CHECK-NEXT: # xmm0 = mem[0],zero 2922; CHECK-NEXT: movsd 
{{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 8-byte Reload 2923; CHECK-NEXT: # xmm1 = mem[0],zero 2924; CHECK-NEXT: addq $24, %rsp 2925; CHECK-NEXT: .cfi_def_cfa_offset 8 2926; CHECK-NEXT: retq 2927; 2928; AVX-LABEL: constrained_vector_log2_v3f64: 2929; AVX: # %bb.0: # %entry 2930; AVX-NEXT: subq $40, %rsp 2931; AVX-NEXT: .cfi_def_cfa_offset 48 2932; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0] 2933; AVX-NEXT: callq log2@PLT 2934; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill 2935; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0] 2936; AVX-NEXT: callq log2@PLT 2937; AVX-NEXT: vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload 2938; AVX-NEXT: # xmm0 = xmm0[0],mem[0] 2939; AVX-NEXT: vmovups %ymm0, (%rsp) # 32-byte Spill 2940; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2200000000000003E+1,0.0E+0] 2941; AVX-NEXT: vzeroupper 2942; AVX-NEXT: callq log2@PLT 2943; AVX-NEXT: vmovups (%rsp), %ymm1 # 32-byte Reload 2944; AVX-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 2945; AVX-NEXT: addq $40, %rsp 2946; AVX-NEXT: .cfi_def_cfa_offset 8 2947; AVX-NEXT: retq 2948entry: 2949 %log2 = call <3 x double> @llvm.experimental.constrained.log2.v3f64( 2950 <3 x double> <double 42.0, double 42.1, double 42.2>, 2951 metadata !"round.dynamic", 2952 metadata !"fpexcept.strict") #0 2953 ret <3 x double> %log2 2954} 2955 2956define <4 x double> @constrained_vector_log2_v4f64() #0 { 2957; CHECK-LABEL: constrained_vector_log2_v4f64: 2958; CHECK: # %bb.0: # %entry 2959; CHECK-NEXT: subq $40, %rsp 2960; CHECK-NEXT: .cfi_def_cfa_offset 48 2961; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0] 2962; CHECK-NEXT: callq log2@PLT 2963; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill 2964; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0] 2965; CHECK-NEXT: callq log2@PLT 2966; CHECK-NEXT: unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload 2967; CHECK-NEXT: # xmm0 = xmm0[0],mem[0] 2968; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill 2969; CHECK-NEXT: movsd {{.*#+}} xmm0 = 
[4.2299999999999997E+1,0.0E+0] 2970; CHECK-NEXT: callq log2@PLT 2971; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 2972; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2200000000000003E+1,0.0E+0] 2973; CHECK-NEXT: callq log2@PLT 2974; CHECK-NEXT: movaps %xmm0, %xmm1 2975; CHECK-NEXT: unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload 2976; CHECK-NEXT: # xmm1 = xmm1[0],mem[0] 2977; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload 2978; CHECK-NEXT: addq $40, %rsp 2979; CHECK-NEXT: .cfi_def_cfa_offset 8 2980; CHECK-NEXT: retq 2981; 2982; AVX-LABEL: constrained_vector_log2_v4f64: 2983; AVX: # %bb.0: # %entry 2984; AVX-NEXT: subq $40, %rsp 2985; AVX-NEXT: .cfi_def_cfa_offset 48 2986; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2299999999999997E+1,0.0E+0] 2987; AVX-NEXT: callq log2@PLT 2988; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill 2989; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2200000000000003E+1,0.0E+0] 2990; AVX-NEXT: callq log2@PLT 2991; AVX-NEXT: vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload 2992; AVX-NEXT: # xmm0 = xmm0[0],mem[0] 2993; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill 2994; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0] 2995; AVX-NEXT: callq log2@PLT 2996; AVX-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 2997; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0] 2998; AVX-NEXT: callq log2@PLT 2999; AVX-NEXT: vunpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload 3000; AVX-NEXT: # xmm0 = xmm0[0],mem[0] 3001; AVX-NEXT: vinsertf128 $1, (%rsp), %ymm0, %ymm0 # 16-byte Folded Reload 3002; AVX-NEXT: addq $40, %rsp 3003; AVX-NEXT: .cfi_def_cfa_offset 8 3004; AVX-NEXT: retq 3005entry: 3006 %log2 = call <4 x double> @llvm.experimental.constrained.log2.v4f64( 3007 <4 x double> <double 42.0, double 42.1, 3008 double 42.2, double 42.3>, 3009 metadata !"round.dynamic", 3010 metadata !"fpexcept.strict") #0 3011 ret <4 x double> %log2 3012} 3013 3014define <1 x float> 
@constrained_vector_rint_v1f32_var(ptr %a) #0 { 3015; CHECK-LABEL: constrained_vector_rint_v1f32_var: 3016; CHECK: # %bb.0: # %entry 3017; CHECK-NEXT: pushq %rax 3018; CHECK-NEXT: .cfi_def_cfa_offset 16 3019; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero 3020; CHECK-NEXT: callq rintf@PLT 3021; CHECK-NEXT: popq %rax 3022; CHECK-NEXT: .cfi_def_cfa_offset 8 3023; CHECK-NEXT: retq 3024; 3025; AVX-LABEL: constrained_vector_rint_v1f32_var: 3026; AVX: # %bb.0: # %entry 3027; AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero 3028; AVX-NEXT: vroundss $4, %xmm0, %xmm0, %xmm0 3029; AVX-NEXT: retq 3030entry: 3031 %b = load <1 x float>, ptr %a 3032 %rint = call <1 x float> @llvm.experimental.constrained.rint.v1f32( 3033 <1 x float> %b, 3034 metadata !"round.dynamic", 3035 metadata !"fpexcept.strict") #0 3036 ret <1 x float> %rint 3037} 3038 3039define <2 x double> @constrained_vector_rint_v2f64() #0 { 3040; CHECK-LABEL: constrained_vector_rint_v2f64: 3041; CHECK: # %bb.0: # %entry 3042; CHECK-NEXT: subq $24, %rsp 3043; CHECK-NEXT: .cfi_def_cfa_offset 32 3044; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0] 3045; CHECK-NEXT: callq rint@PLT 3046; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill 3047; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0] 3048; CHECK-NEXT: callq rint@PLT 3049; CHECK-NEXT: unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload 3050; CHECK-NEXT: # xmm0 = xmm0[0],mem[0] 3051; CHECK-NEXT: addq $24, %rsp 3052; CHECK-NEXT: .cfi_def_cfa_offset 8 3053; CHECK-NEXT: retq 3054; 3055; AVX-LABEL: constrained_vector_rint_v2f64: 3056; AVX: # %bb.0: # %entry 3057; AVX-NEXT: vroundpd $4, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 3058; AVX-NEXT: retq 3059entry: 3060 %rint = call <2 x double> @llvm.experimental.constrained.rint.v2f64( 3061 <2 x double> <double 42.1, double 42.0>, 3062 metadata !"round.dynamic", 3063 metadata !"fpexcept.strict") #0 3064 ret <2 x double> %rint 3065} 3066 3067define <2 x double> @constrained_vector_rint_v2f64_var(ptr %a) 
#0 { 3068; CHECK-LABEL: constrained_vector_rint_v2f64_var: 3069; CHECK: # %bb.0: # %entry 3070; CHECK-NEXT: subq $40, %rsp 3071; CHECK-NEXT: .cfi_def_cfa_offset 48 3072; CHECK-NEXT: movaps (%rdi), %xmm0 3073; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill 3074; CHECK-NEXT: callq rint@PLT 3075; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 3076; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload 3077; CHECK-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1] 3078; CHECK-NEXT: callq rint@PLT 3079; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload 3080; CHECK-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0] 3081; CHECK-NEXT: movaps %xmm1, %xmm0 3082; CHECK-NEXT: addq $40, %rsp 3083; CHECK-NEXT: .cfi_def_cfa_offset 8 3084; CHECK-NEXT: retq 3085; 3086; AVX-LABEL: constrained_vector_rint_v2f64_var: 3087; AVX: # %bb.0: # %entry 3088; AVX-NEXT: vroundpd $4, (%rdi), %xmm0 3089; AVX-NEXT: retq 3090entry: 3091 %b = load <2 x double>, ptr %a 3092 %rint = call <2 x double> @llvm.experimental.constrained.rint.v2f64( 3093 <2 x double> %b, 3094 metadata !"round.dynamic", 3095 metadata !"fpexcept.strict") #0 3096 ret <2 x double> %rint 3097} 3098 3099define <3 x float> @constrained_vector_rint_v3f32_var(ptr %a) #0 { 3100; CHECK-LABEL: constrained_vector_rint_v3f32_var: 3101; CHECK: # %bb.0: # %entry 3102; CHECK-NEXT: subq $56, %rsp 3103; CHECK-NEXT: .cfi_def_cfa_offset 64 3104; CHECK-NEXT: movaps (%rdi), %xmm0 3105; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill 3106; CHECK-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1] 3107; CHECK-NEXT: callq rintf@PLT 3108; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 3109; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload 3110; CHECK-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1] 3111; CHECK-NEXT: callq rintf@PLT 3112; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 3113; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload 3114; CHECK-NEXT: callq rintf@PLT 3115; CHECK-NEXT: unpcklps 
{{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload 3116; CHECK-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1] 3117; CHECK-NEXT: unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload 3118; CHECK-NEXT: # xmm0 = xmm0[0],mem[0] 3119; CHECK-NEXT: addq $56, %rsp 3120; CHECK-NEXT: .cfi_def_cfa_offset 8 3121; CHECK-NEXT: retq 3122; 3123; AVX-LABEL: constrained_vector_rint_v3f32_var: 3124; AVX: # %bb.0: # %entry 3125; AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero 3126; AVX-NEXT: vroundss $4, %xmm0, %xmm0, %xmm0 3127; AVX-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero 3128; AVX-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero 3129; AVX-NEXT: vroundss $4, %xmm1, %xmm1, %xmm1 3130; AVX-NEXT: vroundss $4, %xmm2, %xmm2, %xmm2 3131; AVX-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[2,3] 3132; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3] 3133; AVX-NEXT: retq 3134 entry: 3135 %b = load <3 x float>, ptr %a 3136 %rint = call <3 x float> @llvm.experimental.constrained.rint.v3f32( 3137 <3 x float> %b, 3138 metadata !"round.dynamic", 3139 metadata !"fpexcept.strict") #0 3140 ret <3 x float> %rint 3141} 3142 3143define <3 x double> @constrained_vector_rint_v3f64() #0 { 3144; CHECK-LABEL: constrained_vector_rint_v3f64: 3145; CHECK: # %bb.0: # %entry 3146; CHECK-NEXT: subq $24, %rsp 3147; CHECK-NEXT: .cfi_def_cfa_offset 32 3148; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0] 3149; CHECK-NEXT: callq rint@PLT 3150; CHECK-NEXT: movsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill 3151; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0] 3152; CHECK-NEXT: callq rint@PLT 3153; CHECK-NEXT: movsd %xmm0, (%rsp) # 8-byte Spill 3154; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2200000000000003E+1,0.0E+0] 3155; CHECK-NEXT: callq rint@PLT 3156; CHECK-NEXT: movsd %xmm0, {{[0-9]+}}(%rsp) 3157; CHECK-NEXT: fldl {{[0-9]+}}(%rsp) 3158; CHECK-NEXT: wait 3159; CHECK-NEXT: movsd (%rsp), %xmm0 # 8-byte Reload 3160; CHECK-NEXT: # xmm0 = mem[0],zero 
3161; CHECK-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 8-byte Reload 3162; CHECK-NEXT: # xmm1 = mem[0],zero 3163; CHECK-NEXT: addq $24, %rsp 3164; CHECK-NEXT: .cfi_def_cfa_offset 8 3165; CHECK-NEXT: retq 3166; 3167; AVX-LABEL: constrained_vector_rint_v3f64: 3168; AVX: # %bb.0: # %entry 3169; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2200000000000003E+1,0.0E+0] 3170; AVX-NEXT: vroundsd $4, %xmm0, %xmm0, %xmm0 3171; AVX-NEXT: vroundpd $4, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 3172; AVX-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 3173; AVX-NEXT: retq 3174entry: 3175 %rint = call <3 x double> @llvm.experimental.constrained.rint.v3f64( 3176 <3 x double> <double 42.0, double 42.1, double 42.2>, 3177 metadata !"round.dynamic", 3178 metadata !"fpexcept.strict") #0 3179 ret <3 x double> %rint 3180} 3181 3182define <3 x double> @constrained_vector_rint_v3f64_var(ptr %a) #0 { 3183; CHECK-LABEL: constrained_vector_rint_v3f64_var: 3184; CHECK: # %bb.0: # %entry 3185; CHECK-NEXT: subq $40, %rsp 3186; CHECK-NEXT: .cfi_def_cfa_offset 48 3187; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero 3188; CHECK-NEXT: movsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill 3189; CHECK-NEXT: movaps (%rdi), %xmm0 3190; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill 3191; CHECK-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1] 3192; CHECK-NEXT: callq rint@PLT 3193; CHECK-NEXT: movsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill 3194; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload 3195; CHECK-NEXT: callq rint@PLT 3196; CHECK-NEXT: movsd %xmm0, (%rsp) # 8-byte Spill 3197; CHECK-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte Reload 3198; CHECK-NEXT: # xmm0 = mem[0],zero 3199; CHECK-NEXT: callq rint@PLT 3200; CHECK-NEXT: movsd %xmm0, {{[0-9]+}}(%rsp) 3201; CHECK-NEXT: fldl {{[0-9]+}}(%rsp) 3202; CHECK-NEXT: wait 3203; CHECK-NEXT: movsd (%rsp), %xmm0 # 8-byte Reload 3204; CHECK-NEXT: # xmm0 = mem[0],zero 3205; CHECK-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 8-byte Reload 3206; CHECK-NEXT: # xmm1 = 
mem[0],zero 3207; CHECK-NEXT: addq $40, %rsp 3208; CHECK-NEXT: .cfi_def_cfa_offset 8 3209; CHECK-NEXT: retq 3210; 3211; AVX-LABEL: constrained_vector_rint_v3f64_var: 3212; AVX: # %bb.0: # %entry 3213; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero 3214; AVX-NEXT: vroundsd $4, %xmm0, %xmm0, %xmm0 3215; AVX-NEXT: vroundpd $4, (%rdi), %xmm1 3216; AVX-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 3217; AVX-NEXT: retq 3218entry: 3219 %b = load <3 x double>, ptr %a 3220 %rint = call <3 x double> @llvm.experimental.constrained.rint.v3f64( 3221 <3 x double> %b, 3222 metadata !"round.dynamic", 3223 metadata !"fpexcept.strict") #0 3224 ret <3 x double> %rint 3225} 3226 3227define <4 x double> @constrained_vector_rint_v4f64() #0 { 3228; CHECK-LABEL: constrained_vector_rint_v4f64: 3229; CHECK: # %bb.0: # %entry 3230; CHECK-NEXT: subq $40, %rsp 3231; CHECK-NEXT: .cfi_def_cfa_offset 48 3232; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2200000000000003E+1,0.0E+0] 3233; CHECK-NEXT: callq rint@PLT 3234; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill 3235; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0] 3236; CHECK-NEXT: callq rint@PLT 3237; CHECK-NEXT: unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload 3238; CHECK-NEXT: # xmm0 = xmm0[0],mem[0] 3239; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill 3240; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2399999999999999E+1,0.0E+0] 3241; CHECK-NEXT: callq rint@PLT 3242; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 3243; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2299999999999997E+1,0.0E+0] 3244; CHECK-NEXT: callq rint@PLT 3245; CHECK-NEXT: movaps %xmm0, %xmm1 3246; CHECK-NEXT: unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload 3247; CHECK-NEXT: # xmm1 = xmm1[0],mem[0] 3248; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload 3249; CHECK-NEXT: addq $40, %rsp 3250; CHECK-NEXT: .cfi_def_cfa_offset 8 3251; CHECK-NEXT: retq 3252; 3253; AVX-LABEL: constrained_vector_rint_v4f64: 3254; AVX: # %bb.0: # %entry 3255; AVX-NEXT: 
vroundpd $4, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0 3256; AVX-NEXT: retq 3257entry: 3258 %rint = call <4 x double> @llvm.experimental.constrained.rint.v4f64( 3259 <4 x double> <double 42.1, double 42.2, 3260 double 42.3, double 42.4>, 3261 metadata !"round.dynamic", 3262 metadata !"fpexcept.strict") #0 3263 ret <4 x double> %rint 3264} 3265 3266define <4 x double> @constrained_vector_rint_v4f64_var(ptr %a) #0 { 3267; CHECK-LABEL: constrained_vector_rint_v4f64_var: 3268; CHECK: # %bb.0: # %entry 3269; CHECK-NEXT: subq $56, %rsp 3270; CHECK-NEXT: .cfi_def_cfa_offset 64 3271; CHECK-NEXT: movaps (%rdi), %xmm1 3272; CHECK-NEXT: movaps %xmm1, (%rsp) # 16-byte Spill 3273; CHECK-NEXT: movaps 16(%rdi), %xmm0 3274; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 3275; CHECK-NEXT: movaps %xmm1, %xmm0 3276; CHECK-NEXT: callq rint@PLT 3277; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 3278; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload 3279; CHECK-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1] 3280; CHECK-NEXT: callq rint@PLT 3281; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload 3282; CHECK-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0] 3283; CHECK-NEXT: movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 3284; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload 3285; CHECK-NEXT: callq rint@PLT 3286; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill 3287; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload 3288; CHECK-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1] 3289; CHECK-NEXT: callq rint@PLT 3290; CHECK-NEXT: movaps (%rsp), %xmm1 # 16-byte Reload 3291; CHECK-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0] 3292; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload 3293; CHECK-NEXT: addq $56, %rsp 3294; CHECK-NEXT: .cfi_def_cfa_offset 8 3295; CHECK-NEXT: retq 3296; 3297; AVX-LABEL: constrained_vector_rint_v4f64_var: 3298; AVX: # %bb.0: # %entry 3299; AVX-NEXT: vroundpd $4, (%rdi), 
%ymm0 3300; AVX-NEXT: retq 3301entry: 3302 %b = load <4 x double>, ptr %a 3303 %rint = call <4 x double> @llvm.experimental.constrained.rint.v4f64( 3304 <4 x double> %b, 3305 metadata !"round.dynamic", 3306 metadata !"fpexcept.strict") #0 3307 ret <4 x double> %rint 3308} 3309 3310define <1 x float> @constrained_vector_nearbyint_v1f32_var(ptr %a) #0 { 3311; CHECK-LABEL: constrained_vector_nearbyint_v1f32_var: 3312; CHECK: # %bb.0: # %entry 3313; CHECK-NEXT: pushq %rax 3314; CHECK-NEXT: .cfi_def_cfa_offset 16 3315; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero 3316; CHECK-NEXT: callq nearbyintf@PLT 3317; CHECK-NEXT: popq %rax 3318; CHECK-NEXT: .cfi_def_cfa_offset 8 3319; CHECK-NEXT: retq 3320; 3321; AVX-LABEL: constrained_vector_nearbyint_v1f32_var: 3322; AVX: # %bb.0: # %entry 3323; AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero 3324; AVX-NEXT: vroundss $12, %xmm0, %xmm0, %xmm0 3325; AVX-NEXT: retq 3326entry: 3327 %b = load <1 x float>, ptr %a 3328 %nearby = call <1 x float> @llvm.experimental.constrained.nearbyint.v1f32( 3329 <1 x float> %b, 3330 metadata !"round.dynamic", 3331 metadata !"fpexcept.strict") #0 3332 ret <1 x float> %nearby 3333} 3334 3335define <2 x double> @constrained_vector_nearbyint_v2f64() #0 { 3336; CHECK-LABEL: constrained_vector_nearbyint_v2f64: 3337; CHECK: # %bb.0: # %entry 3338; CHECK-NEXT: subq $24, %rsp 3339; CHECK-NEXT: .cfi_def_cfa_offset 32 3340; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0] 3341; CHECK-NEXT: callq nearbyint@PLT 3342; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill 3343; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0] 3344; CHECK-NEXT: callq nearbyint@PLT 3345; CHECK-NEXT: unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload 3346; CHECK-NEXT: # xmm0 = xmm0[0],mem[0] 3347; CHECK-NEXT: addq $24, %rsp 3348; CHECK-NEXT: .cfi_def_cfa_offset 8 3349; CHECK-NEXT: retq 3350; 3351; AVX-LABEL: constrained_vector_nearbyint_v2f64: 3352; AVX: # %bb.0: # %entry 3353; AVX-NEXT: vroundpd $12, 
{{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 3354; AVX-NEXT: retq 3355entry: 3356 %nearby = call <2 x double> @llvm.experimental.constrained.nearbyint.v2f64( 3357 <2 x double> <double 42.1, double 42.0>, 3358 metadata !"round.dynamic", 3359 metadata !"fpexcept.strict") #0 3360 ret <2 x double> %nearby 3361} 3362 3363define <2 x double> @constrained_vector_nearbyint_v2f64_var(ptr %a) #0 { 3364; CHECK-LABEL: constrained_vector_nearbyint_v2f64_var: 3365; CHECK: # %bb.0: # %entry 3366; CHECK-NEXT: subq $40, %rsp 3367; CHECK-NEXT: .cfi_def_cfa_offset 48 3368; CHECK-NEXT: movaps (%rdi), %xmm0 3369; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill 3370; CHECK-NEXT: callq nearbyint@PLT 3371; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 3372; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload 3373; CHECK-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1] 3374; CHECK-NEXT: callq nearbyint@PLT 3375; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload 3376; CHECK-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0] 3377; CHECK-NEXT: movaps %xmm1, %xmm0 3378; CHECK-NEXT: addq $40, %rsp 3379; CHECK-NEXT: .cfi_def_cfa_offset 8 3380; CHECK-NEXT: retq 3381; 3382; AVX-LABEL: constrained_vector_nearbyint_v2f64_var: 3383; AVX: # %bb.0: # %entry 3384; AVX-NEXT: vroundpd $12, (%rdi), %xmm0 3385; AVX-NEXT: retq 3386entry: 3387 %b = load <2 x double>, ptr %a 3388 %nearby = call <2 x double> @llvm.experimental.constrained.nearbyint.v2f64( 3389 <2 x double> %b, 3390 metadata !"round.dynamic", 3391 metadata !"fpexcept.strict") #0 3392 ret <2 x double> %nearby 3393} 3394 3395define <3 x float> @constrained_vector_nearbyint_v3f32_var(ptr %a) #0 { 3396; CHECK-LABEL: constrained_vector_nearbyint_v3f32_var: 3397; CHECK: # %bb.0: # %entry 3398; CHECK-NEXT: subq $56, %rsp 3399; CHECK-NEXT: .cfi_def_cfa_offset 64 3400; CHECK-NEXT: movaps (%rdi), %xmm0 3401; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill 3402; CHECK-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1] 3403; CHECK-NEXT: callq 
nearbyintf@PLT 3404; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 3405; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload 3406; CHECK-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1] 3407; CHECK-NEXT: callq nearbyintf@PLT 3408; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 3409; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload 3410; CHECK-NEXT: callq nearbyintf@PLT 3411; CHECK-NEXT: unpcklps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload 3412; CHECK-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1] 3413; CHECK-NEXT: unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload 3414; CHECK-NEXT: # xmm0 = xmm0[0],mem[0] 3415; CHECK-NEXT: addq $56, %rsp 3416; CHECK-NEXT: .cfi_def_cfa_offset 8 3417; CHECK-NEXT: retq 3418; 3419; AVX-LABEL: constrained_vector_nearbyint_v3f32_var: 3420; AVX: # %bb.0: # %entry 3421; AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero 3422; AVX-NEXT: vroundss $12, %xmm0, %xmm0, %xmm0 3423; AVX-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero 3424; AVX-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero 3425; AVX-NEXT: vroundss $12, %xmm1, %xmm1, %xmm1 3426; AVX-NEXT: vroundss $12, %xmm2, %xmm2, %xmm2 3427; AVX-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[2,3] 3428; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3] 3429; AVX-NEXT: retq 3430entry: 3431 %b = load <3 x float>, ptr %a 3432 %nearby = call <3 x float> @llvm.experimental.constrained.nearbyint.v3f32( 3433 <3 x float> %b, 3434 metadata !"round.dynamic", 3435 metadata !"fpexcept.strict") #0 3436 ret <3 x float> %nearby 3437} 3438 3439define <3 x double> @constrained_vector_nearby_v3f64() #0 { 3440; CHECK-LABEL: constrained_vector_nearby_v3f64: 3441; CHECK: # %bb.0: # %entry 3442; CHECK-NEXT: subq $24, %rsp 3443; CHECK-NEXT: .cfi_def_cfa_offset 32 3444; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0] 3445; CHECK-NEXT: callq nearbyint@PLT 3446; CHECK-NEXT: movsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte 
Spill 3447; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0] 3448; CHECK-NEXT: callq nearbyint@PLT 3449; CHECK-NEXT: movsd %xmm0, (%rsp) # 8-byte Spill 3450; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2200000000000003E+1,0.0E+0] 3451; CHECK-NEXT: callq nearbyint@PLT 3452; CHECK-NEXT: movsd %xmm0, {{[0-9]+}}(%rsp) 3453; CHECK-NEXT: fldl {{[0-9]+}}(%rsp) 3454; CHECK-NEXT: wait 3455; CHECK-NEXT: movsd (%rsp), %xmm0 # 8-byte Reload 3456; CHECK-NEXT: # xmm0 = mem[0],zero 3457; CHECK-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 8-byte Reload 3458; CHECK-NEXT: # xmm1 = mem[0],zero 3459; CHECK-NEXT: addq $24, %rsp 3460; CHECK-NEXT: .cfi_def_cfa_offset 8 3461; CHECK-NEXT: retq 3462; 3463; AVX-LABEL: constrained_vector_nearby_v3f64: 3464; AVX: # %bb.0: # %entry 3465; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2200000000000003E+1,0.0E+0] 3466; AVX-NEXT: vroundsd $12, %xmm0, %xmm0, %xmm0 3467; AVX-NEXT: vroundpd $12, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 3468; AVX-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 3469; AVX-NEXT: retq 3470entry: 3471 %nearby = call <3 x double> @llvm.experimental.constrained.nearbyint.v3f64( 3472 <3 x double> <double 42.0, double 42.1, double 42.2>, 3473 metadata !"round.dynamic", 3474 metadata !"fpexcept.strict") #0 3475 ret <3 x double> %nearby 3476} 3477 3478define <3 x double> @constrained_vector_nearbyint_v3f64_var(ptr %a) #0 { 3479; CHECK-LABEL: constrained_vector_nearbyint_v3f64_var: 3480; CHECK: # %bb.0: # %entry 3481; CHECK-NEXT: subq $40, %rsp 3482; CHECK-NEXT: .cfi_def_cfa_offset 48 3483; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero 3484; CHECK-NEXT: movsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill 3485; CHECK-NEXT: movaps (%rdi), %xmm0 3486; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill 3487; CHECK-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1] 3488; CHECK-NEXT: callq nearbyint@PLT 3489; CHECK-NEXT: movsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill 3490; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload 3491; CHECK-NEXT: callq nearbyint@PLT 3492; 
CHECK-NEXT: movsd %xmm0, (%rsp) # 8-byte Spill 3493; CHECK-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte Reload 3494; CHECK-NEXT: # xmm0 = mem[0],zero 3495; CHECK-NEXT: callq nearbyint@PLT 3496; CHECK-NEXT: movsd %xmm0, {{[0-9]+}}(%rsp) 3497; CHECK-NEXT: fldl {{[0-9]+}}(%rsp) 3498; CHECK-NEXT: wait 3499; CHECK-NEXT: movsd (%rsp), %xmm0 # 8-byte Reload 3500; CHECK-NEXT: # xmm0 = mem[0],zero 3501; CHECK-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 8-byte Reload 3502; CHECK-NEXT: # xmm1 = mem[0],zero 3503; CHECK-NEXT: addq $40, %rsp 3504; CHECK-NEXT: .cfi_def_cfa_offset 8 3505; CHECK-NEXT: retq 3506; 3507; AVX-LABEL: constrained_vector_nearbyint_v3f64_var: 3508; AVX: # %bb.0: # %entry 3509; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero 3510; AVX-NEXT: vroundsd $12, %xmm0, %xmm0, %xmm0 3511; AVX-NEXT: vroundpd $12, (%rdi), %xmm1 3512; AVX-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 3513; AVX-NEXT: retq 3514entry: 3515 %b = load <3 x double>, ptr %a 3516 %nearby = call <3 x double> @llvm.experimental.constrained.nearbyint.v3f64( 3517 <3 x double> %b, 3518 metadata !"round.dynamic", 3519 metadata !"fpexcept.strict") #0 3520 ret <3 x double> %nearby 3521} 3522 3523define <4 x double> @constrained_vector_nearbyint_v4f64() #0 { 3524; CHECK-LABEL: constrained_vector_nearbyint_v4f64: 3525; CHECK: # %bb.0: # %entry 3526; CHECK-NEXT: subq $40, %rsp 3527; CHECK-NEXT: .cfi_def_cfa_offset 48 3528; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2200000000000003E+1,0.0E+0] 3529; CHECK-NEXT: callq nearbyint@PLT 3530; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill 3531; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0] 3532; CHECK-NEXT: callq nearbyint@PLT 3533; CHECK-NEXT: unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload 3534; CHECK-NEXT: # xmm0 = xmm0[0],mem[0] 3535; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill 3536; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2399999999999999E+1,0.0E+0] 3537; CHECK-NEXT: callq nearbyint@PLT 3538; CHECK-NEXT: movaps %xmm0, 
{{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 3539; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2299999999999997E+1,0.0E+0] 3540; CHECK-NEXT: callq nearbyint@PLT 3541; CHECK-NEXT: movaps %xmm0, %xmm1 3542; CHECK-NEXT: unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload 3543; CHECK-NEXT: # xmm1 = xmm1[0],mem[0] 3544; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload 3545; CHECK-NEXT: addq $40, %rsp 3546; CHECK-NEXT: .cfi_def_cfa_offset 8 3547; CHECK-NEXT: retq 3548; 3549; AVX-LABEL: constrained_vector_nearbyint_v4f64: 3550; AVX: # %bb.0: # %entry 3551; AVX-NEXT: vroundpd $12, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0 3552; AVX-NEXT: retq 3553entry: 3554 %nearby = call <4 x double> @llvm.experimental.constrained.nearbyint.v4f64( 3555 <4 x double> <double 42.1, double 42.2, 3556 double 42.3, double 42.4>, 3557 metadata !"round.dynamic", 3558 metadata !"fpexcept.strict") #0 3559 ret <4 x double> %nearby 3560} 3561 3562define <4 x double> @constrained_vector_nearbyint_v4f64_var(ptr %a) #0 { 3563; CHECK-LABEL: constrained_vector_nearbyint_v4f64_var: 3564; CHECK: # %bb.0: # %entry 3565; CHECK-NEXT: subq $56, %rsp 3566; CHECK-NEXT: .cfi_def_cfa_offset 64 3567; CHECK-NEXT: movaps (%rdi), %xmm1 3568; CHECK-NEXT: movaps %xmm1, (%rsp) # 16-byte Spill 3569; CHECK-NEXT: movaps 16(%rdi), %xmm0 3570; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 3571; CHECK-NEXT: movaps %xmm1, %xmm0 3572; CHECK-NEXT: callq nearbyint@PLT 3573; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 3574; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload 3575; CHECK-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1] 3576; CHECK-NEXT: callq nearbyint@PLT 3577; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload 3578; CHECK-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0] 3579; CHECK-NEXT: movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 3580; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload 3581; CHECK-NEXT: callq nearbyint@PLT 3582; CHECK-NEXT: 
movaps %xmm0, (%rsp) # 16-byte Spill 3583; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload 3584; CHECK-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1] 3585; CHECK-NEXT: callq nearbyint@PLT 3586; CHECK-NEXT: movaps (%rsp), %xmm1 # 16-byte Reload 3587; CHECK-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0] 3588; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload 3589; CHECK-NEXT: addq $56, %rsp 3590; CHECK-NEXT: .cfi_def_cfa_offset 8 3591; CHECK-NEXT: retq 3592; 3593; AVX-LABEL: constrained_vector_nearbyint_v4f64_var: 3594; AVX: # %bb.0: # %entry 3595; AVX-NEXT: vroundpd $12, (%rdi), %ymm0 3596; AVX-NEXT: retq 3597entry: 3598 %b = load <4 x double>, ptr %a 3599 %nearby = call <4 x double> @llvm.experimental.constrained.nearbyint.v4f64( 3600 <4 x double> %b, 3601 metadata !"round.dynamic", 3602 metadata !"fpexcept.strict") #0 3603 ret <4 x double> %nearby 3604} 3605 3606define <1 x float> @constrained_vector_maxnum_v1f32() #0 { 3607; CHECK-LABEL: constrained_vector_maxnum_v1f32: 3608; CHECK: # %bb.0: # %entry 3609; CHECK-NEXT: pushq %rax 3610; CHECK-NEXT: .cfi_def_cfa_offset 16 3611; CHECK-NEXT: movss {{.*#+}} xmm0 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0] 3612; CHECK-NEXT: movss {{.*#+}} xmm1 = [4.1E+1,0.0E+0,0.0E+0,0.0E+0] 3613; CHECK-NEXT: callq fmaxf@PLT 3614; CHECK-NEXT: popq %rax 3615; CHECK-NEXT: .cfi_def_cfa_offset 8 3616; CHECK-NEXT: retq 3617; 3618; AVX-LABEL: constrained_vector_maxnum_v1f32: 3619; AVX: # %bb.0: # %entry 3620; AVX-NEXT: pushq %rax 3621; AVX-NEXT: .cfi_def_cfa_offset 16 3622; AVX-NEXT: vmovss {{.*#+}} xmm0 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0] 3623; AVX-NEXT: vmovss {{.*#+}} xmm1 = [4.1E+1,0.0E+0,0.0E+0,0.0E+0] 3624; AVX-NEXT: callq fmaxf@PLT 3625; AVX-NEXT: popq %rax 3626; AVX-NEXT: .cfi_def_cfa_offset 8 3627; AVX-NEXT: retq 3628entry: 3629 %max = call <1 x float> @llvm.experimental.constrained.maxnum.v1f32( 3630 <1 x float> <float 42.0>, <1 x float> <float 41.0>, 3631 metadata !"fpexcept.strict") #0 3632 ret <1 x float> %max 3633} 
3634 3635define <2 x double> @constrained_vector_maxnum_v2f64() #0 { 3636; CHECK-LABEL: constrained_vector_maxnum_v2f64: 3637; CHECK: # %bb.0: # %entry 3638; CHECK-NEXT: subq $24, %rsp 3639; CHECK-NEXT: .cfi_def_cfa_offset 32 3640; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0] 3641; CHECK-NEXT: movsd {{.*#+}} xmm1 = [4.0E+1,0.0E+0] 3642; CHECK-NEXT: callq fmax@PLT 3643; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill 3644; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.3E+1,0.0E+0] 3645; CHECK-NEXT: movsd {{.*#+}} xmm1 = [4.1E+1,0.0E+0] 3646; CHECK-NEXT: callq fmax@PLT 3647; CHECK-NEXT: unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload 3648; CHECK-NEXT: # xmm0 = xmm0[0],mem[0] 3649; CHECK-NEXT: addq $24, %rsp 3650; CHECK-NEXT: .cfi_def_cfa_offset 8 3651; CHECK-NEXT: retq 3652; 3653; AVX-LABEL: constrained_vector_maxnum_v2f64: 3654; AVX: # %bb.0: # %entry 3655; AVX-NEXT: subq $24, %rsp 3656; AVX-NEXT: .cfi_def_cfa_offset 32 3657; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0] 3658; AVX-NEXT: vmovsd {{.*#+}} xmm1 = [4.0E+1,0.0E+0] 3659; AVX-NEXT: callq fmax@PLT 3660; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill 3661; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.3E+1,0.0E+0] 3662; AVX-NEXT: vmovsd {{.*#+}} xmm1 = [4.1E+1,0.0E+0] 3663; AVX-NEXT: callq fmax@PLT 3664; AVX-NEXT: vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload 3665; AVX-NEXT: # xmm0 = xmm0[0],mem[0] 3666; AVX-NEXT: addq $24, %rsp 3667; AVX-NEXT: .cfi_def_cfa_offset 8 3668; AVX-NEXT: retq 3669entry: 3670 %max = call <2 x double> @llvm.experimental.constrained.maxnum.v2f64( 3671 <2 x double> <double 43.0, double 42.0>, 3672 <2 x double> <double 41.0, double 40.0>, 3673 metadata !"fpexcept.strict") #0 3674 ret <2 x double> %max 3675} 3676 3677define <3 x float> @constrained_vector_maxnum_v3f32() #0 { 3678; CHECK-LABEL: constrained_vector_maxnum_v3f32: 3679; CHECK: # %bb.0: # %entry 3680; CHECK-NEXT: subq $40, %rsp 3681; CHECK-NEXT: .cfi_def_cfa_offset 48 3682; CHECK-NEXT: movss {{.*#+}} xmm0 = 
[4.5E+1,0.0E+0,0.0E+0,0.0E+0] 3683; CHECK-NEXT: movss {{.*#+}} xmm1 = [4.3E+1,0.0E+0,0.0E+0,0.0E+0] 3684; CHECK-NEXT: callq fmaxf@PLT 3685; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 3686; CHECK-NEXT: movss {{.*#+}} xmm1 = [4.1E+1,0.0E+0,0.0E+0,0.0E+0] 3687; CHECK-NEXT: movss {{.*#+}} xmm0 = [4.3E+1,0.0E+0,0.0E+0,0.0E+0] 3688; CHECK-NEXT: callq fmaxf@PLT 3689; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill 3690; CHECK-NEXT: movss {{.*#+}} xmm0 = [4.4E+1,0.0E+0,0.0E+0,0.0E+0] 3691; CHECK-NEXT: movss {{.*#+}} xmm1 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0] 3692; CHECK-NEXT: callq fmaxf@PLT 3693; CHECK-NEXT: movaps (%rsp), %xmm1 # 16-byte Reload 3694; CHECK-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] 3695; CHECK-NEXT: unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload 3696; CHECK-NEXT: # xmm1 = xmm1[0],mem[0] 3697; CHECK-NEXT: movaps %xmm1, %xmm0 3698; CHECK-NEXT: addq $40, %rsp 3699; CHECK-NEXT: .cfi_def_cfa_offset 8 3700; CHECK-NEXT: retq 3701; 3702; AVX-LABEL: constrained_vector_maxnum_v3f32: 3703; AVX: # %bb.0: # %entry 3704; AVX-NEXT: subq $40, %rsp 3705; AVX-NEXT: .cfi_def_cfa_offset 48 3706; AVX-NEXT: vmovss {{.*#+}} xmm0 = [4.5E+1,0.0E+0,0.0E+0,0.0E+0] 3707; AVX-NEXT: vmovss {{.*#+}} xmm1 = [4.3E+1,0.0E+0,0.0E+0,0.0E+0] 3708; AVX-NEXT: callq fmaxf@PLT 3709; AVX-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 3710; AVX-NEXT: vmovss {{.*#+}} xmm1 = [4.1E+1,0.0E+0,0.0E+0,0.0E+0] 3711; AVX-NEXT: vmovss {{.*#+}} xmm0 = [4.3E+1,0.0E+0,0.0E+0,0.0E+0] 3712; AVX-NEXT: callq fmaxf@PLT 3713; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill 3714; AVX-NEXT: vmovss {{.*#+}} xmm0 = [4.4E+1,0.0E+0,0.0E+0,0.0E+0] 3715; AVX-NEXT: vmovss {{.*#+}} xmm1 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0] 3716; AVX-NEXT: callq fmaxf@PLT 3717; AVX-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload 3718; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3] 3719; AVX-NEXT: vinsertps $32, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte 
Folded Reload 3720; AVX-NEXT: # xmm0 = xmm0[0,1],mem[0],xmm0[3] 3721; AVX-NEXT: addq $40, %rsp 3722; AVX-NEXT: .cfi_def_cfa_offset 8 3723; AVX-NEXT: retq 3724entry: 3725 %max = call <3 x float> @llvm.experimental.constrained.maxnum.v3f32( 3726 <3 x float> <float 43.0, float 44.0, float 45.0>, 3727 <3 x float> <float 41.0, float 42.0, float 43.0>, 3728 metadata !"fpexcept.strict") #0 3729 ret <3 x float> %max 3730} 3731 3732define <3 x double> @constrained_vector_max_v3f64() #0 { 3733; CHECK-LABEL: constrained_vector_max_v3f64: 3734; CHECK: # %bb.0: # %entry 3735; CHECK-NEXT: subq $24, %rsp 3736; CHECK-NEXT: .cfi_def_cfa_offset 32 3737; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.4E+1,0.0E+0] 3738; CHECK-NEXT: movsd {{.*#+}} xmm1 = [4.1E+1,0.0E+0] 3739; CHECK-NEXT: callq fmax@PLT 3740; CHECK-NEXT: movsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill 3741; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.3E+1,0.0E+0] 3742; CHECK-NEXT: movsd {{.*#+}} xmm1 = [4.0E+1,0.0E+0] 3743; CHECK-NEXT: callq fmax@PLT 3744; CHECK-NEXT: movsd %xmm0, (%rsp) # 8-byte Spill 3745; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.5E+1,0.0E+0] 3746; CHECK-NEXT: movsd {{.*#+}} xmm1 = [4.2E+1,0.0E+0] 3747; CHECK-NEXT: callq fmax@PLT 3748; CHECK-NEXT: movsd %xmm0, {{[0-9]+}}(%rsp) 3749; CHECK-NEXT: fldl {{[0-9]+}}(%rsp) 3750; CHECK-NEXT: wait 3751; CHECK-NEXT: movsd (%rsp), %xmm0 # 8-byte Reload 3752; CHECK-NEXT: # xmm0 = mem[0],zero 3753; CHECK-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 8-byte Reload 3754; CHECK-NEXT: # xmm1 = mem[0],zero 3755; CHECK-NEXT: addq $24, %rsp 3756; CHECK-NEXT: .cfi_def_cfa_offset 8 3757; CHECK-NEXT: retq 3758; 3759; AVX-LABEL: constrained_vector_max_v3f64: 3760; AVX: # %bb.0: # %entry 3761; AVX-NEXT: subq $40, %rsp 3762; AVX-NEXT: .cfi_def_cfa_offset 48 3763; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.4E+1,0.0E+0] 3764; AVX-NEXT: vmovsd {{.*#+}} xmm1 = [4.1E+1,0.0E+0] 3765; AVX-NEXT: callq fmax@PLT 3766; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill 3767; AVX-NEXT: vmovsd {{.*#+}} xmm0 = 
[4.3E+1,0.0E+0] 3768; AVX-NEXT: vmovsd {{.*#+}} xmm1 = [4.0E+1,0.0E+0] 3769; AVX-NEXT: callq fmax@PLT 3770; AVX-NEXT: vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload 3771; AVX-NEXT: # xmm0 = xmm0[0],mem[0] 3772; AVX-NEXT: vmovups %ymm0, (%rsp) # 32-byte Spill 3773; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.5E+1,0.0E+0] 3774; AVX-NEXT: vmovsd {{.*#+}} xmm1 = [4.2E+1,0.0E+0] 3775; AVX-NEXT: vzeroupper 3776; AVX-NEXT: callq fmax@PLT 3777; AVX-NEXT: vmovups (%rsp), %ymm1 # 32-byte Reload 3778; AVX-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 3779; AVX-NEXT: addq $40, %rsp 3780; AVX-NEXT: .cfi_def_cfa_offset 8 3781; AVX-NEXT: retq 3782entry: 3783 %max = call <3 x double> @llvm.experimental.constrained.maxnum.v3f64( 3784 <3 x double> <double 43.0, double 44.0, double 45.0>, 3785 <3 x double> <double 40.0, double 41.0, double 42.0>, 3786 metadata !"fpexcept.strict") #0 3787 ret <3 x double> %max 3788} 3789 3790define <4 x double> @constrained_vector_maxnum_v4f64() #0 { 3791; CHECK-LABEL: constrained_vector_maxnum_v4f64: 3792; CHECK: # %bb.0: # %entry 3793; CHECK-NEXT: subq $40, %rsp 3794; CHECK-NEXT: .cfi_def_cfa_offset 48 3795; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.5E+1,0.0E+0] 3796; CHECK-NEXT: movsd {{.*#+}} xmm1 = [4.1E+1,0.0E+0] 3797; CHECK-NEXT: callq fmax@PLT 3798; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill 3799; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.4E+1,0.0E+0] 3800; CHECK-NEXT: movsd {{.*#+}} xmm1 = [4.0E+1,0.0E+0] 3801; CHECK-NEXT: callq fmax@PLT 3802; CHECK-NEXT: unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload 3803; CHECK-NEXT: # xmm0 = xmm0[0],mem[0] 3804; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill 3805; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.7E+1,0.0E+0] 3806; CHECK-NEXT: movsd {{.*#+}} xmm1 = [4.3E+1,0.0E+0] 3807; CHECK-NEXT: callq fmax@PLT 3808; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 3809; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.6E+1,0.0E+0] 3810; CHECK-NEXT: movsd {{.*#+}} xmm1 = [4.2E+1,0.0E+0] 3811; CHECK-NEXT: callq 
fmax@PLT 3812; CHECK-NEXT: movaps %xmm0, %xmm1 3813; CHECK-NEXT: unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload 3814; CHECK-NEXT: # xmm1 = xmm1[0],mem[0] 3815; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload 3816; CHECK-NEXT: addq $40, %rsp 3817; CHECK-NEXT: .cfi_def_cfa_offset 8 3818; CHECK-NEXT: retq 3819; 3820; AVX-LABEL: constrained_vector_maxnum_v4f64: 3821; AVX: # %bb.0: # %entry 3822; AVX-NEXT: subq $40, %rsp 3823; AVX-NEXT: .cfi_def_cfa_offset 48 3824; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.7E+1,0.0E+0] 3825; AVX-NEXT: vmovsd {{.*#+}} xmm1 = [4.3E+1,0.0E+0] 3826; AVX-NEXT: callq fmax@PLT 3827; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill 3828; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.6E+1,0.0E+0] 3829; AVX-NEXT: vmovsd {{.*#+}} xmm1 = [4.2E+1,0.0E+0] 3830; AVX-NEXT: callq fmax@PLT 3831; AVX-NEXT: vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload 3832; AVX-NEXT: # xmm0 = xmm0[0],mem[0] 3833; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill 3834; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.5E+1,0.0E+0] 3835; AVX-NEXT: vmovsd {{.*#+}} xmm1 = [4.1E+1,0.0E+0] 3836; AVX-NEXT: callq fmax@PLT 3837; AVX-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 3838; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.4E+1,0.0E+0] 3839; AVX-NEXT: vmovsd {{.*#+}} xmm1 = [4.0E+1,0.0E+0] 3840; AVX-NEXT: callq fmax@PLT 3841; AVX-NEXT: vunpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload 3842; AVX-NEXT: # xmm0 = xmm0[0],mem[0] 3843; AVX-NEXT: vinsertf128 $1, (%rsp), %ymm0, %ymm0 # 16-byte Folded Reload 3844; AVX-NEXT: addq $40, %rsp 3845; AVX-NEXT: .cfi_def_cfa_offset 8 3846; AVX-NEXT: retq 3847entry: 3848 %max = call <4 x double> @llvm.experimental.constrained.maxnum.v4f64( 3849 <4 x double> <double 44.0, double 45.0, 3850 double 46.0, double 47.0>, 3851 <4 x double> <double 40.0, double 41.0, 3852 double 42.0, double 43.0>, 3853 metadata !"fpexcept.strict") #0 3854 ret <4 x double> %max 3855} 3856 3857define <1 x float> 
@constrained_vector_minnum_v1f32() #0 {
; CHECK-LABEL: constrained_vector_minnum_v1f32:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    pushq %rax
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    movss {{.*#+}} xmm0 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0]
; CHECK-NEXT:    movss {{.*#+}} xmm1 = [4.1E+1,0.0E+0,0.0E+0,0.0E+0]
; CHECK-NEXT:    callq fminf@PLT
; CHECK-NEXT:    popq %rax
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
;
; AVX-LABEL: constrained_vector_minnum_v1f32:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    pushq %rax
; AVX-NEXT:    .cfi_def_cfa_offset 16
; AVX-NEXT:    vmovss {{.*#+}} xmm0 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0]
; AVX-NEXT:    vmovss {{.*#+}} xmm1 = [4.1E+1,0.0E+0,0.0E+0,0.0E+0]
; AVX-NEXT:    callq fminf@PLT
; AVX-NEXT:    popq %rax
; AVX-NEXT:    .cfi_def_cfa_offset 8
; AVX-NEXT:    retq
entry:
  %min = call <1 x float> @llvm.experimental.constrained.minnum.v1f32(
                               <1 x float> <float 42.0>, <1 x float> <float 41.0>,
                               metadata !"fpexcept.strict") #0
  ret <1 x float> %min
}

define <2 x double> @constrained_vector_minnum_v2f64() #0 {
; CHECK-LABEL: constrained_vector_minnum_v2f64:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    subq $24, %rsp
; CHECK-NEXT:    .cfi_def_cfa_offset 32
; CHECK-NEXT:    movsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0]
; CHECK-NEXT:    movsd {{.*#+}} xmm1 = [4.0E+1,0.0E+0]
; CHECK-NEXT:    callq fmin@PLT
; CHECK-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT:    movsd {{.*#+}} xmm0 = [4.3E+1,0.0E+0]
; CHECK-NEXT:    movsd {{.*#+}} xmm1 = [4.1E+1,0.0E+0]
; CHECK-NEXT:    callq fmin@PLT
; CHECK-NEXT:    unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm0 = xmm0[0],mem[0]
; CHECK-NEXT:    addq $24, %rsp
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
;
; AVX-LABEL: constrained_vector_minnum_v2f64:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    subq $24, %rsp
; AVX-NEXT:    .cfi_def_cfa_offset 32
; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0]
; AVX-NEXT:    vmovsd {{.*#+}} xmm1 = [4.0E+1,0.0E+0]
; AVX-NEXT:    callq fmin@PLT
; AVX-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = [4.3E+1,0.0E+0]
; AVX-NEXT:    vmovsd {{.*#+}} xmm1 = [4.1E+1,0.0E+0]
; AVX-NEXT:    callq fmin@PLT
; AVX-NEXT:    vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
; AVX-NEXT:    # xmm0 = xmm0[0],mem[0]
; AVX-NEXT:    addq $24, %rsp
; AVX-NEXT:    .cfi_def_cfa_offset 8
; AVX-NEXT:    retq
entry:
  %min = call <2 x double> @llvm.experimental.constrained.minnum.v2f64(
                                <2 x double> <double 43.0, double 42.0>,
                                <2 x double> <double 41.0, double 40.0>,
                                metadata !"fpexcept.strict") #0
  ret <2 x double> %min
}

define <3 x float> @constrained_vector_minnum_v3f32() #0 {
; CHECK-LABEL: constrained_vector_minnum_v3f32:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    subq $40, %rsp
; CHECK-NEXT:    .cfi_def_cfa_offset 48
; CHECK-NEXT:    movss {{.*#+}} xmm0 = [4.5E+1,0.0E+0,0.0E+0,0.0E+0]
; CHECK-NEXT:    movss {{.*#+}} xmm1 = [4.3E+1,0.0E+0,0.0E+0,0.0E+0]
; CHECK-NEXT:    callq fminf@PLT
; CHECK-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    movss {{.*#+}} xmm1 = [4.1E+1,0.0E+0,0.0E+0,0.0E+0]
; CHECK-NEXT:    movss {{.*#+}} xmm0 = [4.3E+1,0.0E+0,0.0E+0,0.0E+0]
; CHECK-NEXT:    callq fminf@PLT
; CHECK-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT:    movss {{.*#+}} xmm0 = [4.4E+1,0.0E+0,0.0E+0,0.0E+0]
; CHECK-NEXT:    movss {{.*#+}} xmm1 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0]
; CHECK-NEXT:    callq fminf@PLT
; CHECK-NEXT:    movaps (%rsp), %xmm1 # 16-byte Reload
; CHECK-NEXT:    unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; CHECK-NEXT:    unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm1 = xmm1[0],mem[0]
; CHECK-NEXT:    movaps %xmm1, %xmm0
; CHECK-NEXT:    addq
$40, %rsp
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
;
; AVX-LABEL: constrained_vector_minnum_v3f32:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    subq $40, %rsp
; AVX-NEXT:    .cfi_def_cfa_offset 48
; AVX-NEXT:    vmovss {{.*#+}} xmm0 = [4.5E+1,0.0E+0,0.0E+0,0.0E+0]
; AVX-NEXT:    vmovss {{.*#+}} xmm1 = [4.3E+1,0.0E+0,0.0E+0,0.0E+0]
; AVX-NEXT:    callq fminf@PLT
; AVX-NEXT:    vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; AVX-NEXT:    vmovss {{.*#+}} xmm1 = [4.1E+1,0.0E+0,0.0E+0,0.0E+0]
; AVX-NEXT:    vmovss {{.*#+}} xmm0 = [4.3E+1,0.0E+0,0.0E+0,0.0E+0]
; AVX-NEXT:    callq fminf@PLT
; AVX-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
; AVX-NEXT:    vmovss {{.*#+}} xmm0 = [4.4E+1,0.0E+0,0.0E+0,0.0E+0]
; AVX-NEXT:    vmovss {{.*#+}} xmm1 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0]
; AVX-NEXT:    callq fminf@PLT
; AVX-NEXT:    vmovaps (%rsp), %xmm1 # 16-byte Reload
; AVX-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
; AVX-NEXT:    vinsertps $32, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
; AVX-NEXT:    # xmm0 = xmm0[0,1],mem[0],xmm0[3]
; AVX-NEXT:    addq $40, %rsp
; AVX-NEXT:    .cfi_def_cfa_offset 8
; AVX-NEXT:    retq
entry:
  %min = call <3 x float> @llvm.experimental.constrained.minnum.v3f32(
                              <3 x float> <float 43.0, float 44.0, float 45.0>,
                              <3 x float> <float 41.0, float 42.0, float 43.0>,
                              metadata !"fpexcept.strict") #0
  ret <3 x float> %min
}

define <3 x double> @constrained_vector_minnum_v3f64() #0 {
; CHECK-LABEL: constrained_vector_minnum_v3f64:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    subq $24, %rsp
; CHECK-NEXT:    .cfi_def_cfa_offset 32
; CHECK-NEXT:    movsd {{.*#+}} xmm0 = [4.4E+1,0.0E+0]
; CHECK-NEXT:    movsd {{.*#+}} xmm1 = [4.1E+1,0.0E+0]
; CHECK-NEXT:    callq fmin@PLT
; CHECK-NEXT:    movsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT:    movsd {{.*#+}} xmm0 = [4.3E+1,0.0E+0]
; CHECK-NEXT:    movsd {{.*#+}} xmm1 = [4.0E+1,0.0E+0]
; CHECK-NEXT:    callq fmin@PLT
; CHECK-NEXT:    movsd %xmm0, (%rsp) # 8-byte Spill
; CHECK-NEXT:    movsd {{.*#+}} xmm0 = [4.5E+1,0.0E+0]
; CHECK-NEXT:    movsd {{.*#+}} xmm1 = [4.2E+1,0.0E+0]
; CHECK-NEXT:    callq fmin@PLT
; CHECK-NEXT:    movsd %xmm0, {{[0-9]+}}(%rsp)
; CHECK-NEXT:    fldl {{[0-9]+}}(%rsp)
; CHECK-NEXT:    wait
; CHECK-NEXT:    movsd (%rsp), %xmm0 # 8-byte Reload
; CHECK-NEXT:    # xmm0 = mem[0],zero
; CHECK-NEXT:    movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 8-byte Reload
; CHECK-NEXT:    # xmm1 = mem[0],zero
; CHECK-NEXT:    addq $24, %rsp
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
;
; AVX-LABEL: constrained_vector_minnum_v3f64:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    subq $40, %rsp
; AVX-NEXT:    .cfi_def_cfa_offset 48
; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = [4.4E+1,0.0E+0]
; AVX-NEXT:    vmovsd {{.*#+}} xmm1 = [4.1E+1,0.0E+0]
; AVX-NEXT:    callq fmin@PLT
; AVX-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = [4.3E+1,0.0E+0]
; AVX-NEXT:    vmovsd {{.*#+}} xmm1 = [4.0E+1,0.0E+0]
; AVX-NEXT:    callq fmin@PLT
; AVX-NEXT:    vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
; AVX-NEXT:    # xmm0 = xmm0[0],mem[0]
; AVX-NEXT:    vmovups %ymm0, (%rsp) # 32-byte Spill
; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = [4.5E+1,0.0E+0]
; AVX-NEXT:    vmovsd {{.*#+}} xmm1 = [4.2E+1,0.0E+0]
; AVX-NEXT:    vzeroupper
; AVX-NEXT:    callq fmin@PLT
; AVX-NEXT:    vmovups (%rsp), %ymm1 # 32-byte Reload
; AVX-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX-NEXT:    addq $40, %rsp
; AVX-NEXT:    .cfi_def_cfa_offset 8
; AVX-NEXT:    retq
entry:
  %min = call <3 x double> @llvm.experimental.constrained.minnum.v3f64(
                              <3 x double> <double 43.0, double 44.0, double 45.0>,
                              <3 x double> <double 40.0, double 41.0, double 42.0>,
                              metadata !"fpexcept.strict") #0
  ret <3 x double> %min
}

define <4 x
double> @constrained_vector_minnum_v4f64() #0 { 4042; CHECK-LABEL: constrained_vector_minnum_v4f64: 4043; CHECK: # %bb.0: # %entry 4044; CHECK-NEXT: subq $40, %rsp 4045; CHECK-NEXT: .cfi_def_cfa_offset 48 4046; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.5E+1,0.0E+0] 4047; CHECK-NEXT: movsd {{.*#+}} xmm1 = [4.1E+1,0.0E+0] 4048; CHECK-NEXT: callq fmin@PLT 4049; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill 4050; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.4E+1,0.0E+0] 4051; CHECK-NEXT: movsd {{.*#+}} xmm1 = [4.0E+1,0.0E+0] 4052; CHECK-NEXT: callq fmin@PLT 4053; CHECK-NEXT: unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload 4054; CHECK-NEXT: # xmm0 = xmm0[0],mem[0] 4055; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill 4056; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.7E+1,0.0E+0] 4057; CHECK-NEXT: movsd {{.*#+}} xmm1 = [4.3E+1,0.0E+0] 4058; CHECK-NEXT: callq fmin@PLT 4059; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 4060; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.6E+1,0.0E+0] 4061; CHECK-NEXT: movsd {{.*#+}} xmm1 = [4.2E+1,0.0E+0] 4062; CHECK-NEXT: callq fmin@PLT 4063; CHECK-NEXT: movaps %xmm0, %xmm1 4064; CHECK-NEXT: unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload 4065; CHECK-NEXT: # xmm1 = xmm1[0],mem[0] 4066; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload 4067; CHECK-NEXT: addq $40, %rsp 4068; CHECK-NEXT: .cfi_def_cfa_offset 8 4069; CHECK-NEXT: retq 4070; 4071; AVX-LABEL: constrained_vector_minnum_v4f64: 4072; AVX: # %bb.0: # %entry 4073; AVX-NEXT: subq $40, %rsp 4074; AVX-NEXT: .cfi_def_cfa_offset 48 4075; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.7E+1,0.0E+0] 4076; AVX-NEXT: vmovsd {{.*#+}} xmm1 = [4.3E+1,0.0E+0] 4077; AVX-NEXT: callq fmin@PLT 4078; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill 4079; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.6E+1,0.0E+0] 4080; AVX-NEXT: vmovsd {{.*#+}} xmm1 = [4.2E+1,0.0E+0] 4081; AVX-NEXT: callq fmin@PLT 4082; AVX-NEXT: vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload 4083; AVX-NEXT: # xmm0 = xmm0[0],mem[0] 4084; 
AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill 4085; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.5E+1,0.0E+0] 4086; AVX-NEXT: vmovsd {{.*#+}} xmm1 = [4.1E+1,0.0E+0] 4087; AVX-NEXT: callq fmin@PLT 4088; AVX-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 4089; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.4E+1,0.0E+0] 4090; AVX-NEXT: vmovsd {{.*#+}} xmm1 = [4.0E+1,0.0E+0] 4091; AVX-NEXT: callq fmin@PLT 4092; AVX-NEXT: vunpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload 4093; AVX-NEXT: # xmm0 = xmm0[0],mem[0] 4094; AVX-NEXT: vinsertf128 $1, (%rsp), %ymm0, %ymm0 # 16-byte Folded Reload 4095; AVX-NEXT: addq $40, %rsp 4096; AVX-NEXT: .cfi_def_cfa_offset 8 4097; AVX-NEXT: retq 4098entry: 4099 %min = call <4 x double> @llvm.experimental.constrained.minnum.v4f64( 4100 <4 x double> <double 44.0, double 45.0, 4101 double 46.0, double 47.0>, 4102 <4 x double> <double 40.0, double 41.0, 4103 double 42.0, double 43.0>, 4104 metadata !"fpexcept.strict") #0 4105 ret <4 x double> %min 4106} 4107 4108define <1 x i32> @constrained_vector_fptosi_v1i32_v1f32() #0 { 4109; CHECK-LABEL: constrained_vector_fptosi_v1i32_v1f32: 4110; CHECK: # %bb.0: # %entry 4111; CHECK-NEXT: cvttss2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %eax 4112; CHECK-NEXT: retq 4113; 4114; AVX-LABEL: constrained_vector_fptosi_v1i32_v1f32: 4115; AVX: # %bb.0: # %entry 4116; AVX-NEXT: vcvttss2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %eax 4117; AVX-NEXT: retq 4118entry: 4119 %result = call <1 x i32> @llvm.experimental.constrained.fptosi.v1i32.v1f32( 4120 <1 x float><float 42.0>, 4121 metadata !"fpexcept.strict") #0 4122 ret <1 x i32> %result 4123} 4124 4125define <2 x i32> @constrained_vector_fptosi_v2i32_v2f32() #0 { 4126; CHECK-LABEL: constrained_vector_fptosi_v2i32_v2f32: 4127; CHECK: # %bb.0: # %entry 4128; CHECK-NEXT: cvttps2dq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 4129; CHECK-NEXT: retq 4130; 4131; AVX-LABEL: constrained_vector_fptosi_v2i32_v2f32: 4132; AVX: # %bb.0: # %entry 4133; AVX-NEXT: vcvttps2dq 
{{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 4134; AVX-NEXT: retq 4135entry: 4136 %result = call <2 x i32> @llvm.experimental.constrained.fptosi.v2i32.v2f32( 4137 <2 x float><float 42.0, float 43.0>, 4138 metadata !"fpexcept.strict") #0 4139 ret <2 x i32> %result 4140} 4141 4142define <3 x i32> @constrained_vector_fptosi_v3i32_v3f32() #0 { 4143; CHECK-LABEL: constrained_vector_fptosi_v3i32_v3f32: 4144; CHECK: # %bb.0: # %entry 4145; CHECK-NEXT: cvttss2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %eax 4146; CHECK-NEXT: movd %eax, %xmm1 4147; CHECK-NEXT: cvttss2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %eax 4148; CHECK-NEXT: movd %eax, %xmm0 4149; CHECK-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 4150; CHECK-NEXT: cvttss2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %eax 4151; CHECK-NEXT: movd %eax, %xmm1 4152; CHECK-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] 4153; CHECK-NEXT: retq 4154; 4155; AVX-LABEL: constrained_vector_fptosi_v3i32_v3f32: 4156; AVX: # %bb.0: # %entry 4157; AVX-NEXT: vcvttss2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %eax 4158; AVX-NEXT: vmovd %eax, %xmm0 4159; AVX-NEXT: vcvttss2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %eax 4160; AVX-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0 4161; AVX-NEXT: vcvttss2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %eax 4162; AVX-NEXT: vpinsrd $2, %eax, %xmm0, %xmm0 4163; AVX-NEXT: retq 4164entry: 4165 %result = call <3 x i32> @llvm.experimental.constrained.fptosi.v3i32.v3f32( 4166 <3 x float><float 42.0, float 43.0, 4167 float 44.0>, 4168 metadata !"fpexcept.strict") #0 4169 ret <3 x i32> %result 4170} 4171 4172define <4 x i32> @constrained_vector_fptosi_v4i32_v4f32() #0 { 4173; CHECK-LABEL: constrained_vector_fptosi_v4i32_v4f32: 4174; CHECK: # %bb.0: # %entry 4175; CHECK-NEXT: cvttps2dq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 4176; CHECK-NEXT: retq 4177; 4178; AVX-LABEL: constrained_vector_fptosi_v4i32_v4f32: 4179; AVX: # %bb.0: # %entry 4180; AVX-NEXT: vcvttps2dq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 4181; AVX-NEXT: retq 4182entry: 4183 %result = call <4 x 
i32> @llvm.experimental.constrained.fptosi.v4i32.v4f32( 4184 <4 x float><float 42.0, float 43.0, 4185 float 44.0, float 45.0>, 4186 metadata !"fpexcept.strict") #0 4187 ret <4 x i32> %result 4188} 4189 4190define <1 x i64> @constrained_vector_fptosi_v1i64_v1f32() #0 { 4191; CHECK-LABEL: constrained_vector_fptosi_v1i64_v1f32: 4192; CHECK: # %bb.0: # %entry 4193; CHECK-NEXT: cvttss2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rax 4194; CHECK-NEXT: retq 4195; 4196; AVX-LABEL: constrained_vector_fptosi_v1i64_v1f32: 4197; AVX: # %bb.0: # %entry 4198; AVX-NEXT: vcvttss2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rax 4199; AVX-NEXT: retq 4200entry: 4201 %result = call <1 x i64> @llvm.experimental.constrained.fptosi.v1i64.v1f32( 4202 <1 x float><float 42.0>, 4203 metadata !"fpexcept.strict") #0 4204 ret <1 x i64> %result 4205} 4206 4207define <2 x i64> @constrained_vector_fptosi_v2i64_v2f32() #0 { 4208; CHECK-LABEL: constrained_vector_fptosi_v2i64_v2f32: 4209; CHECK: # %bb.0: # %entry 4210; CHECK-NEXT: cvttss2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rax 4211; CHECK-NEXT: movq %rax, %xmm1 4212; CHECK-NEXT: cvttss2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rax 4213; CHECK-NEXT: movq %rax, %xmm0 4214; CHECK-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] 4215; CHECK-NEXT: retq 4216; 4217; AVX1-LABEL: constrained_vector_fptosi_v2i64_v2f32: 4218; AVX1: # %bb.0: # %entry 4219; AVX1-NEXT: vcvttss2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rax 4220; AVX1-NEXT: vmovq %rax, %xmm0 4221; AVX1-NEXT: vcvttss2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rax 4222; AVX1-NEXT: vmovq %rax, %xmm1 4223; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0] 4224; AVX1-NEXT: retq 4225; 4226; AVX512F-LABEL: constrained_vector_fptosi_v2i64_v2f32: 4227; AVX512F: # %bb.0: # %entry 4228; AVX512F-NEXT: vcvttss2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rax 4229; AVX512F-NEXT: vmovq %rax, %xmm0 4230; AVX512F-NEXT: vcvttss2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rax 4231; AVX512F-NEXT: vmovq %rax, %xmm1 4232; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm0 = 
xmm1[0],xmm0[0] 4233; AVX512F-NEXT: retq 4234; 4235; AVX512DQ-LABEL: constrained_vector_fptosi_v2i64_v2f32: 4236; AVX512DQ: # %bb.0: # %entry 4237; AVX512DQ-NEXT: vcvttps2qq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0 4238; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 4239; AVX512DQ-NEXT: vzeroupper 4240; AVX512DQ-NEXT: retq 4241entry: 4242 %result = call <2 x i64> @llvm.experimental.constrained.fptosi.v2i64.v2f32( 4243 <2 x float><float 42.0, float 43.0>, 4244 metadata !"fpexcept.strict") #0 4245 ret <2 x i64> %result 4246} 4247 4248define <3 x i64> @constrained_vector_fptosi_v3i64_v3f32() #0 { 4249; CHECK-LABEL: constrained_vector_fptosi_v3i64_v3f32: 4250; CHECK: # %bb.0: # %entry 4251; CHECK-NEXT: cvttss2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rcx 4252; CHECK-NEXT: cvttss2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rdx 4253; CHECK-NEXT: cvttss2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rax 4254; CHECK-NEXT: retq 4255; 4256; AVX1-LABEL: constrained_vector_fptosi_v3i64_v3f32: 4257; AVX1: # %bb.0: # %entry 4258; AVX1-NEXT: vcvttss2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rax 4259; AVX1-NEXT: vmovq %rax, %xmm0 4260; AVX1-NEXT: vcvttss2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rax 4261; AVX1-NEXT: vmovq %rax, %xmm1 4262; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0] 4263; AVX1-NEXT: vcvttss2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rax 4264; AVX1-NEXT: vmovq %rax, %xmm1 4265; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 4266; AVX1-NEXT: retq 4267; 4268; AVX512-LABEL: constrained_vector_fptosi_v3i64_v3f32: 4269; AVX512: # %bb.0: # %entry 4270; AVX512-NEXT: vcvttss2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rax 4271; AVX512-NEXT: vmovq %rax, %xmm0 4272; AVX512-NEXT: vcvttss2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rax 4273; AVX512-NEXT: vmovq %rax, %xmm1 4274; AVX512-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0] 4275; AVX512-NEXT: vcvttss2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rax 4276; AVX512-NEXT: vmovq %rax, %xmm1 4277; AVX512-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 4278; AVX512-NEXT: retq 
4279entry: 4280 %result = call <3 x i64> @llvm.experimental.constrained.fptosi.v3i64.v3f32( 4281 <3 x float><float 42.0, float 43.0, 4282 float 44.0>, 4283 metadata !"fpexcept.strict") #0 4284 ret <3 x i64> %result 4285} 4286 4287define <4 x i64> @constrained_vector_fptosi_v4i64_v4f32() #0 { 4288; CHECK-LABEL: constrained_vector_fptosi_v4i64_v4f32: 4289; CHECK: # %bb.0: # %entry 4290; CHECK-NEXT: cvttss2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rax 4291; CHECK-NEXT: movq %rax, %xmm1 4292; CHECK-NEXT: cvttss2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rax 4293; CHECK-NEXT: movq %rax, %xmm0 4294; CHECK-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] 4295; CHECK-NEXT: cvttss2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rax 4296; CHECK-NEXT: movq %rax, %xmm2 4297; CHECK-NEXT: cvttss2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rax 4298; CHECK-NEXT: movq %rax, %xmm1 4299; CHECK-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0] 4300; CHECK-NEXT: retq 4301; 4302; AVX1-LABEL: constrained_vector_fptosi_v4i64_v4f32: 4303; AVX1: # %bb.0: # %entry 4304; AVX1-NEXT: vcvttss2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rax 4305; AVX1-NEXT: vmovq %rax, %xmm0 4306; AVX1-NEXT: vcvttss2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rax 4307; AVX1-NEXT: vmovq %rax, %xmm1 4308; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0] 4309; AVX1-NEXT: vcvttss2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rax 4310; AVX1-NEXT: vmovq %rax, %xmm1 4311; AVX1-NEXT: vcvttss2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rax 4312; AVX1-NEXT: vmovq %rax, %xmm2 4313; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0] 4314; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 4315; AVX1-NEXT: retq 4316; 4317; AVX512F-LABEL: constrained_vector_fptosi_v4i64_v4f32: 4318; AVX512F: # %bb.0: # %entry 4319; AVX512F-NEXT: vcvttss2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rax 4320; AVX512F-NEXT: vmovq %rax, %xmm0 4321; AVX512F-NEXT: vcvttss2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rax 4322; AVX512F-NEXT: vmovq %rax, %xmm1 4323; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0] 4324; 
AVX512F-NEXT: vcvttss2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rax 4325; AVX512F-NEXT: vmovq %rax, %xmm1 4326; AVX512F-NEXT: vcvttss2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rax 4327; AVX512F-NEXT: vmovq %rax, %xmm2 4328; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0] 4329; AVX512F-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0 4330; AVX512F-NEXT: retq 4331; 4332; AVX512DQ-LABEL: constrained_vector_fptosi_v4i64_v4f32: 4333; AVX512DQ: # %bb.0: # %entry 4334; AVX512DQ-NEXT: vmovaps {{.*#+}} xmm0 = [4.2E+1,4.3E+1,4.4E+1,4.5E+1] 4335; AVX512DQ-NEXT: vcvttps2qq %ymm0, %zmm0 4336; AVX512DQ-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 4337; AVX512DQ-NEXT: retq 4338entry: 4339 %result = call <4 x i64> @llvm.experimental.constrained.fptosi.v4i64.v4f32( 4340 <4 x float><float 42.0, float 43.0, 4341 float 44.0, float 45.0>, 4342 metadata !"fpexcept.strict") #0 4343 ret <4 x i64> %result 4344} 4345 4346define <1 x i32> @constrained_vector_fptosi_v1i32_v1f64() #0 { 4347; CHECK-LABEL: constrained_vector_fptosi_v1i32_v1f64: 4348; CHECK: # %bb.0: # %entry 4349; CHECK-NEXT: cvttsd2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %eax 4350; CHECK-NEXT: retq 4351; 4352; AVX-LABEL: constrained_vector_fptosi_v1i32_v1f64: 4353; AVX: # %bb.0: # %entry 4354; AVX-NEXT: vcvttsd2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %eax 4355; AVX-NEXT: retq 4356entry: 4357 %result = call <1 x i32> @llvm.experimental.constrained.fptosi.v1i32.v1f64( 4358 <1 x double><double 42.1>, 4359 metadata !"fpexcept.strict") #0 4360 ret <1 x i32> %result 4361} 4362 4363 4364define <2 x i32> @constrained_vector_fptosi_v2i32_v2f64() #0 { 4365; CHECK-LABEL: constrained_vector_fptosi_v2i32_v2f64: 4366; CHECK: # %bb.0: # %entry 4367; CHECK-NEXT: cvttpd2dq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 4368; CHECK-NEXT: retq 4369; 4370; AVX-LABEL: constrained_vector_fptosi_v2i32_v2f64: 4371; AVX: # %bb.0: # %entry 4372; AVX-NEXT: vcvttpd2dqx {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 4373; AVX-NEXT: retq 4374entry: 4375 %result = call <2 x i32> 
@llvm.experimental.constrained.fptosi.v2i32.v2f64( 4376 <2 x double><double 42.1, double 42.2>, 4377 metadata !"fpexcept.strict") #0 4378 ret <2 x i32> %result 4379} 4380 4381define <3 x i32> @constrained_vector_fptosi_v3i32_v3f64() #0 { 4382; CHECK-LABEL: constrained_vector_fptosi_v3i32_v3f64: 4383; CHECK: # %bb.0: # %entry 4384; CHECK-NEXT: cvttsd2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %eax 4385; CHECK-NEXT: movd %eax, %xmm1 4386; CHECK-NEXT: cvttsd2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %eax 4387; CHECK-NEXT: movd %eax, %xmm0 4388; CHECK-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 4389; CHECK-NEXT: cvttsd2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %eax 4390; CHECK-NEXT: movd %eax, %xmm1 4391; CHECK-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] 4392; CHECK-NEXT: retq 4393; 4394; AVX-LABEL: constrained_vector_fptosi_v3i32_v3f64: 4395; AVX: # %bb.0: # %entry 4396; AVX-NEXT: vcvttsd2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %eax 4397; AVX-NEXT: vmovd %eax, %xmm0 4398; AVX-NEXT: vcvttsd2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %eax 4399; AVX-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0 4400; AVX-NEXT: vcvttsd2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %eax 4401; AVX-NEXT: vpinsrd $2, %eax, %xmm0, %xmm0 4402; AVX-NEXT: retq 4403entry: 4404 %result = call <3 x i32> @llvm.experimental.constrained.fptosi.v3i32.v3f64( 4405 <3 x double><double 42.1, double 42.2, 4406 double 42.3>, 4407 metadata !"fpexcept.strict") #0 4408 ret <3 x i32> %result 4409} 4410 4411define <4 x i32> @constrained_vector_fptosi_v4i32_v4f64() #0 { 4412; CHECK-LABEL: constrained_vector_fptosi_v4i32_v4f64: 4413; CHECK: # %bb.0: # %entry 4414; CHECK-NEXT: cvttpd2dq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 4415; CHECK-NEXT: cvttpd2dq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 4416; CHECK-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] 4417; CHECK-NEXT: retq 4418; 4419; AVX-LABEL: constrained_vector_fptosi_v4i32_v4f64: 4420; AVX: # %bb.0: # %entry 4421; AVX-NEXT: vcvttpd2dqy {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 4422; AVX-NEXT: retq 
4423entry: 4424 %result = call <4 x i32> @llvm.experimental.constrained.fptosi.v4i32.v4f64( 4425 <4 x double><double 42.1, double 42.2, 4426 double 42.3, double 42.4>, 4427 metadata !"fpexcept.strict") #0 4428 ret <4 x i32> %result 4429} 4430 4431define <1 x i64> @constrained_vector_fptosi_v1i64_v1f64() #0 { 4432; CHECK-LABEL: constrained_vector_fptosi_v1i64_v1f64: 4433; CHECK: # %bb.0: # %entry 4434; CHECK-NEXT: cvttsd2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rax 4435; CHECK-NEXT: retq 4436; 4437; AVX-LABEL: constrained_vector_fptosi_v1i64_v1f64: 4438; AVX: # %bb.0: # %entry 4439; AVX-NEXT: vcvttsd2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rax 4440; AVX-NEXT: retq 4441entry: 4442 %result = call <1 x i64> @llvm.experimental.constrained.fptosi.v1i64.v1f64( 4443 <1 x double><double 42.1>, 4444 metadata !"fpexcept.strict") #0 4445 ret <1 x i64> %result 4446} 4447 4448define <2 x i64> @constrained_vector_fptosi_v2i64_v2f64() #0 { 4449; CHECK-LABEL: constrained_vector_fptosi_v2i64_v2f64: 4450; CHECK: # %bb.0: # %entry 4451; CHECK-NEXT: cvttsd2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rax 4452; CHECK-NEXT: movq %rax, %xmm1 4453; CHECK-NEXT: cvttsd2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rax 4454; CHECK-NEXT: movq %rax, %xmm0 4455; CHECK-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] 4456; CHECK-NEXT: retq 4457; 4458; AVX1-LABEL: constrained_vector_fptosi_v2i64_v2f64: 4459; AVX1: # %bb.0: # %entry 4460; AVX1-NEXT: vcvttsd2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rax 4461; AVX1-NEXT: vmovq %rax, %xmm0 4462; AVX1-NEXT: vcvttsd2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rax 4463; AVX1-NEXT: vmovq %rax, %xmm1 4464; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0] 4465; AVX1-NEXT: retq 4466; 4467; AVX512F-LABEL: constrained_vector_fptosi_v2i64_v2f64: 4468; AVX512F: # %bb.0: # %entry 4469; AVX512F-NEXT: vcvttsd2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rax 4470; AVX512F-NEXT: vmovq %rax, %xmm0 4471; AVX512F-NEXT: vcvttsd2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rax 4472; AVX512F-NEXT: vmovq %rax, %xmm1 4473; 
AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0] 4474; AVX512F-NEXT: retq 4475; 4476; AVX512DQ-LABEL: constrained_vector_fptosi_v2i64_v2f64: 4477; AVX512DQ: # %bb.0: # %entry 4478; AVX512DQ-NEXT: vmovaps {{.*#+}} xmm0 = [4.2100000000000001E+1,4.2200000000000003E+1] 4479; AVX512DQ-NEXT: vcvttpd2qq %zmm0, %zmm0 4480; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 4481; AVX512DQ-NEXT: vzeroupper 4482; AVX512DQ-NEXT: retq 4483entry: 4484 %result = call <2 x i64> @llvm.experimental.constrained.fptosi.v2i64.v2f64( 4485 <2 x double><double 42.1, double 42.2>, 4486 metadata !"fpexcept.strict") #0 4487 ret <2 x i64> %result 4488} 4489 4490define <3 x i64> @constrained_vector_fptosi_v3i64_v3f64() #0 { 4491; CHECK-LABEL: constrained_vector_fptosi_v3i64_v3f64: 4492; CHECK: # %bb.0: # %entry 4493; CHECK-NEXT: cvttsd2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rcx 4494; CHECK-NEXT: cvttsd2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rdx 4495; CHECK-NEXT: cvttsd2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rax 4496; CHECK-NEXT: retq 4497; 4498; AVX1-LABEL: constrained_vector_fptosi_v3i64_v3f64: 4499; AVX1: # %bb.0: # %entry 4500; AVX1-NEXT: vcvttsd2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rax 4501; AVX1-NEXT: vmovq %rax, %xmm0 4502; AVX1-NEXT: vcvttsd2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rax 4503; AVX1-NEXT: vmovq %rax, %xmm1 4504; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0] 4505; AVX1-NEXT: vcvttsd2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rax 4506; AVX1-NEXT: vmovq %rax, %xmm1 4507; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 4508; AVX1-NEXT: retq 4509; 4510; AVX512-LABEL: constrained_vector_fptosi_v3i64_v3f64: 4511; AVX512: # %bb.0: # %entry 4512; AVX512-NEXT: vcvttsd2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rax 4513; AVX512-NEXT: vmovq %rax, %xmm0 4514; AVX512-NEXT: vcvttsd2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rax 4515; AVX512-NEXT: vmovq %rax, %xmm1 4516; AVX512-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0] 4517; AVX512-NEXT: vcvttsd2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rax 4518; 
AVX512-NEXT: vmovq %rax, %xmm1 4519; AVX512-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 4520; AVX512-NEXT: retq 4521entry: 4522 %result = call <3 x i64> @llvm.experimental.constrained.fptosi.v3i64.v3f64( 4523 <3 x double><double 42.1, double 42.2, 4524 double 42.3>, 4525 metadata !"fpexcept.strict") #0 4526 ret <3 x i64> %result 4527} 4528 4529define <4 x i64> @constrained_vector_fptosi_v4i64_v4f64() #0 { 4530; CHECK-LABEL: constrained_vector_fptosi_v4i64_v4f64: 4531; CHECK: # %bb.0: # %entry 4532; CHECK-NEXT: cvttsd2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rax 4533; CHECK-NEXT: movq %rax, %xmm1 4534; CHECK-NEXT: cvttsd2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rax 4535; CHECK-NEXT: movq %rax, %xmm0 4536; CHECK-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] 4537; CHECK-NEXT: cvttsd2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rax 4538; CHECK-NEXT: movq %rax, %xmm2 4539; CHECK-NEXT: cvttsd2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rax 4540; CHECK-NEXT: movq %rax, %xmm1 4541; CHECK-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0] 4542; CHECK-NEXT: retq 4543; 4544; AVX1-LABEL: constrained_vector_fptosi_v4i64_v4f64: 4545; AVX1: # %bb.0: # %entry 4546; AVX1-NEXT: vcvttsd2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rax 4547; AVX1-NEXT: vmovq %rax, %xmm0 4548; AVX1-NEXT: vcvttsd2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rax 4549; AVX1-NEXT: vmovq %rax, %xmm1 4550; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0] 4551; AVX1-NEXT: vcvttsd2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rax 4552; AVX1-NEXT: vmovq %rax, %xmm1 4553; AVX1-NEXT: vcvttsd2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rax 4554; AVX1-NEXT: vmovq %rax, %xmm2 4555; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0] 4556; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 4557; AVX1-NEXT: retq 4558; 4559; AVX512F-LABEL: constrained_vector_fptosi_v4i64_v4f64: 4560; AVX512F: # %bb.0: # %entry 4561; AVX512F-NEXT: vcvttsd2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rax 4562; AVX512F-NEXT: vmovq %rax, %xmm0 4563; AVX512F-NEXT: vcvttsd2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), 
%rax 4564; AVX512F-NEXT: vmovq %rax, %xmm1 4565; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0] 4566; AVX512F-NEXT: vcvttsd2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rax 4567; AVX512F-NEXT: vmovq %rax, %xmm1 4568; AVX512F-NEXT: vcvttsd2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rax 4569; AVX512F-NEXT: vmovq %rax, %xmm2 4570; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0] 4571; AVX512F-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0 4572; AVX512F-NEXT: retq 4573; 4574; AVX512DQ-LABEL: constrained_vector_fptosi_v4i64_v4f64: 4575; AVX512DQ: # %bb.0: # %entry 4576; AVX512DQ-NEXT: vmovaps {{.*#+}} ymm0 = [4.2100000000000001E+1,4.2200000000000003E+1,4.2299999999999997E+1,4.2399999999999999E+1] 4577; AVX512DQ-NEXT: vcvttpd2qq %zmm0, %zmm0 4578; AVX512DQ-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 4579; AVX512DQ-NEXT: retq 4580entry: 4581 %result = call <4 x i64> @llvm.experimental.constrained.fptosi.v4i64.v4f64( 4582 <4 x double><double 42.1, double 42.2, 4583 double 42.3, double 42.4>, 4584 metadata !"fpexcept.strict") #0 4585 ret <4 x i64> %result 4586} 4587 4588define <1 x i32> @constrained_vector_fptoui_v1i32_v1f32() #0 { 4589; CHECK-LABEL: constrained_vector_fptoui_v1i32_v1f32: 4590; CHECK: # %bb.0: # %entry 4591; CHECK-NEXT: cvttss2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rax 4592; CHECK-NEXT: # kill: def $eax killed $eax killed $rax 4593; CHECK-NEXT: retq 4594; 4595; AVX1-LABEL: constrained_vector_fptoui_v1i32_v1f32: 4596; AVX1: # %bb.0: # %entry 4597; AVX1-NEXT: vcvttss2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rax 4598; AVX1-NEXT: # kill: def $eax killed $eax killed $rax 4599; AVX1-NEXT: retq 4600; 4601; AVX512-LABEL: constrained_vector_fptoui_v1i32_v1f32: 4602; AVX512: # %bb.0: # %entry 4603; AVX512-NEXT: vcvttss2usi {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %eax 4604; AVX512-NEXT: retq 4605entry: 4606 %result = call <1 x i32> @llvm.experimental.constrained.fptoui.v1i32.v1f32( 4607 <1 x float><float 42.0>, 4608 metadata !"fpexcept.strict") #0 4609 ret <1 x i32> %result 
4610} 4611 4612define <2 x i32> @constrained_vector_fptoui_v2i32_v2f32() #0 { 4613; CHECK-LABEL: constrained_vector_fptoui_v2i32_v2f32: 4614; CHECK: # %bb.0: # %entry 4615; CHECK-NEXT: cvttss2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rax 4616; CHECK-NEXT: movd %eax, %xmm1 4617; CHECK-NEXT: cvttss2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rax 4618; CHECK-NEXT: movd %eax, %xmm0 4619; CHECK-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 4620; CHECK-NEXT: retq 4621; 4622; AVX1-LABEL: constrained_vector_fptoui_v2i32_v2f32: 4623; AVX1: # %bb.0: # %entry 4624; AVX1-NEXT: vcvttss2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rax 4625; AVX1-NEXT: vcvttss2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rcx 4626; AVX1-NEXT: vmovd %ecx, %xmm0 4627; AVX1-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0 4628; AVX1-NEXT: retq 4629; 4630; AVX512-LABEL: constrained_vector_fptoui_v2i32_v2f32: 4631; AVX512: # %bb.0: # %entry 4632; AVX512-NEXT: vmovsd {{.*#+}} xmm0 = [4.2E+1,4.3E+1,0.0E+0,0.0E+0] 4633; AVX512-NEXT: vcvttps2udq %zmm0, %zmm0 4634; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 4635; AVX512-NEXT: vzeroupper 4636; AVX512-NEXT: retq 4637entry: 4638 %result = call <2 x i32> @llvm.experimental.constrained.fptoui.v2i32.v2f32( 4639 <2 x float><float 42.0, float 43.0>, 4640 metadata !"fpexcept.strict") #0 4641 ret <2 x i32> %result 4642} 4643 4644define <3 x i32> @constrained_vector_fptoui_v3i32_v3f32() #0 { 4645; CHECK-LABEL: constrained_vector_fptoui_v3i32_v3f32: 4646; CHECK: # %bb.0: # %entry 4647; CHECK-NEXT: cvttss2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rax 4648; CHECK-NEXT: movd %eax, %xmm1 4649; CHECK-NEXT: cvttss2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rax 4650; CHECK-NEXT: movd %eax, %xmm0 4651; CHECK-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 4652; CHECK-NEXT: cvttss2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rax 4653; CHECK-NEXT: movd %eax, %xmm1 4654; CHECK-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] 4655; CHECK-NEXT: retq 4656; 4657; AVX1-LABEL: 
constrained_vector_fptoui_v3i32_v3f32: 4658; AVX1: # %bb.0: # %entry 4659; AVX1-NEXT: vcvttss2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rax 4660; AVX1-NEXT: vcvttss2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rcx 4661; AVX1-NEXT: vmovd %ecx, %xmm0 4662; AVX1-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0 4663; AVX1-NEXT: vcvttss2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rax 4664; AVX1-NEXT: vpinsrd $2, %eax, %xmm0, %xmm0 4665; AVX1-NEXT: retq 4666; 4667; AVX512-LABEL: constrained_vector_fptoui_v3i32_v3f32: 4668; AVX512: # %bb.0: # %entry 4669; AVX512-NEXT: vcvttss2usi {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %eax 4670; AVX512-NEXT: vmovd %eax, %xmm0 4671; AVX512-NEXT: vcvttss2usi {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %eax 4672; AVX512-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0 4673; AVX512-NEXT: vcvttss2usi {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %eax 4674; AVX512-NEXT: vpinsrd $2, %eax, %xmm0, %xmm0 4675; AVX512-NEXT: retq 4676entry: 4677 %result = call <3 x i32> @llvm.experimental.constrained.fptoui.v3i32.v3f32( 4678 <3 x float><float 42.0, float 43.0, 4679 float 44.0>, 4680 metadata !"fpexcept.strict") #0 4681 ret <3 x i32> %result 4682} 4683 4684define <4 x i32> @constrained_vector_fptoui_v4i32_v4f32() #0 { 4685; CHECK-LABEL: constrained_vector_fptoui_v4i32_v4f32: 4686; CHECK: # %bb.0: # %entry 4687; CHECK-NEXT: movaps {{.*#+}} xmm0 = [2.14748365E+9,2.14748365E+9,2.14748365E+9,2.14748365E+9] 4688; CHECK-NEXT: movaps {{.*#+}} xmm1 = [4.2E+1,4.3E+1,4.4E+1,4.5E+1] 4689; CHECK-NEXT: movaps %xmm1, %xmm2 4690; CHECK-NEXT: cmpltps %xmm0, %xmm2 4691; CHECK-NEXT: movaps %xmm2, %xmm3 4692; CHECK-NEXT: andnps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3 4693; CHECK-NEXT: andnps %xmm0, %xmm2 4694; CHECK-NEXT: subps %xmm2, %xmm1 4695; CHECK-NEXT: cvttps2dq %xmm1, %xmm0 4696; CHECK-NEXT: xorps %xmm3, %xmm0 4697; CHECK-NEXT: retq 4698; 4699; AVX1-LABEL: constrained_vector_fptoui_v4i32_v4f32: 4700; AVX1: # %bb.0: # %entry 4701; AVX1-NEXT: vbroadcastss {{.*#+}} xmm0 = [2.14748365E+9,2.14748365E+9,2.14748365E+9,2.14748365E+9] 4702; AVX1-NEXT: 
vmovaps {{.*#+}} xmm1 = [4.2E+1,4.3E+1,4.4E+1,4.5E+1] 4703; AVX1-NEXT: vcmpltps %xmm0, %xmm1, %xmm2 4704; AVX1-NEXT: vxorps %xmm3, %xmm3, %xmm3 4705; AVX1-NEXT: vbroadcastss {{.*#+}} xmm4 = [2147483648,2147483648,2147483648,2147483648] 4706; AVX1-NEXT: vblendvps %xmm2, %xmm3, %xmm4, %xmm4 4707; AVX1-NEXT: vblendvps %xmm2, %xmm3, %xmm0, %xmm0 4708; AVX1-NEXT: vsubps %xmm0, %xmm1, %xmm0 4709; AVX1-NEXT: vcvttps2dq %xmm0, %xmm0 4710; AVX1-NEXT: vxorps %xmm4, %xmm0, %xmm0 4711; AVX1-NEXT: retq 4712; 4713; AVX512-LABEL: constrained_vector_fptoui_v4i32_v4f32: 4714; AVX512: # %bb.0: # %entry 4715; AVX512-NEXT: vmovaps {{.*#+}} xmm0 = [4.2E+1,4.3E+1,4.4E+1,4.5E+1] 4716; AVX512-NEXT: vcvttps2udq %zmm0, %zmm0 4717; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 4718; AVX512-NEXT: vzeroupper 4719; AVX512-NEXT: retq 4720entry: 4721 %result = call <4 x i32> @llvm.experimental.constrained.fptoui.v4i32.v4f32( 4722 <4 x float><float 42.0, float 43.0, 4723 float 44.0, float 45.0>, 4724 metadata !"fpexcept.strict") #0 4725 ret <4 x i32> %result 4726} 4727 4728define <1 x i64> @constrained_vector_fptoui_v1i64_v1f32() #0 { 4729; CHECK-LABEL: constrained_vector_fptoui_v1i64_v1f32: 4730; CHECK: # %bb.0: # %entry 4731; CHECK-NEXT: movss {{.*#+}} xmm0 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0] 4732; CHECK-NEXT: movss {{.*#+}} xmm2 = [9.22337203E+18,0.0E+0,0.0E+0,0.0E+0] 4733; CHECK-NEXT: comiss %xmm0, %xmm2 4734; CHECK-NEXT: xorps %xmm1, %xmm1 4735; CHECK-NEXT: ja .LBB121_2 4736; CHECK-NEXT: # %bb.1: # %entry 4737; CHECK-NEXT: movaps %xmm2, %xmm1 4738; CHECK-NEXT: .LBB121_2: # %entry 4739; CHECK-NEXT: subss %xmm1, %xmm0 4740; CHECK-NEXT: cvttss2si %xmm0, %rcx 4741; CHECK-NEXT: setbe %al 4742; CHECK-NEXT: movzbl %al, %eax 4743; CHECK-NEXT: shlq $63, %rax 4744; CHECK-NEXT: xorq %rcx, %rax 4745; CHECK-NEXT: retq 4746; 4747; AVX1-LABEL: constrained_vector_fptoui_v1i64_v1f32: 4748; AVX1: # %bb.0: # %entry 4749; AVX1-NEXT: vmovss {{.*#+}} xmm0 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0] 4750; AVX1-NEXT: 
vmovss {{.*#+}} xmm1 = [9.22337203E+18,0.0E+0,0.0E+0,0.0E+0] 4751; AVX1-NEXT: vcomiss %xmm0, %xmm1 4752; AVX1-NEXT: vxorps %xmm2, %xmm2, %xmm2 4753; AVX1-NEXT: ja .LBB121_2 4754; AVX1-NEXT: # %bb.1: # %entry 4755; AVX1-NEXT: vmovaps %xmm1, %xmm2 4756; AVX1-NEXT: .LBB121_2: # %entry 4757; AVX1-NEXT: vsubss %xmm2, %xmm0, %xmm0 4758; AVX1-NEXT: vcvttss2si %xmm0, %rcx 4759; AVX1-NEXT: setbe %al 4760; AVX1-NEXT: movzbl %al, %eax 4761; AVX1-NEXT: shlq $63, %rax 4762; AVX1-NEXT: xorq %rcx, %rax 4763; AVX1-NEXT: retq 4764; 4765; AVX512-LABEL: constrained_vector_fptoui_v1i64_v1f32: 4766; AVX512: # %bb.0: # %entry 4767; AVX512-NEXT: vcvttss2usi {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rax 4768; AVX512-NEXT: retq 4769entry: 4770 %result = call <1 x i64> @llvm.experimental.constrained.fptoui.v1i64.v1f32( 4771 <1 x float><float 42.0>, 4772 metadata !"fpexcept.strict") #0 4773 ret <1 x i64> %result 4774} 4775 4776define <2 x i64> @constrained_vector_fptoui_v2i64_v2f32() #0 { 4777; CHECK-LABEL: constrained_vector_fptoui_v2i64_v2f32: 4778; CHECK: # %bb.0: # %entry 4779; CHECK-NEXT: movss {{.*#+}} xmm2 = [4.3E+1,0.0E+0,0.0E+0,0.0E+0] 4780; CHECK-NEXT: movss {{.*#+}} xmm1 = [9.22337203E+18,0.0E+0,0.0E+0,0.0E+0] 4781; CHECK-NEXT: comiss %xmm2, %xmm1 4782; CHECK-NEXT: xorps %xmm0, %xmm0 4783; CHECK-NEXT: xorps %xmm3, %xmm3 4784; CHECK-NEXT: ja .LBB122_2 4785; CHECK-NEXT: # %bb.1: # %entry 4786; CHECK-NEXT: movaps %xmm1, %xmm3 4787; CHECK-NEXT: .LBB122_2: # %entry 4788; CHECK-NEXT: subss %xmm3, %xmm2 4789; CHECK-NEXT: cvttss2si %xmm2, %rax 4790; CHECK-NEXT: setbe %cl 4791; CHECK-NEXT: movzbl %cl, %ecx 4792; CHECK-NEXT: shlq $63, %rcx 4793; CHECK-NEXT: xorq %rax, %rcx 4794; CHECK-NEXT: movq %rcx, %xmm2 4795; CHECK-NEXT: movss {{.*#+}} xmm3 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0] 4796; CHECK-NEXT: comiss %xmm3, %xmm1 4797; CHECK-NEXT: ja .LBB122_4 4798; CHECK-NEXT: # %bb.3: # %entry 4799; CHECK-NEXT: movaps %xmm1, %xmm0 4800; CHECK-NEXT: .LBB122_4: # %entry 4801; CHECK-NEXT: subss %xmm0, %xmm3 4802; 
CHECK-NEXT: cvttss2si %xmm3, %rax 4803; CHECK-NEXT: setbe %cl 4804; CHECK-NEXT: movzbl %cl, %ecx 4805; CHECK-NEXT: shlq $63, %rcx 4806; CHECK-NEXT: xorq %rax, %rcx 4807; CHECK-NEXT: movq %rcx, %xmm0 4808; CHECK-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0] 4809; CHECK-NEXT: retq 4810; 4811; AVX1-LABEL: constrained_vector_fptoui_v2i64_v2f32: 4812; AVX1: # %bb.0: # %entry 4813; AVX1-NEXT: vmovss {{.*#+}} xmm2 = [4.3E+1,0.0E+0,0.0E+0,0.0E+0] 4814; AVX1-NEXT: vmovss {{.*#+}} xmm0 = [9.22337203E+18,0.0E+0,0.0E+0,0.0E+0] 4815; AVX1-NEXT: vcomiss %xmm2, %xmm0 4816; AVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1 4817; AVX1-NEXT: vxorps %xmm3, %xmm3, %xmm3 4818; AVX1-NEXT: ja .LBB122_2 4819; AVX1-NEXT: # %bb.1: # %entry 4820; AVX1-NEXT: vmovaps %xmm0, %xmm3 4821; AVX1-NEXT: .LBB122_2: # %entry 4822; AVX1-NEXT: vsubss %xmm3, %xmm2, %xmm2 4823; AVX1-NEXT: vcvttss2si %xmm2, %rax 4824; AVX1-NEXT: setbe %cl 4825; AVX1-NEXT: movzbl %cl, %ecx 4826; AVX1-NEXT: shlq $63, %rcx 4827; AVX1-NEXT: xorq %rax, %rcx 4828; AVX1-NEXT: vmovq %rcx, %xmm2 4829; AVX1-NEXT: vmovss {{.*#+}} xmm3 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0] 4830; AVX1-NEXT: vcomiss %xmm3, %xmm0 4831; AVX1-NEXT: ja .LBB122_4 4832; AVX1-NEXT: # %bb.3: # %entry 4833; AVX1-NEXT: vmovaps %xmm0, %xmm1 4834; AVX1-NEXT: .LBB122_4: # %entry 4835; AVX1-NEXT: vsubss %xmm1, %xmm3, %xmm0 4836; AVX1-NEXT: vcvttss2si %xmm0, %rax 4837; AVX1-NEXT: setbe %cl 4838; AVX1-NEXT: movzbl %cl, %ecx 4839; AVX1-NEXT: shlq $63, %rcx 4840; AVX1-NEXT: xorq %rax, %rcx 4841; AVX1-NEXT: vmovq %rcx, %xmm0 4842; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0] 4843; AVX1-NEXT: retq 4844; 4845; AVX512F-LABEL: constrained_vector_fptoui_v2i64_v2f32: 4846; AVX512F: # %bb.0: # %entry 4847; AVX512F-NEXT: vcvttss2usi {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rax 4848; AVX512F-NEXT: vmovq %rax, %xmm0 4849; AVX512F-NEXT: vcvttss2usi {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rax 4850; AVX512F-NEXT: vmovq %rax, %xmm1 4851; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0] 4852; 
AVX512F-NEXT: retq 4853; 4854; AVX512DQ-LABEL: constrained_vector_fptoui_v2i64_v2f32: 4855; AVX512DQ: # %bb.0: # %entry 4856; AVX512DQ-NEXT: vcvttps2uqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0 4857; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 4858; AVX512DQ-NEXT: vzeroupper 4859; AVX512DQ-NEXT: retq 4860entry: 4861 %result = call <2 x i64> @llvm.experimental.constrained.fptoui.v2i64.v2f32( 4862 <2 x float><float 42.0, float 43.0>, 4863 metadata !"fpexcept.strict") #0 4864 ret <2 x i64> %result 4865} 4866 4867define <3 x i64> @constrained_vector_fptoui_v3i64_v3f32() #0 { 4868; CHECK-LABEL: constrained_vector_fptoui_v3i64_v3f32: 4869; CHECK: # %bb.0: # %entry 4870; CHECK-NEXT: movss {{.*#+}} xmm2 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0] 4871; CHECK-NEXT: movss {{.*#+}} xmm1 = [9.22337203E+18,0.0E+0,0.0E+0,0.0E+0] 4872; CHECK-NEXT: comiss %xmm2, %xmm1 4873; CHECK-NEXT: xorps %xmm0, %xmm0 4874; CHECK-NEXT: xorps %xmm3, %xmm3 4875; CHECK-NEXT: ja .LBB123_2 4876; CHECK-NEXT: # %bb.1: # %entry 4877; CHECK-NEXT: movaps %xmm1, %xmm3 4878; CHECK-NEXT: .LBB123_2: # %entry 4879; CHECK-NEXT: subss %xmm3, %xmm2 4880; CHECK-NEXT: cvttss2si %xmm2, %rcx 4881; CHECK-NEXT: setbe %al 4882; CHECK-NEXT: movzbl %al, %eax 4883; CHECK-NEXT: shlq $63, %rax 4884; CHECK-NEXT: xorq %rcx, %rax 4885; CHECK-NEXT: movss {{.*#+}} xmm2 = [4.3E+1,0.0E+0,0.0E+0,0.0E+0] 4886; CHECK-NEXT: comiss %xmm2, %xmm1 4887; CHECK-NEXT: xorps %xmm3, %xmm3 4888; CHECK-NEXT: ja .LBB123_4 4889; CHECK-NEXT: # %bb.3: # %entry 4890; CHECK-NEXT: movaps %xmm1, %xmm3 4891; CHECK-NEXT: .LBB123_4: # %entry 4892; CHECK-NEXT: subss %xmm3, %xmm2 4893; CHECK-NEXT: cvttss2si %xmm2, %rcx 4894; CHECK-NEXT: setbe %dl 4895; CHECK-NEXT: movzbl %dl, %edx 4896; CHECK-NEXT: shlq $63, %rdx 4897; CHECK-NEXT: xorq %rcx, %rdx 4898; CHECK-NEXT: movss {{.*#+}} xmm2 = [4.4E+1,0.0E+0,0.0E+0,0.0E+0] 4899; CHECK-NEXT: comiss %xmm2, %xmm1 4900; CHECK-NEXT: ja .LBB123_6 4901; CHECK-NEXT: # %bb.5: # %entry 4902; CHECK-NEXT: movaps %xmm1, %xmm0 
4903; CHECK-NEXT: .LBB123_6: # %entry 4904; CHECK-NEXT: subss %xmm0, %xmm2 4905; CHECK-NEXT: cvttss2si %xmm2, %rsi 4906; CHECK-NEXT: setbe %cl 4907; CHECK-NEXT: movzbl %cl, %ecx 4908; CHECK-NEXT: shlq $63, %rcx 4909; CHECK-NEXT: xorq %rsi, %rcx 4910; CHECK-NEXT: retq 4911; 4912; AVX1-LABEL: constrained_vector_fptoui_v3i64_v3f32: 4913; AVX1: # %bb.0: # %entry 4914; AVX1-NEXT: vmovss {{.*#+}} xmm2 = [4.3E+1,0.0E+0,0.0E+0,0.0E+0] 4915; AVX1-NEXT: vmovss {{.*#+}} xmm0 = [9.22337203E+18,0.0E+0,0.0E+0,0.0E+0] 4916; AVX1-NEXT: vcomiss %xmm2, %xmm0 4917; AVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1 4918; AVX1-NEXT: vxorps %xmm3, %xmm3, %xmm3 4919; AVX1-NEXT: ja .LBB123_2 4920; AVX1-NEXT: # %bb.1: # %entry 4921; AVX1-NEXT: vmovaps %xmm0, %xmm3 4922; AVX1-NEXT: .LBB123_2: # %entry 4923; AVX1-NEXT: vsubss %xmm3, %xmm2, %xmm2 4924; AVX1-NEXT: vcvttss2si %xmm2, %rax 4925; AVX1-NEXT: setbe %cl 4926; AVX1-NEXT: movzbl %cl, %ecx 4927; AVX1-NEXT: shlq $63, %rcx 4928; AVX1-NEXT: xorq %rax, %rcx 4929; AVX1-NEXT: vmovq %rcx, %xmm2 4930; AVX1-NEXT: vmovss {{.*#+}} xmm3 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0] 4931; AVX1-NEXT: vcomiss %xmm3, %xmm0 4932; AVX1-NEXT: vxorps %xmm4, %xmm4, %xmm4 4933; AVX1-NEXT: ja .LBB123_4 4934; AVX1-NEXT: # %bb.3: # %entry 4935; AVX1-NEXT: vmovaps %xmm0, %xmm4 4936; AVX1-NEXT: .LBB123_4: # %entry 4937; AVX1-NEXT: vsubss %xmm4, %xmm3, %xmm3 4938; AVX1-NEXT: vcvttss2si %xmm3, %rax 4939; AVX1-NEXT: setbe %cl 4940; AVX1-NEXT: movzbl %cl, %ecx 4941; AVX1-NEXT: shlq $63, %rcx 4942; AVX1-NEXT: xorq %rax, %rcx 4943; AVX1-NEXT: vmovq %rcx, %xmm3 4944; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm3[0],xmm2[0] 4945; AVX1-NEXT: vmovss {{.*#+}} xmm3 = [4.4E+1,0.0E+0,0.0E+0,0.0E+0] 4946; AVX1-NEXT: vcomiss %xmm3, %xmm0 4947; AVX1-NEXT: ja .LBB123_6 4948; AVX1-NEXT: # %bb.5: # %entry 4949; AVX1-NEXT: vmovaps %xmm0, %xmm1 4950; AVX1-NEXT: .LBB123_6: # %entry 4951; AVX1-NEXT: vsubss %xmm1, %xmm3, %xmm0 4952; AVX1-NEXT: vcvttss2si %xmm0, %rax 4953; AVX1-NEXT: setbe %cl 4954; AVX1-NEXT: 
movzbl %cl, %ecx 4955; AVX1-NEXT: shlq $63, %rcx 4956; AVX1-NEXT: xorq %rax, %rcx 4957; AVX1-NEXT: vmovq %rcx, %xmm0 4958; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0 4959; AVX1-NEXT: retq 4960; 4961; AVX512-LABEL: constrained_vector_fptoui_v3i64_v3f32: 4962; AVX512: # %bb.0: # %entry 4963; AVX512-NEXT: vcvttss2usi {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rax 4964; AVX512-NEXT: vmovq %rax, %xmm0 4965; AVX512-NEXT: vcvttss2usi {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rax 4966; AVX512-NEXT: vmovq %rax, %xmm1 4967; AVX512-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0] 4968; AVX512-NEXT: vcvttss2usi {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rax 4969; AVX512-NEXT: vmovq %rax, %xmm1 4970; AVX512-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 4971; AVX512-NEXT: retq 4972entry: 4973 %result = call <3 x i64> @llvm.experimental.constrained.fptoui.v3i64.v3f32( 4974 <3 x float><float 42.0, float 43.0, 4975 float 44.0>, 4976 metadata !"fpexcept.strict") #0 4977 ret <3 x i64> %result 4978} 4979 4980define <4 x i64> @constrained_vector_fptoui_v4i64_v4f32() #0 { 4981; CHECK-LABEL: constrained_vector_fptoui_v4i64_v4f32: 4982; CHECK: # %bb.0: # %entry 4983; CHECK-NEXT: movss {{.*#+}} xmm0 = [4.3E+1,0.0E+0,0.0E+0,0.0E+0] 4984; CHECK-NEXT: movss {{.*#+}} xmm2 = [9.22337203E+18,0.0E+0,0.0E+0,0.0E+0] 4985; CHECK-NEXT: comiss %xmm0, %xmm2 4986; CHECK-NEXT: xorps %xmm1, %xmm1 4987; CHECK-NEXT: xorps %xmm3, %xmm3 4988; CHECK-NEXT: ja .LBB124_2 4989; CHECK-NEXT: # %bb.1: # %entry 4990; CHECK-NEXT: movaps %xmm2, %xmm3 4991; CHECK-NEXT: .LBB124_2: # %entry 4992; CHECK-NEXT: subss %xmm3, %xmm0 4993; CHECK-NEXT: cvttss2si %xmm0, %rcx 4994; CHECK-NEXT: setbe %al 4995; CHECK-NEXT: movzbl %al, %eax 4996; CHECK-NEXT: shlq $63, %rax 4997; CHECK-NEXT: xorq %rcx, %rax 4998; CHECK-NEXT: movss {{.*#+}} xmm0 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0] 4999; CHECK-NEXT: comiss %xmm0, %xmm2 5000; CHECK-NEXT: xorps %xmm4, %xmm4 5001; CHECK-NEXT: ja .LBB124_4 5002; CHECK-NEXT: # %bb.3: # %entry 5003; CHECK-NEXT: movaps %xmm2, %xmm4 5004; 
CHECK-NEXT: .LBB124_4: # %entry 5005; CHECK-NEXT: movq %rax, %xmm3 5006; CHECK-NEXT: subss %xmm4, %xmm0 5007; CHECK-NEXT: cvttss2si %xmm0, %rax 5008; CHECK-NEXT: setbe %cl 5009; CHECK-NEXT: movzbl %cl, %ecx 5010; CHECK-NEXT: shlq $63, %rcx 5011; CHECK-NEXT: xorq %rax, %rcx 5012; CHECK-NEXT: movq %rcx, %xmm0 5013; CHECK-NEXT: movss {{.*#+}} xmm4 = [4.5E+1,0.0E+0,0.0E+0,0.0E+0] 5014; CHECK-NEXT: comiss %xmm4, %xmm2 5015; CHECK-NEXT: xorps %xmm5, %xmm5 5016; CHECK-NEXT: ja .LBB124_6 5017; CHECK-NEXT: # %bb.5: # %entry 5018; CHECK-NEXT: movaps %xmm2, %xmm5 5019; CHECK-NEXT: .LBB124_6: # %entry 5020; CHECK-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm3[0] 5021; CHECK-NEXT: subss %xmm5, %xmm4 5022; CHECK-NEXT: cvttss2si %xmm4, %rax 5023; CHECK-NEXT: setbe %cl 5024; CHECK-NEXT: movzbl %cl, %ecx 5025; CHECK-NEXT: shlq $63, %rcx 5026; CHECK-NEXT: xorq %rax, %rcx 5027; CHECK-NEXT: movq %rcx, %xmm3 5028; CHECK-NEXT: movss {{.*#+}} xmm4 = [4.4E+1,0.0E+0,0.0E+0,0.0E+0] 5029; CHECK-NEXT: comiss %xmm4, %xmm2 5030; CHECK-NEXT: ja .LBB124_8 5031; CHECK-NEXT: # %bb.7: # %entry 5032; CHECK-NEXT: movaps %xmm2, %xmm1 5033; CHECK-NEXT: .LBB124_8: # %entry 5034; CHECK-NEXT: subss %xmm1, %xmm4 5035; CHECK-NEXT: cvttss2si %xmm4, %rax 5036; CHECK-NEXT: setbe %cl 5037; CHECK-NEXT: movzbl %cl, %ecx 5038; CHECK-NEXT: shlq $63, %rcx 5039; CHECK-NEXT: xorq %rax, %rcx 5040; CHECK-NEXT: movq %rcx, %xmm1 5041; CHECK-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm3[0] 5042; CHECK-NEXT: retq 5043; 5044; AVX1-LABEL: constrained_vector_fptoui_v4i64_v4f32: 5045; AVX1: # %bb.0: # %entry 5046; AVX1-NEXT: vmovss {{.*#+}} xmm2 = [4.5E+1,0.0E+0,0.0E+0,0.0E+0] 5047; AVX1-NEXT: vmovss {{.*#+}} xmm0 = [9.22337203E+18,0.0E+0,0.0E+0,0.0E+0] 5048; AVX1-NEXT: vcomiss %xmm2, %xmm0 5049; AVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1 5050; AVX1-NEXT: vxorps %xmm3, %xmm3, %xmm3 5051; AVX1-NEXT: ja .LBB124_2 5052; AVX1-NEXT: # %bb.1: # %entry 5053; AVX1-NEXT: vmovaps %xmm0, %xmm3 5054; AVX1-NEXT: .LBB124_2: # %entry 5055; 
AVX1-NEXT: vsubss %xmm3, %xmm2, %xmm2 5056; AVX1-NEXT: vcvttss2si %xmm2, %rcx 5057; AVX1-NEXT: setbe %al 5058; AVX1-NEXT: movzbl %al, %eax 5059; AVX1-NEXT: shlq $63, %rax 5060; AVX1-NEXT: xorq %rcx, %rax 5061; AVX1-NEXT: vmovss {{.*#+}} xmm3 = [4.4E+1,0.0E+0,0.0E+0,0.0E+0] 5062; AVX1-NEXT: vcomiss %xmm3, %xmm0 5063; AVX1-NEXT: vxorps %xmm4, %xmm4, %xmm4 5064; AVX1-NEXT: ja .LBB124_4 5065; AVX1-NEXT: # %bb.3: # %entry 5066; AVX1-NEXT: vmovaps %xmm0, %xmm4 5067; AVX1-NEXT: .LBB124_4: # %entry 5068; AVX1-NEXT: vmovq %rax, %xmm2 5069; AVX1-NEXT: vsubss %xmm4, %xmm3, %xmm3 5070; AVX1-NEXT: vcvttss2si %xmm3, %rax 5071; AVX1-NEXT: setbe %cl 5072; AVX1-NEXT: movzbl %cl, %ecx 5073; AVX1-NEXT: shlq $63, %rcx 5074; AVX1-NEXT: xorq %rax, %rcx 5075; AVX1-NEXT: vmovq %rcx, %xmm3 5076; AVX1-NEXT: vmovss {{.*#+}} xmm4 = [4.3E+1,0.0E+0,0.0E+0,0.0E+0] 5077; AVX1-NEXT: vcomiss %xmm4, %xmm0 5078; AVX1-NEXT: vxorps %xmm5, %xmm5, %xmm5 5079; AVX1-NEXT: ja .LBB124_6 5080; AVX1-NEXT: # %bb.5: # %entry 5081; AVX1-NEXT: vmovaps %xmm0, %xmm5 5082; AVX1-NEXT: .LBB124_6: # %entry 5083; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm3[0],xmm2[0] 5084; AVX1-NEXT: vsubss %xmm5, %xmm4, %xmm3 5085; AVX1-NEXT: vcvttss2si %xmm3, %rax 5086; AVX1-NEXT: setbe %cl 5087; AVX1-NEXT: movzbl %cl, %ecx 5088; AVX1-NEXT: shlq $63, %rcx 5089; AVX1-NEXT: xorq %rax, %rcx 5090; AVX1-NEXT: vmovq %rcx, %xmm3 5091; AVX1-NEXT: vmovss {{.*#+}} xmm4 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0] 5092; AVX1-NEXT: vcomiss %xmm4, %xmm0 5093; AVX1-NEXT: ja .LBB124_8 5094; AVX1-NEXT: # %bb.7: # %entry 5095; AVX1-NEXT: vmovaps %xmm0, %xmm1 5096; AVX1-NEXT: .LBB124_8: # %entry 5097; AVX1-NEXT: vsubss %xmm1, %xmm4, %xmm0 5098; AVX1-NEXT: vcvttss2si %xmm0, %rax 5099; AVX1-NEXT: setbe %cl 5100; AVX1-NEXT: movzbl %cl, %ecx 5101; AVX1-NEXT: shlq $63, %rcx 5102; AVX1-NEXT: xorq %rax, %rcx 5103; AVX1-NEXT: vmovq %rcx, %xmm0 5104; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm3[0] 5105; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 5106; AVX1-NEXT: 
retq 5107; 5108; AVX512F-LABEL: constrained_vector_fptoui_v4i64_v4f32: 5109; AVX512F: # %bb.0: # %entry 5110; AVX512F-NEXT: vcvttss2usi {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rax 5111; AVX512F-NEXT: vmovq %rax, %xmm0 5112; AVX512F-NEXT: vcvttss2usi {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rax 5113; AVX512F-NEXT: vmovq %rax, %xmm1 5114; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0] 5115; AVX512F-NEXT: vcvttss2usi {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rax 5116; AVX512F-NEXT: vmovq %rax, %xmm1 5117; AVX512F-NEXT: vcvttss2usi {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rax 5118; AVX512F-NEXT: vmovq %rax, %xmm2 5119; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0] 5120; AVX512F-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0 5121; AVX512F-NEXT: retq 5122; 5123; AVX512DQ-LABEL: constrained_vector_fptoui_v4i64_v4f32: 5124; AVX512DQ: # %bb.0: # %entry 5125; AVX512DQ-NEXT: vmovaps {{.*#+}} xmm0 = [4.2E+1,4.3E+1,4.4E+1,4.5E+1] 5126; AVX512DQ-NEXT: vcvttps2uqq %ymm0, %zmm0 5127; AVX512DQ-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 5128; AVX512DQ-NEXT: retq 5129entry: 5130 %result = call <4 x i64> @llvm.experimental.constrained.fptoui.v4i64.v4f32( 5131 <4 x float><float 42.0, float 43.0, 5132 float 44.0, float 45.0>, 5133 metadata !"fpexcept.strict") #0 5134 ret <4 x i64> %result 5135} 5136 5137define <1 x i32> @constrained_vector_fptoui_v1i32_v1f64() #0 { 5138; CHECK-LABEL: constrained_vector_fptoui_v1i32_v1f64: 5139; CHECK: # %bb.0: # %entry 5140; CHECK-NEXT: cvttsd2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rax 5141; CHECK-NEXT: # kill: def $eax killed $eax killed $rax 5142; CHECK-NEXT: retq 5143; 5144; AVX1-LABEL: constrained_vector_fptoui_v1i32_v1f64: 5145; AVX1: # %bb.0: # %entry 5146; AVX1-NEXT: vcvttsd2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rax 5147; AVX1-NEXT: # kill: def $eax killed $eax killed $rax 5148; AVX1-NEXT: retq 5149; 5150; AVX512-LABEL: constrained_vector_fptoui_v1i32_v1f64: 5151; AVX512: # %bb.0: # %entry 5152; AVX512-NEXT: vcvttsd2usi {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %eax 
5153; AVX512-NEXT: retq 5154entry: 5155 %result = call <1 x i32> @llvm.experimental.constrained.fptoui.v1i32.v1f64( 5156 <1 x double><double 42.1>, 5157 metadata !"fpexcept.strict") #0 5158 ret <1 x i32> %result 5159} 5160 5161define <2 x i32> @constrained_vector_fptoui_v2i32_v2f64() #0 { 5162; CHECK-LABEL: constrained_vector_fptoui_v2i32_v2f64: 5163; CHECK: # %bb.0: # %entry 5164; CHECK-NEXT: cvttsd2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rax 5165; CHECK-NEXT: movd %eax, %xmm1 5166; CHECK-NEXT: cvttsd2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rax 5167; CHECK-NEXT: movd %eax, %xmm0 5168; CHECK-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 5169; CHECK-NEXT: retq 5170; 5171; AVX1-LABEL: constrained_vector_fptoui_v2i32_v2f64: 5172; AVX1: # %bb.0: # %entry 5173; AVX1-NEXT: vcvttsd2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rax 5174; AVX1-NEXT: vcvttsd2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rcx 5175; AVX1-NEXT: vmovd %ecx, %xmm0 5176; AVX1-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0 5177; AVX1-NEXT: retq 5178; 5179; AVX512-LABEL: constrained_vector_fptoui_v2i32_v2f64: 5180; AVX512: # %bb.0: # %entry 5181; AVX512-NEXT: vmovaps {{.*#+}} ymm0 = [4.2100000000000001E+1,4.2200000000000003E+1,0.0E+0,0.0E+0] 5182; AVX512-NEXT: vcvttpd2udq %zmm0, %ymm0 5183; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 5184; AVX512-NEXT: vzeroupper 5185; AVX512-NEXT: retq 5186entry: 5187 %result = call <2 x i32> @llvm.experimental.constrained.fptoui.v2i32.v2f64( 5188 <2 x double><double 42.1, double 42.2>, 5189 metadata !"fpexcept.strict") #0 5190 ret <2 x i32> %result 5191} 5192 5193define <3 x i32> @constrained_vector_fptoui_v3i32_v3f64() #0 { 5194; CHECK-LABEL: constrained_vector_fptoui_v3i32_v3f64: 5195; CHECK: # %bb.0: # %entry 5196; CHECK-NEXT: cvttsd2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rax 5197; CHECK-NEXT: movd %eax, %xmm1 5198; CHECK-NEXT: cvttsd2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rax 5199; CHECK-NEXT: movd %eax, %xmm0 5200; CHECK-NEXT: punpckldq {{.*#+}} xmm0 = 
xmm0[0],xmm1[0],xmm0[1],xmm1[1] 5201; CHECK-NEXT: cvttsd2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rax 5202; CHECK-NEXT: movd %eax, %xmm1 5203; CHECK-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] 5204; CHECK-NEXT: retq 5205; 5206; AVX1-LABEL: constrained_vector_fptoui_v3i32_v3f64: 5207; AVX1: # %bb.0: # %entry 5208; AVX1-NEXT: vcvttsd2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rax 5209; AVX1-NEXT: vcvttsd2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rcx 5210; AVX1-NEXT: vmovd %ecx, %xmm0 5211; AVX1-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0 5212; AVX1-NEXT: vcvttsd2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rax 5213; AVX1-NEXT: vpinsrd $2, %eax, %xmm0, %xmm0 5214; AVX1-NEXT: retq 5215; 5216; AVX512-LABEL: constrained_vector_fptoui_v3i32_v3f64: 5217; AVX512: # %bb.0: # %entry 5218; AVX512-NEXT: vcvttsd2usi {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %eax 5219; AVX512-NEXT: vmovd %eax, %xmm0 5220; AVX512-NEXT: vcvttsd2usi {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %eax 5221; AVX512-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0 5222; AVX512-NEXT: vcvttsd2usi {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %eax 5223; AVX512-NEXT: vpinsrd $2, %eax, %xmm0, %xmm0 5224; AVX512-NEXT: retq 5225entry: 5226 %result = call <3 x i32> @llvm.experimental.constrained.fptoui.v3i32.v3f64( 5227 <3 x double><double 42.1, double 42.2, 5228 double 42.3>, 5229 metadata !"fpexcept.strict") #0 5230 ret <3 x i32> %result 5231} 5232 5233define <4 x i32> @constrained_vector_fptoui_v4i32_v4f64() #0 { 5234; CHECK-LABEL: constrained_vector_fptoui_v4i32_v4f64: 5235; CHECK: # %bb.0: # %entry 5236; CHECK-NEXT: cvttsd2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rax 5237; CHECK-NEXT: movd %eax, %xmm0 5238; CHECK-NEXT: cvttsd2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rax 5239; CHECK-NEXT: movd %eax, %xmm1 5240; CHECK-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] 5241; CHECK-NEXT: cvttsd2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rax 5242; CHECK-NEXT: movd %eax, %xmm2 5243; CHECK-NEXT: cvttsd2si {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rax 5244; CHECK-NEXT: movd %eax, %xmm0 5245; CHECK-NEXT: 
punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] 5246; CHECK-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] 5247; CHECK-NEXT: retq 5248; 5249; AVX1-LABEL: constrained_vector_fptoui_v4i32_v4f64: 5250; AVX1: # %bb.0: # %entry 5251; AVX1-NEXT: vbroadcastsd {{.*#+}} ymm0 = [2.147483648E+9,2.147483648E+9,2.147483648E+9,2.147483648E+9] 5252; AVX1-NEXT: vmovapd {{.*#+}} ymm1 = [4.2100000000000001E+1,4.2200000000000003E+1,4.2299999999999997E+1,4.2399999999999999E+1] 5253; AVX1-NEXT: vcmpltpd %ymm0, %ymm1, %ymm2 5254; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm3 5255; AVX1-NEXT: vshufps {{.*#+}} xmm3 = xmm2[0,2],xmm3[0,2] 5256; AVX1-NEXT: vxorps %xmm4, %xmm4, %xmm4 5257; AVX1-NEXT: vbroadcastss {{.*#+}} xmm5 = [2147483648,2147483648,2147483648,2147483648] 5258; AVX1-NEXT: vblendvps %xmm3, %xmm4, %xmm5, %xmm3 5259; AVX1-NEXT: vxorps %xmm4, %xmm4, %xmm4 5260; AVX1-NEXT: vblendvpd %ymm2, %ymm4, %ymm0, %ymm0 5261; AVX1-NEXT: vsubpd %ymm0, %ymm1, %ymm0 5262; AVX1-NEXT: vcvttpd2dq %ymm0, %xmm0 5263; AVX1-NEXT: vxorpd %xmm3, %xmm0, %xmm0 5264; AVX1-NEXT: vzeroupper 5265; AVX1-NEXT: retq 5266; 5267; AVX512-LABEL: constrained_vector_fptoui_v4i32_v4f64: 5268; AVX512: # %bb.0: # %entry 5269; AVX512-NEXT: vmovaps {{.*#+}} ymm0 = [4.2100000000000001E+1,4.2200000000000003E+1,4.2299999999999997E+1,4.2399999999999999E+1] 5270; AVX512-NEXT: vcvttpd2udq %zmm0, %ymm0 5271; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 5272; AVX512-NEXT: vzeroupper 5273; AVX512-NEXT: retq 5274entry: 5275 %result = call <4 x i32> @llvm.experimental.constrained.fptoui.v4i32.v4f64( 5276 <4 x double><double 42.1, double 42.2, 5277 double 42.3, double 42.4>, 5278 metadata !"fpexcept.strict") #0 5279 ret <4 x i32> %result 5280} 5281 5282define <1 x i64> @constrained_vector_fptoui_v1i64_v1f64() #0 { 5283; CHECK-LABEL: constrained_vector_fptoui_v1i64_v1f64: 5284; CHECK: # %bb.0: # %entry 5285; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0] 5286; CHECK-NEXT: movsd {{.*#+}} xmm2 = 
[9.2233720368547758E+18,0.0E+0] 5287; CHECK-NEXT: comisd %xmm0, %xmm2 5288; CHECK-NEXT: xorpd %xmm1, %xmm1 5289; CHECK-NEXT: ja .LBB129_2 5290; CHECK-NEXT: # %bb.1: # %entry 5291; CHECK-NEXT: movapd %xmm2, %xmm1 5292; CHECK-NEXT: .LBB129_2: # %entry 5293; CHECK-NEXT: subsd %xmm1, %xmm0 5294; CHECK-NEXT: cvttsd2si %xmm0, %rcx 5295; CHECK-NEXT: setbe %al 5296; CHECK-NEXT: movzbl %al, %eax 5297; CHECK-NEXT: shlq $63, %rax 5298; CHECK-NEXT: xorq %rcx, %rax 5299; CHECK-NEXT: retq 5300; 5301; AVX1-LABEL: constrained_vector_fptoui_v1i64_v1f64: 5302; AVX1: # %bb.0: # %entry 5303; AVX1-NEXT: vmovsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0] 5304; AVX1-NEXT: vmovsd {{.*#+}} xmm1 = [9.2233720368547758E+18,0.0E+0] 5305; AVX1-NEXT: vcomisd %xmm0, %xmm1 5306; AVX1-NEXT: vxorpd %xmm2, %xmm2, %xmm2 5307; AVX1-NEXT: ja .LBB129_2 5308; AVX1-NEXT: # %bb.1: # %entry 5309; AVX1-NEXT: vmovapd %xmm1, %xmm2 5310; AVX1-NEXT: .LBB129_2: # %entry 5311; AVX1-NEXT: vsubsd %xmm2, %xmm0, %xmm0 5312; AVX1-NEXT: vcvttsd2si %xmm0, %rcx 5313; AVX1-NEXT: setbe %al 5314; AVX1-NEXT: movzbl %al, %eax 5315; AVX1-NEXT: shlq $63, %rax 5316; AVX1-NEXT: xorq %rcx, %rax 5317; AVX1-NEXT: retq 5318; 5319; AVX512-LABEL: constrained_vector_fptoui_v1i64_v1f64: 5320; AVX512: # %bb.0: # %entry 5321; AVX512-NEXT: vcvttsd2usi {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rax 5322; AVX512-NEXT: retq 5323entry: 5324 %result = call <1 x i64> @llvm.experimental.constrained.fptoui.v1i64.v1f64( 5325 <1 x double><double 42.1>, 5326 metadata !"fpexcept.strict") #0 5327 ret <1 x i64> %result 5328} 5329 5330define <2 x i64> @constrained_vector_fptoui_v2i64_v2f64() #0 { 5331; CHECK-LABEL: constrained_vector_fptoui_v2i64_v2f64: 5332; CHECK: # %bb.0: # %entry 5333; CHECK-NEXT: movsd {{.*#+}} xmm2 = [4.2200000000000003E+1,0.0E+0] 5334; CHECK-NEXT: movsd {{.*#+}} xmm1 = [9.2233720368547758E+18,0.0E+0] 5335; CHECK-NEXT: comisd %xmm2, %xmm1 5336; CHECK-NEXT: xorpd %xmm0, %xmm0 5337; CHECK-NEXT: xorpd %xmm3, %xmm3 5338; CHECK-NEXT: ja .LBB130_2 
5339; CHECK-NEXT: # %bb.1: # %entry 5340; CHECK-NEXT: movapd %xmm1, %xmm3 5341; CHECK-NEXT: .LBB130_2: # %entry 5342; CHECK-NEXT: subsd %xmm3, %xmm2 5343; CHECK-NEXT: cvttsd2si %xmm2, %rax 5344; CHECK-NEXT: setbe %cl 5345; CHECK-NEXT: movzbl %cl, %ecx 5346; CHECK-NEXT: shlq $63, %rcx 5347; CHECK-NEXT: xorq %rax, %rcx 5348; CHECK-NEXT: movq %rcx, %xmm2 5349; CHECK-NEXT: movsd {{.*#+}} xmm3 = [4.2100000000000001E+1,0.0E+0] 5350; CHECK-NEXT: comisd %xmm3, %xmm1 5351; CHECK-NEXT: ja .LBB130_4 5352; CHECK-NEXT: # %bb.3: # %entry 5353; CHECK-NEXT: movapd %xmm1, %xmm0 5354; CHECK-NEXT: .LBB130_4: # %entry 5355; CHECK-NEXT: subsd %xmm0, %xmm3 5356; CHECK-NEXT: cvttsd2si %xmm3, %rax 5357; CHECK-NEXT: setbe %cl 5358; CHECK-NEXT: movzbl %cl, %ecx 5359; CHECK-NEXT: shlq $63, %rcx 5360; CHECK-NEXT: xorq %rax, %rcx 5361; CHECK-NEXT: movq %rcx, %xmm0 5362; CHECK-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0] 5363; CHECK-NEXT: retq 5364; 5365; AVX1-LABEL: constrained_vector_fptoui_v2i64_v2f64: 5366; AVX1: # %bb.0: # %entry 5367; AVX1-NEXT: vmovsd {{.*#+}} xmm2 = [4.2200000000000003E+1,0.0E+0] 5368; AVX1-NEXT: vmovsd {{.*#+}} xmm0 = [9.2233720368547758E+18,0.0E+0] 5369; AVX1-NEXT: vcomisd %xmm2, %xmm0 5370; AVX1-NEXT: vxorpd %xmm1, %xmm1, %xmm1 5371; AVX1-NEXT: vxorpd %xmm3, %xmm3, %xmm3 5372; AVX1-NEXT: ja .LBB130_2 5373; AVX1-NEXT: # %bb.1: # %entry 5374; AVX1-NEXT: vmovapd %xmm0, %xmm3 5375; AVX1-NEXT: .LBB130_2: # %entry 5376; AVX1-NEXT: vsubsd %xmm3, %xmm2, %xmm2 5377; AVX1-NEXT: vcvttsd2si %xmm2, %rax 5378; AVX1-NEXT: setbe %cl 5379; AVX1-NEXT: movzbl %cl, %ecx 5380; AVX1-NEXT: shlq $63, %rcx 5381; AVX1-NEXT: xorq %rax, %rcx 5382; AVX1-NEXT: vmovq %rcx, %xmm2 5383; AVX1-NEXT: vmovsd {{.*#+}} xmm3 = [4.2100000000000001E+1,0.0E+0] 5384; AVX1-NEXT: vcomisd %xmm3, %xmm0 5385; AVX1-NEXT: ja .LBB130_4 5386; AVX1-NEXT: # %bb.3: # %entry 5387; AVX1-NEXT: vmovapd %xmm0, %xmm1 5388; AVX1-NEXT: .LBB130_4: # %entry 5389; AVX1-NEXT: vsubsd %xmm1, %xmm3, %xmm0 5390; AVX1-NEXT: vcvttsd2si 
%xmm0, %rax 5391; AVX1-NEXT: setbe %cl 5392; AVX1-NEXT: movzbl %cl, %ecx 5393; AVX1-NEXT: shlq $63, %rcx 5394; AVX1-NEXT: xorq %rax, %rcx 5395; AVX1-NEXT: vmovq %rcx, %xmm0 5396; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0] 5397; AVX1-NEXT: retq 5398; 5399; AVX512F-LABEL: constrained_vector_fptoui_v2i64_v2f64: 5400; AVX512F: # %bb.0: # %entry 5401; AVX512F-NEXT: vcvttsd2usi {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rax 5402; AVX512F-NEXT: vmovq %rax, %xmm0 5403; AVX512F-NEXT: vcvttsd2usi {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rax 5404; AVX512F-NEXT: vmovq %rax, %xmm1 5405; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0] 5406; AVX512F-NEXT: retq 5407; 5408; AVX512DQ-LABEL: constrained_vector_fptoui_v2i64_v2f64: 5409; AVX512DQ: # %bb.0: # %entry 5410; AVX512DQ-NEXT: vmovaps {{.*#+}} xmm0 = [4.2100000000000001E+1,4.2200000000000003E+1] 5411; AVX512DQ-NEXT: vcvttpd2uqq %zmm0, %zmm0 5412; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 5413; AVX512DQ-NEXT: vzeroupper 5414; AVX512DQ-NEXT: retq 5415entry: 5416 %result = call <2 x i64> @llvm.experimental.constrained.fptoui.v2i64.v2f64( 5417 <2 x double><double 42.1, double 42.2>, 5418 metadata !"fpexcept.strict") #0 5419 ret <2 x i64> %result 5420} 5421 5422define <3 x i64> @constrained_vector_fptoui_v3i64_v3f64() #0 { 5423; CHECK-LABEL: constrained_vector_fptoui_v3i64_v3f64: 5424; CHECK: # %bb.0: # %entry 5425; CHECK-NEXT: movsd {{.*#+}} xmm2 = [4.2100000000000001E+1,0.0E+0] 5426; CHECK-NEXT: movsd {{.*#+}} xmm1 = [9.2233720368547758E+18,0.0E+0] 5427; CHECK-NEXT: comisd %xmm2, %xmm1 5428; CHECK-NEXT: xorpd %xmm0, %xmm0 5429; CHECK-NEXT: xorpd %xmm3, %xmm3 5430; CHECK-NEXT: ja .LBB131_2 5431; CHECK-NEXT: # %bb.1: # %entry 5432; CHECK-NEXT: movapd %xmm1, %xmm3 5433; CHECK-NEXT: .LBB131_2: # %entry 5434; CHECK-NEXT: subsd %xmm3, %xmm2 5435; CHECK-NEXT: cvttsd2si %xmm2, %rcx 5436; CHECK-NEXT: setbe %al 5437; CHECK-NEXT: movzbl %al, %eax 5438; CHECK-NEXT: shlq $63, %rax 5439; CHECK-NEXT: xorq %rcx, %rax 5440; 
CHECK-NEXT: movsd {{.*#+}} xmm2 = [4.2200000000000003E+1,0.0E+0] 5441; CHECK-NEXT: comisd %xmm2, %xmm1 5442; CHECK-NEXT: xorpd %xmm3, %xmm3 5443; CHECK-NEXT: ja .LBB131_4 5444; CHECK-NEXT: # %bb.3: # %entry 5445; CHECK-NEXT: movapd %xmm1, %xmm3 5446; CHECK-NEXT: .LBB131_4: # %entry 5447; CHECK-NEXT: subsd %xmm3, %xmm2 5448; CHECK-NEXT: cvttsd2si %xmm2, %rcx 5449; CHECK-NEXT: setbe %dl 5450; CHECK-NEXT: movzbl %dl, %edx 5451; CHECK-NEXT: shlq $63, %rdx 5452; CHECK-NEXT: xorq %rcx, %rdx 5453; CHECK-NEXT: movsd {{.*#+}} xmm2 = [4.2299999999999997E+1,0.0E+0] 5454; CHECK-NEXT: comisd %xmm2, %xmm1 5455; CHECK-NEXT: ja .LBB131_6 5456; CHECK-NEXT: # %bb.5: # %entry 5457; CHECK-NEXT: movapd %xmm1, %xmm0 5458; CHECK-NEXT: .LBB131_6: # %entry 5459; CHECK-NEXT: subsd %xmm0, %xmm2 5460; CHECK-NEXT: cvttsd2si %xmm2, %rsi 5461; CHECK-NEXT: setbe %cl 5462; CHECK-NEXT: movzbl %cl, %ecx 5463; CHECK-NEXT: shlq $63, %rcx 5464; CHECK-NEXT: xorq %rsi, %rcx 5465; CHECK-NEXT: retq 5466; 5467; AVX1-LABEL: constrained_vector_fptoui_v3i64_v3f64: 5468; AVX1: # %bb.0: # %entry 5469; AVX1-NEXT: vmovsd {{.*#+}} xmm2 = [4.2200000000000003E+1,0.0E+0] 5470; AVX1-NEXT: vmovsd {{.*#+}} xmm0 = [9.2233720368547758E+18,0.0E+0] 5471; AVX1-NEXT: vcomisd %xmm2, %xmm0 5472; AVX1-NEXT: vxorpd %xmm1, %xmm1, %xmm1 5473; AVX1-NEXT: vxorpd %xmm3, %xmm3, %xmm3 5474; AVX1-NEXT: ja .LBB131_2 5475; AVX1-NEXT: # %bb.1: # %entry 5476; AVX1-NEXT: vmovapd %xmm0, %xmm3 5477; AVX1-NEXT: .LBB131_2: # %entry 5478; AVX1-NEXT: vsubsd %xmm3, %xmm2, %xmm2 5479; AVX1-NEXT: vcvttsd2si %xmm2, %rax 5480; AVX1-NEXT: setbe %cl 5481; AVX1-NEXT: movzbl %cl, %ecx 5482; AVX1-NEXT: shlq $63, %rcx 5483; AVX1-NEXT: xorq %rax, %rcx 5484; AVX1-NEXT: vmovq %rcx, %xmm2 5485; AVX1-NEXT: vmovsd {{.*#+}} xmm3 = [4.2100000000000001E+1,0.0E+0] 5486; AVX1-NEXT: vcomisd %xmm3, %xmm0 5487; AVX1-NEXT: vxorpd %xmm4, %xmm4, %xmm4 5488; AVX1-NEXT: ja .LBB131_4 5489; AVX1-NEXT: # %bb.3: # %entry 5490; AVX1-NEXT: vmovapd %xmm0, %xmm4 5491; AVX1-NEXT: 
.LBB131_4: # %entry 5492; AVX1-NEXT: vsubsd %xmm4, %xmm3, %xmm3 5493; AVX1-NEXT: vcvttsd2si %xmm3, %rax 5494; AVX1-NEXT: setbe %cl 5495; AVX1-NEXT: movzbl %cl, %ecx 5496; AVX1-NEXT: shlq $63, %rcx 5497; AVX1-NEXT: xorq %rax, %rcx 5498; AVX1-NEXT: vmovq %rcx, %xmm3 5499; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm3[0],xmm2[0] 5500; AVX1-NEXT: vmovsd {{.*#+}} xmm3 = [4.2299999999999997E+1,0.0E+0] 5501; AVX1-NEXT: vcomisd %xmm3, %xmm0 5502; AVX1-NEXT: ja .LBB131_6 5503; AVX1-NEXT: # %bb.5: # %entry 5504; AVX1-NEXT: vmovapd %xmm0, %xmm1 5505; AVX1-NEXT: .LBB131_6: # %entry 5506; AVX1-NEXT: vsubsd %xmm1, %xmm3, %xmm0 5507; AVX1-NEXT: vcvttsd2si %xmm0, %rax 5508; AVX1-NEXT: setbe %cl 5509; AVX1-NEXT: movzbl %cl, %ecx 5510; AVX1-NEXT: shlq $63, %rcx 5511; AVX1-NEXT: xorq %rax, %rcx 5512; AVX1-NEXT: vmovq %rcx, %xmm0 5513; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0 5514; AVX1-NEXT: retq 5515; 5516; AVX512-LABEL: constrained_vector_fptoui_v3i64_v3f64: 5517; AVX512: # %bb.0: # %entry 5518; AVX512-NEXT: vcvttsd2usi {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rax 5519; AVX512-NEXT: vmovq %rax, %xmm0 5520; AVX512-NEXT: vcvttsd2usi {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rax 5521; AVX512-NEXT: vmovq %rax, %xmm1 5522; AVX512-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0] 5523; AVX512-NEXT: vcvttsd2usi {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rax 5524; AVX512-NEXT: vmovq %rax, %xmm1 5525; AVX512-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 5526; AVX512-NEXT: retq 5527entry: 5528 %result = call <3 x i64> @llvm.experimental.constrained.fptoui.v3i64.v3f64( 5529 <3 x double><double 42.1, double 42.2, 5530 double 42.3>, 5531 metadata !"fpexcept.strict") #0 5532 ret <3 x i64> %result 5533} 5534 5535define <4 x i64> @constrained_vector_fptoui_v4i64_v4f64() #0 { 5536; CHECK-LABEL: constrained_vector_fptoui_v4i64_v4f64: 5537; CHECK: # %bb.0: # %entry 5538; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2200000000000003E+1,0.0E+0] 5539; CHECK-NEXT: movsd {{.*#+}} xmm2 = [9.2233720368547758E+18,0.0E+0] 5540; CHECK-NEXT: 
comisd %xmm0, %xmm2 5541; CHECK-NEXT: xorpd %xmm1, %xmm1 5542; CHECK-NEXT: xorpd %xmm3, %xmm3 5543; CHECK-NEXT: ja .LBB132_2 5544; CHECK-NEXT: # %bb.1: # %entry 5545; CHECK-NEXT: movapd %xmm2, %xmm3 5546; CHECK-NEXT: .LBB132_2: # %entry 5547; CHECK-NEXT: subsd %xmm3, %xmm0 5548; CHECK-NEXT: cvttsd2si %xmm0, %rcx 5549; CHECK-NEXT: setbe %al 5550; CHECK-NEXT: movzbl %al, %eax 5551; CHECK-NEXT: shlq $63, %rax 5552; CHECK-NEXT: xorq %rcx, %rax 5553; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0] 5554; CHECK-NEXT: comisd %xmm0, %xmm2 5555; CHECK-NEXT: xorpd %xmm4, %xmm4 5556; CHECK-NEXT: ja .LBB132_4 5557; CHECK-NEXT: # %bb.3: # %entry 5558; CHECK-NEXT: movapd %xmm2, %xmm4 5559; CHECK-NEXT: .LBB132_4: # %entry 5560; CHECK-NEXT: movq %rax, %xmm3 5561; CHECK-NEXT: subsd %xmm4, %xmm0 5562; CHECK-NEXT: cvttsd2si %xmm0, %rax 5563; CHECK-NEXT: setbe %cl 5564; CHECK-NEXT: movzbl %cl, %ecx 5565; CHECK-NEXT: shlq $63, %rcx 5566; CHECK-NEXT: xorq %rax, %rcx 5567; CHECK-NEXT: movq %rcx, %xmm0 5568; CHECK-NEXT: movsd {{.*#+}} xmm4 = [4.2399999999999999E+1,0.0E+0] 5569; CHECK-NEXT: comisd %xmm4, %xmm2 5570; CHECK-NEXT: xorpd %xmm5, %xmm5 5571; CHECK-NEXT: ja .LBB132_6 5572; CHECK-NEXT: # %bb.5: # %entry 5573; CHECK-NEXT: movapd %xmm2, %xmm5 5574; CHECK-NEXT: .LBB132_6: # %entry 5575; CHECK-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm3[0] 5576; CHECK-NEXT: subsd %xmm5, %xmm4 5577; CHECK-NEXT: cvttsd2si %xmm4, %rax 5578; CHECK-NEXT: setbe %cl 5579; CHECK-NEXT: movzbl %cl, %ecx 5580; CHECK-NEXT: shlq $63, %rcx 5581; CHECK-NEXT: xorq %rax, %rcx 5582; CHECK-NEXT: movq %rcx, %xmm3 5583; CHECK-NEXT: movsd {{.*#+}} xmm4 = [4.2299999999999997E+1,0.0E+0] 5584; CHECK-NEXT: comisd %xmm4, %xmm2 5585; CHECK-NEXT: ja .LBB132_8 5586; CHECK-NEXT: # %bb.7: # %entry 5587; CHECK-NEXT: movapd %xmm2, %xmm1 5588; CHECK-NEXT: .LBB132_8: # %entry 5589; CHECK-NEXT: subsd %xmm1, %xmm4 5590; CHECK-NEXT: cvttsd2si %xmm4, %rax 5591; CHECK-NEXT: setbe %cl 5592; CHECK-NEXT: movzbl %cl, %ecx 5593; 
CHECK-NEXT: shlq $63, %rcx 5594; CHECK-NEXT: xorq %rax, %rcx 5595; CHECK-NEXT: movq %rcx, %xmm1 5596; CHECK-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm3[0] 5597; CHECK-NEXT: retq 5598; 5599; AVX1-LABEL: constrained_vector_fptoui_v4i64_v4f64: 5600; AVX1: # %bb.0: # %entry 5601; AVX1-NEXT: vmovsd {{.*#+}} xmm2 = [4.2399999999999999E+1,0.0E+0] 5602; AVX1-NEXT: vmovsd {{.*#+}} xmm0 = [9.2233720368547758E+18,0.0E+0] 5603; AVX1-NEXT: vcomisd %xmm2, %xmm0 5604; AVX1-NEXT: vxorpd %xmm1, %xmm1, %xmm1 5605; AVX1-NEXT: vxorpd %xmm3, %xmm3, %xmm3 5606; AVX1-NEXT: ja .LBB132_2 5607; AVX1-NEXT: # %bb.1: # %entry 5608; AVX1-NEXT: vmovapd %xmm0, %xmm3 5609; AVX1-NEXT: .LBB132_2: # %entry 5610; AVX1-NEXT: vsubsd %xmm3, %xmm2, %xmm2 5611; AVX1-NEXT: vcvttsd2si %xmm2, %rcx 5612; AVX1-NEXT: setbe %al 5613; AVX1-NEXT: movzbl %al, %eax 5614; AVX1-NEXT: shlq $63, %rax 5615; AVX1-NEXT: xorq %rcx, %rax 5616; AVX1-NEXT: vmovsd {{.*#+}} xmm3 = [4.2299999999999997E+1,0.0E+0] 5617; AVX1-NEXT: vcomisd %xmm3, %xmm0 5618; AVX1-NEXT: vxorpd %xmm4, %xmm4, %xmm4 5619; AVX1-NEXT: ja .LBB132_4 5620; AVX1-NEXT: # %bb.3: # %entry 5621; AVX1-NEXT: vmovapd %xmm0, %xmm4 5622; AVX1-NEXT: .LBB132_4: # %entry 5623; AVX1-NEXT: vmovq %rax, %xmm2 5624; AVX1-NEXT: vsubsd %xmm4, %xmm3, %xmm3 5625; AVX1-NEXT: vcvttsd2si %xmm3, %rax 5626; AVX1-NEXT: setbe %cl 5627; AVX1-NEXT: movzbl %cl, %ecx 5628; AVX1-NEXT: shlq $63, %rcx 5629; AVX1-NEXT: xorq %rax, %rcx 5630; AVX1-NEXT: vmovq %rcx, %xmm3 5631; AVX1-NEXT: vmovsd {{.*#+}} xmm4 = [4.2200000000000003E+1,0.0E+0] 5632; AVX1-NEXT: vcomisd %xmm4, %xmm0 5633; AVX1-NEXT: vxorpd %xmm5, %xmm5, %xmm5 5634; AVX1-NEXT: ja .LBB132_6 5635; AVX1-NEXT: # %bb.5: # %entry 5636; AVX1-NEXT: vmovapd %xmm0, %xmm5 5637; AVX1-NEXT: .LBB132_6: # %entry 5638; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm3[0],xmm2[0] 5639; AVX1-NEXT: vsubsd %xmm5, %xmm4, %xmm3 5640; AVX1-NEXT: vcvttsd2si %xmm3, %rax 5641; AVX1-NEXT: setbe %cl 5642; AVX1-NEXT: movzbl %cl, %ecx 5643; AVX1-NEXT: shlq $63, %rcx 
5644; AVX1-NEXT: xorq %rax, %rcx 5645; AVX1-NEXT: vmovq %rcx, %xmm3 5646; AVX1-NEXT: vmovsd {{.*#+}} xmm4 = [4.2100000000000001E+1,0.0E+0] 5647; AVX1-NEXT: vcomisd %xmm4, %xmm0 5648; AVX1-NEXT: ja .LBB132_8 5649; AVX1-NEXT: # %bb.7: # %entry 5650; AVX1-NEXT: vmovapd %xmm0, %xmm1 5651; AVX1-NEXT: .LBB132_8: # %entry 5652; AVX1-NEXT: vsubsd %xmm1, %xmm4, %xmm0 5653; AVX1-NEXT: vcvttsd2si %xmm0, %rax 5654; AVX1-NEXT: setbe %cl 5655; AVX1-NEXT: movzbl %cl, %ecx 5656; AVX1-NEXT: shlq $63, %rcx 5657; AVX1-NEXT: xorq %rax, %rcx 5658; AVX1-NEXT: vmovq %rcx, %xmm0 5659; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm3[0] 5660; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 5661; AVX1-NEXT: retq 5662; 5663; AVX512F-LABEL: constrained_vector_fptoui_v4i64_v4f64: 5664; AVX512F: # %bb.0: # %entry 5665; AVX512F-NEXT: vcvttsd2usi {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rax 5666; AVX512F-NEXT: vmovq %rax, %xmm0 5667; AVX512F-NEXT: vcvttsd2usi {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rax 5668; AVX512F-NEXT: vmovq %rax, %xmm1 5669; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0] 5670; AVX512F-NEXT: vcvttsd2usi {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rax 5671; AVX512F-NEXT: vmovq %rax, %xmm1 5672; AVX512F-NEXT: vcvttsd2usi {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %rax 5673; AVX512F-NEXT: vmovq %rax, %xmm2 5674; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0] 5675; AVX512F-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0 5676; AVX512F-NEXT: retq 5677; 5678; AVX512DQ-LABEL: constrained_vector_fptoui_v4i64_v4f64: 5679; AVX512DQ: # %bb.0: # %entry 5680; AVX512DQ-NEXT: vmovaps {{.*#+}} ymm0 = [4.2100000000000001E+1,4.2200000000000003E+1,4.2299999999999997E+1,4.2399999999999999E+1] 5681; AVX512DQ-NEXT: vcvttpd2uqq %zmm0, %zmm0 5682; AVX512DQ-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 5683; AVX512DQ-NEXT: retq 5684entry: 5685 %result = call <4 x i64> @llvm.experimental.constrained.fptoui.v4i64.v4f64( 5686 <4 x double><double 42.1, double 42.2, 5687 double 42.3, double 42.4>, 5688 metadata 
!"fpexcept.strict") #0 5689 ret <4 x i64> %result 5690} 5691 5692 5693define <1 x float> @constrained_vector_fptrunc_v1f64() #0 { 5694; CHECK-LABEL: constrained_vector_fptrunc_v1f64: 5695; CHECK: # %bb.0: # %entry 5696; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0] 5697; CHECK-NEXT: cvtsd2ss %xmm0, %xmm0 5698; CHECK-NEXT: retq 5699; 5700; AVX-LABEL: constrained_vector_fptrunc_v1f64: 5701; AVX: # %bb.0: # %entry 5702; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0] 5703; AVX-NEXT: vcvtsd2ss %xmm0, %xmm0, %xmm0 5704; AVX-NEXT: retq 5705entry: 5706 %result = call <1 x float> @llvm.experimental.constrained.fptrunc.v1f32.v1f64( 5707 <1 x double><double 42.1>, 5708 metadata !"round.dynamic", 5709 metadata !"fpexcept.strict") #0 5710 ret <1 x float> %result 5711} 5712 5713define <2 x float> @constrained_vector_fptrunc_v2f64() #0 { 5714; CHECK-LABEL: constrained_vector_fptrunc_v2f64: 5715; CHECK: # %bb.0: # %entry 5716; CHECK-NEXT: cvtpd2ps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 5717; CHECK-NEXT: retq 5718; 5719; AVX-LABEL: constrained_vector_fptrunc_v2f64: 5720; AVX: # %bb.0: # %entry 5721; AVX-NEXT: vcvtpd2psx {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 5722; AVX-NEXT: retq 5723entry: 5724 %result = call <2 x float> @llvm.experimental.constrained.fptrunc.v2f32.v2f64( 5725 <2 x double><double 42.1, double 42.2>, 5726 metadata !"round.dynamic", 5727 metadata !"fpexcept.strict") #0 5728 ret <2 x float> %result 5729} 5730 5731define <3 x float> @constrained_vector_fptrunc_v3f64() #0 { 5732; CHECK-LABEL: constrained_vector_fptrunc_v3f64: 5733; CHECK: # %bb.0: # %entry 5734; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2200000000000003E+1,0.0E+0] 5735; CHECK-NEXT: cvtsd2ss %xmm0, %xmm1 5736; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0] 5737; CHECK-NEXT: cvtsd2ss %xmm0, %xmm0 5738; CHECK-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 5739; CHECK-NEXT: movsd {{.*#+}} xmm1 = [4.2299999999999997E+1,0.0E+0] 5740; CHECK-NEXT: 
cvtsd2ss %xmm1, %xmm1 5741; CHECK-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] 5742; CHECK-NEXT: retq 5743; 5744; AVX-LABEL: constrained_vector_fptrunc_v3f64: 5745; AVX: # %bb.0: # %entry 5746; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2200000000000003E+1,0.0E+0] 5747; AVX-NEXT: vcvtsd2ss %xmm0, %xmm0, %xmm0 5748; AVX-NEXT: vmovsd {{.*#+}} xmm1 = [4.2100000000000001E+1,0.0E+0] 5749; AVX-NEXT: vcvtsd2ss %xmm1, %xmm1, %xmm1 5750; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3] 5751; AVX-NEXT: vmovsd {{.*#+}} xmm1 = [4.2299999999999997E+1,0.0E+0] 5752; AVX-NEXT: vcvtsd2ss %xmm1, %xmm1, %xmm1 5753; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0],xmm0[3] 5754; AVX-NEXT: retq 5755entry: 5756 %result = call <3 x float> @llvm.experimental.constrained.fptrunc.v3f32.v3f64( 5757 <3 x double><double 42.1, double 42.2, 5758 double 42.3>, 5759 metadata !"round.dynamic", 5760 metadata !"fpexcept.strict") #0 5761 ret <3 x float> %result 5762} 5763 5764define <4 x float> @constrained_vector_fptrunc_v4f64() #0 { 5765; CHECK-LABEL: constrained_vector_fptrunc_v4f64: 5766; CHECK: # %bb.0: # %entry 5767; CHECK-NEXT: cvtpd2ps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 5768; CHECK-NEXT: cvtpd2ps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 5769; CHECK-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] 5770; CHECK-NEXT: retq 5771; 5772; AVX-LABEL: constrained_vector_fptrunc_v4f64: 5773; AVX: # %bb.0: # %entry 5774; AVX-NEXT: vcvtpd2psy {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 5775; AVX-NEXT: retq 5776entry: 5777 %result = call <4 x float> @llvm.experimental.constrained.fptrunc.v4f32.v4f64( 5778 <4 x double><double 42.1, double 42.2, 5779 double 42.3, double 42.4>, 5780 metadata !"round.dynamic", 5781 metadata !"fpexcept.strict") #0 5782 ret <4 x float> %result 5783} 5784 5785define <1 x double> @constrained_vector_fpext_v1f32() #0 { 5786; CHECK-LABEL: constrained_vector_fpext_v1f32: 5787; CHECK: # %bb.0: # %entry 5788; CHECK-NEXT: movss {{.*#+}} xmm0 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0] 5789; 
CHECK-NEXT: cvtss2sd %xmm0, %xmm0 5790; CHECK-NEXT: retq 5791; 5792; AVX-LABEL: constrained_vector_fpext_v1f32: 5793; AVX: # %bb.0: # %entry 5794; AVX-NEXT: vmovss {{.*#+}} xmm0 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0] 5795; AVX-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0 5796; AVX-NEXT: retq 5797entry: 5798 %result = call <1 x double> @llvm.experimental.constrained.fpext.v1f64.v1f32( 5799 <1 x float><float 42.0>, 5800 metadata !"fpexcept.strict") #0 5801 ret <1 x double> %result 5802} 5803 5804define <2 x double> @constrained_vector_fpext_v2f32() #0 { 5805; CHECK-LABEL: constrained_vector_fpext_v2f32: 5806; CHECK: # %bb.0: # %entry 5807; CHECK-NEXT: cvtps2pd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 5808; CHECK-NEXT: retq 5809; 5810; AVX-LABEL: constrained_vector_fpext_v2f32: 5811; AVX: # %bb.0: # %entry 5812; AVX-NEXT: vcvtps2pd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 5813; AVX-NEXT: retq 5814entry: 5815 %result = call <2 x double> @llvm.experimental.constrained.fpext.v2f64.v2f32( 5816 <2 x float><float 42.0, float 43.0>, 5817 metadata !"fpexcept.strict") #0 5818 ret <2 x double> %result 5819} 5820 5821define <3 x double> @constrained_vector_fpext_v3f32() #0 { 5822; CHECK-LABEL: constrained_vector_fpext_v3f32: 5823; CHECK: # %bb.0: # %entry 5824; CHECK-NEXT: movss {{.*#+}} xmm0 = [4.3E+1,0.0E+0,0.0E+0,0.0E+0] 5825; CHECK-NEXT: cvtss2sd %xmm0, %xmm1 5826; CHECK-NEXT: movss {{.*#+}} xmm0 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0] 5827; CHECK-NEXT: cvtss2sd %xmm0, %xmm0 5828; CHECK-NEXT: movss {{.*#+}} xmm2 = [4.4E+1,0.0E+0,0.0E+0,0.0E+0] 5829; CHECK-NEXT: cvtss2sd %xmm2, %xmm2 5830; CHECK-NEXT: movsd %xmm2, -{{[0-9]+}}(%rsp) 5831; CHECK-NEXT: fldl -{{[0-9]+}}(%rsp) 5832; CHECK-NEXT: wait 5833; CHECK-NEXT: retq 5834; 5835; AVX-LABEL: constrained_vector_fpext_v3f32: 5836; AVX: # %bb.0: # %entry 5837; AVX-NEXT: vmovss {{.*#+}} xmm0 = [4.3E+1,0.0E+0,0.0E+0,0.0E+0] 5838; AVX-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0 5839; AVX-NEXT: vmovss {{.*#+}} xmm1 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0] 5840; AVX-NEXT: vcvtss2sd 
%xmm1, %xmm1, %xmm1 5841; AVX-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0] 5842; AVX-NEXT: vmovss {{.*#+}} xmm1 = [4.4E+1,0.0E+0,0.0E+0,0.0E+0] 5843; AVX-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1 5844; AVX-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 5845; AVX-NEXT: retq 5846entry: 5847 %result = call <3 x double> @llvm.experimental.constrained.fpext.v3f64.v3f32( 5848 <3 x float><float 42.0, float 43.0, 5849 float 44.0>, 5850 metadata !"fpexcept.strict") #0 5851 ret <3 x double> %result 5852} 5853 5854define <4 x double> @constrained_vector_fpext_v4f32() #0 { 5855; CHECK-LABEL: constrained_vector_fpext_v4f32: 5856; CHECK: # %bb.0: # %entry 5857; CHECK-NEXT: cvtps2pd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 5858; CHECK-NEXT: cvtps2pd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 5859; CHECK-NEXT: retq 5860; 5861; AVX-LABEL: constrained_vector_fpext_v4f32: 5862; AVX: # %bb.0: # %entry 5863; AVX-NEXT: vcvtps2pd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0 5864; AVX-NEXT: retq 5865entry: 5866 %result = call <4 x double> @llvm.experimental.constrained.fpext.v4f64.v4f32( 5867 <4 x float><float 42.0, float 43.0, 5868 float 44.0, float 45.0>, 5869 metadata !"fpexcept.strict") #0 5870 ret <4 x double> %result 5871} 5872 5873define <1 x float> @constrained_vector_ceil_v1f32_var(ptr %a) #0 { 5874; CHECK-LABEL: constrained_vector_ceil_v1f32_var: 5875; CHECK: # %bb.0: # %entry 5876; CHECK-NEXT: pushq %rax 5877; CHECK-NEXT: .cfi_def_cfa_offset 16 5878; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero 5879; CHECK-NEXT: callq ceilf@PLT 5880; CHECK-NEXT: popq %rax 5881; CHECK-NEXT: .cfi_def_cfa_offset 8 5882; CHECK-NEXT: retq 5883; 5884; AVX-LABEL: constrained_vector_ceil_v1f32_var: 5885; AVX: # %bb.0: # %entry 5886; AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero 5887; AVX-NEXT: vroundss $10, %xmm0, %xmm0, %xmm0 5888; AVX-NEXT: retq 5889entry: 5890 %b = load <1 x float>, ptr %a 5891 %ceil = call <1 x float> @llvm.experimental.constrained.ceil.v1f32( 5892 <1 x float> %b, 5893 metadata 
!"fpexcept.strict") #0 5894 ret <1 x float> %ceil 5895} 5896 5897define <2 x double> @constrained_vector_ceil_v2f64_var(ptr %a) #0 { 5898; CHECK-LABEL: constrained_vector_ceil_v2f64_var: 5899; CHECK: # %bb.0: # %entry 5900; CHECK-NEXT: subq $40, %rsp 5901; CHECK-NEXT: .cfi_def_cfa_offset 48 5902; CHECK-NEXT: movaps (%rdi), %xmm0 5903; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill 5904; CHECK-NEXT: callq ceil@PLT 5905; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 5906; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload 5907; CHECK-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1] 5908; CHECK-NEXT: callq ceil@PLT 5909; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload 5910; CHECK-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0] 5911; CHECK-NEXT: movaps %xmm1, %xmm0 5912; CHECK-NEXT: addq $40, %rsp 5913; CHECK-NEXT: .cfi_def_cfa_offset 8 5914; CHECK-NEXT: retq 5915; 5916; AVX-LABEL: constrained_vector_ceil_v2f64_var: 5917; AVX: # %bb.0: # %entry 5918; AVX-NEXT: vroundpd $10, (%rdi), %xmm0 5919; AVX-NEXT: retq 5920entry: 5921 %b = load <2 x double>, ptr %a 5922 %ceil = call <2 x double> @llvm.experimental.constrained.ceil.v2f64( 5923 <2 x double> %b, 5924 metadata !"fpexcept.strict") #0 5925 ret <2 x double> %ceil 5926} 5927 5928define <3 x float> @constrained_vector_ceil_v3f32_var(ptr %a) #0 { 5929; CHECK-LABEL: constrained_vector_ceil_v3f32_var: 5930; CHECK: # %bb.0: # %entry 5931; CHECK-NEXT: subq $56, %rsp 5932; CHECK-NEXT: .cfi_def_cfa_offset 64 5933; CHECK-NEXT: movaps (%rdi), %xmm0 5934; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill 5935; CHECK-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1] 5936; CHECK-NEXT: callq ceilf@PLT 5937; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 5938; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload 5939; CHECK-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1] 5940; CHECK-NEXT: callq ceilf@PLT 5941; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 5942; CHECK-NEXT: movaps 
(%rsp), %xmm0 # 16-byte Reload 5943; CHECK-NEXT: callq ceilf@PLT 5944; CHECK-NEXT: unpcklps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload 5945; CHECK-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1] 5946; CHECK-NEXT: unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload 5947; CHECK-NEXT: # xmm0 = xmm0[0],mem[0] 5948; CHECK-NEXT: addq $56, %rsp 5949; CHECK-NEXT: .cfi_def_cfa_offset 8 5950; CHECK-NEXT: retq 5951; 5952; AVX-LABEL: constrained_vector_ceil_v3f32_var: 5953; AVX: # %bb.0: # %entry 5954; AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero 5955; AVX-NEXT: vroundss $10, %xmm0, %xmm0, %xmm0 5956; AVX-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero 5957; AVX-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero 5958; AVX-NEXT: vroundss $10, %xmm1, %xmm1, %xmm1 5959; AVX-NEXT: vroundss $10, %xmm2, %xmm2, %xmm2 5960; AVX-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[2,3] 5961; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3] 5962; AVX-NEXT: retq 5963entry: 5964 %b = load <3 x float>, ptr %a 5965 %ceil = call <3 x float> @llvm.experimental.constrained.ceil.v3f32( 5966 <3 x float> %b, 5967 metadata !"fpexcept.strict") #0 5968 ret <3 x float> %ceil 5969} 5970 5971define <3 x double> @constrained_vector_ceil_v3f64_var(ptr %a) #0 { 5972; CHECK-LABEL: constrained_vector_ceil_v3f64_var: 5973; CHECK: # %bb.0: # %entry 5974; CHECK-NEXT: subq $40, %rsp 5975; CHECK-NEXT: .cfi_def_cfa_offset 48 5976; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero 5977; CHECK-NEXT: movsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill 5978; CHECK-NEXT: movaps (%rdi), %xmm0 5979; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill 5980; CHECK-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1] 5981; CHECK-NEXT: callq ceil@PLT 5982; CHECK-NEXT: movsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill 5983; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload 5984; CHECK-NEXT: callq ceil@PLT 5985; CHECK-NEXT: movsd %xmm0, (%rsp) # 8-byte Spill 5986; CHECK-NEXT: movsd 
{{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte Reload 5987; CHECK-NEXT: # xmm0 = mem[0],zero 5988; CHECK-NEXT: callq ceil@PLT 5989; CHECK-NEXT: movsd %xmm0, {{[0-9]+}}(%rsp) 5990; CHECK-NEXT: fldl {{[0-9]+}}(%rsp) 5991; CHECK-NEXT: wait 5992; CHECK-NEXT: movsd (%rsp), %xmm0 # 8-byte Reload 5993; CHECK-NEXT: # xmm0 = mem[0],zero 5994; CHECK-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 8-byte Reload 5995; CHECK-NEXT: # xmm1 = mem[0],zero 5996; CHECK-NEXT: addq $40, %rsp 5997; CHECK-NEXT: .cfi_def_cfa_offset 8 5998; CHECK-NEXT: retq 5999; 6000; AVX-LABEL: constrained_vector_ceil_v3f64_var: 6001; AVX: # %bb.0: # %entry 6002; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero 6003; AVX-NEXT: vroundsd $10, %xmm0, %xmm0, %xmm0 6004; AVX-NEXT: vroundpd $10, (%rdi), %xmm1 6005; AVX-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 6006; AVX-NEXT: retq 6007entry: 6008 %b = load <3 x double>, ptr %a 6009 %ceil = call <3 x double> @llvm.experimental.constrained.ceil.v3f64( 6010 <3 x double> %b, 6011 metadata !"fpexcept.strict") #0 6012 ret <3 x double> %ceil 6013} 6014 6015define <1 x float> @constrained_vector_floor_v1f32_var(ptr %a) #0 { 6016; CHECK-LABEL: constrained_vector_floor_v1f32_var: 6017; CHECK: # %bb.0: # %entry 6018; CHECK-NEXT: pushq %rax 6019; CHECK-NEXT: .cfi_def_cfa_offset 16 6020; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero 6021; CHECK-NEXT: callq floorf@PLT 6022; CHECK-NEXT: popq %rax 6023; CHECK-NEXT: .cfi_def_cfa_offset 8 6024; CHECK-NEXT: retq 6025; 6026; AVX-LABEL: constrained_vector_floor_v1f32_var: 6027; AVX: # %bb.0: # %entry 6028; AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero 6029; AVX-NEXT: vroundss $9, %xmm0, %xmm0, %xmm0 6030; AVX-NEXT: retq 6031entry: 6032 %b = load <1 x float>, ptr %a 6033 %floor = call <1 x float> @llvm.experimental.constrained.floor.v1f32( 6034 <1 x float> %b, 6035 metadata !"fpexcept.strict") #0 6036 ret <1 x float> %floor 6037} 6038 6039 6040define <2 x double> @constrained_vector_floor_v2f64_var(ptr %a) #0 { 6041; 
CHECK-LABEL: constrained_vector_floor_v2f64_var: 6042; CHECK: # %bb.0: # %entry 6043; CHECK-NEXT: subq $40, %rsp 6044; CHECK-NEXT: .cfi_def_cfa_offset 48 6045; CHECK-NEXT: movaps (%rdi), %xmm0 6046; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill 6047; CHECK-NEXT: callq floor@PLT 6048; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 6049; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload 6050; CHECK-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1] 6051; CHECK-NEXT: callq floor@PLT 6052; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload 6053; CHECK-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0] 6054; CHECK-NEXT: movaps %xmm1, %xmm0 6055; CHECK-NEXT: addq $40, %rsp 6056; CHECK-NEXT: .cfi_def_cfa_offset 8 6057; CHECK-NEXT: retq 6058; 6059; AVX-LABEL: constrained_vector_floor_v2f64_var: 6060; AVX: # %bb.0: # %entry 6061; AVX-NEXT: vroundpd $9, (%rdi), %xmm0 6062; AVX-NEXT: retq 6063entry: 6064 %b = load <2 x double>, ptr %a 6065 %floor = call <2 x double> @llvm.experimental.constrained.floor.v2f64( 6066 <2 x double> %b, 6067 metadata !"fpexcept.strict") #0 6068 ret <2 x double> %floor 6069} 6070 6071define <3 x float> @constrained_vector_floor_v3f32_var(ptr %a) #0 { 6072; CHECK-LABEL: constrained_vector_floor_v3f32_var: 6073; CHECK: # %bb.0: # %entry 6074; CHECK-NEXT: subq $56, %rsp 6075; CHECK-NEXT: .cfi_def_cfa_offset 64 6076; CHECK-NEXT: movaps (%rdi), %xmm0 6077; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill 6078; CHECK-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1] 6079; CHECK-NEXT: callq floorf@PLT 6080; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 6081; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload 6082; CHECK-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1] 6083; CHECK-NEXT: callq floorf@PLT 6084; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 6085; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload 6086; CHECK-NEXT: callq floorf@PLT 6087; CHECK-NEXT: unpcklps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 
16-byte Folded Reload 6088; CHECK-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1] 6089; CHECK-NEXT: unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload 6090; CHECK-NEXT: # xmm0 = xmm0[0],mem[0] 6091; CHECK-NEXT: addq $56, %rsp 6092; CHECK-NEXT: .cfi_def_cfa_offset 8 6093; CHECK-NEXT: retq 6094; 6095; AVX-LABEL: constrained_vector_floor_v3f32_var: 6096; AVX: # %bb.0: # %entry 6097; AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero 6098; AVX-NEXT: vroundss $9, %xmm0, %xmm0, %xmm0 6099; AVX-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero 6100; AVX-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero 6101; AVX-NEXT: vroundss $9, %xmm1, %xmm1, %xmm1 6102; AVX-NEXT: vroundss $9, %xmm2, %xmm2, %xmm2 6103; AVX-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[2,3] 6104; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3] 6105; AVX-NEXT: retq 6106entry: 6107 %b = load <3 x float>, ptr %a 6108 %floor = call <3 x float> @llvm.experimental.constrained.floor.v3f32( 6109 <3 x float> %b, 6110 metadata !"fpexcept.strict") #0 6111 ret <3 x float> %floor 6112} 6113 6114define <3 x double> @constrained_vector_floor_v3f64_var(ptr %a) #0 { 6115; CHECK-LABEL: constrained_vector_floor_v3f64_var: 6116; CHECK: # %bb.0: # %entry 6117; CHECK-NEXT: subq $40, %rsp 6118; CHECK-NEXT: .cfi_def_cfa_offset 48 6119; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero 6120; CHECK-NEXT: movsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill 6121; CHECK-NEXT: movaps (%rdi), %xmm0 6122; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill 6123; CHECK-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1] 6124; CHECK-NEXT: callq floor@PLT 6125; CHECK-NEXT: movsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill 6126; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload 6127; CHECK-NEXT: callq floor@PLT 6128; CHECK-NEXT: movsd %xmm0, (%rsp) # 8-byte Spill 6129; CHECK-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte Reload 6130; CHECK-NEXT: # xmm0 = mem[0],zero 6131; CHECK-NEXT: callq floor@PLT 6132; 
CHECK-NEXT: movsd %xmm0, {{[0-9]+}}(%rsp) 6133; CHECK-NEXT: fldl {{[0-9]+}}(%rsp) 6134; CHECK-NEXT: wait 6135; CHECK-NEXT: movsd (%rsp), %xmm0 # 8-byte Reload 6136; CHECK-NEXT: # xmm0 = mem[0],zero 6137; CHECK-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 8-byte Reload 6138; CHECK-NEXT: # xmm1 = mem[0],zero 6139; CHECK-NEXT: addq $40, %rsp 6140; CHECK-NEXT: .cfi_def_cfa_offset 8 6141; CHECK-NEXT: retq 6142; 6143; AVX-LABEL: constrained_vector_floor_v3f64_var: 6144; AVX: # %bb.0: # %entry 6145; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero 6146; AVX-NEXT: vroundsd $9, %xmm0, %xmm0, %xmm0 6147; AVX-NEXT: vroundpd $9, (%rdi), %xmm1 6148; AVX-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 6149; AVX-NEXT: retq 6150entry: 6151 %b = load <3 x double>, ptr %a 6152 %floor = call <3 x double> @llvm.experimental.constrained.floor.v3f64( 6153 <3 x double> %b, 6154 metadata !"fpexcept.strict") #0 6155 ret <3 x double> %floor 6156} 6157 6158define <1 x float> @constrained_vector_round_v1f32_var(ptr %a) #0 { 6159; CHECK-LABEL: constrained_vector_round_v1f32_var: 6160; CHECK: # %bb.0: # %entry 6161; CHECK-NEXT: pushq %rax 6162; CHECK-NEXT: .cfi_def_cfa_offset 16 6163; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero 6164; CHECK-NEXT: callq roundf@PLT 6165; CHECK-NEXT: popq %rax 6166; CHECK-NEXT: .cfi_def_cfa_offset 8 6167; CHECK-NEXT: retq 6168; 6169; AVX-LABEL: constrained_vector_round_v1f32_var: 6170; AVX: # %bb.0: # %entry 6171; AVX-NEXT: pushq %rax 6172; AVX-NEXT: .cfi_def_cfa_offset 16 6173; AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero 6174; AVX-NEXT: callq roundf@PLT 6175; AVX-NEXT: popq %rax 6176; AVX-NEXT: .cfi_def_cfa_offset 8 6177; AVX-NEXT: retq 6178entry: 6179 %b = load <1 x float>, ptr %a 6180 %round = call <1 x float> @llvm.experimental.constrained.round.v1f32( 6181 <1 x float> %b, 6182 metadata !"fpexcept.strict") #0 6183 ret <1 x float> %round 6184} 6185 6186define <2 x double> @constrained_vector_round_v2f64_var(ptr %a) #0 { 6187; CHECK-LABEL: 
constrained_vector_round_v2f64_var: 6188; CHECK: # %bb.0: # %entry 6189; CHECK-NEXT: subq $40, %rsp 6190; CHECK-NEXT: .cfi_def_cfa_offset 48 6191; CHECK-NEXT: movaps (%rdi), %xmm0 6192; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill 6193; CHECK-NEXT: callq round@PLT 6194; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 6195; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload 6196; CHECK-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1] 6197; CHECK-NEXT: callq round@PLT 6198; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload 6199; CHECK-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0] 6200; CHECK-NEXT: movaps %xmm1, %xmm0 6201; CHECK-NEXT: addq $40, %rsp 6202; CHECK-NEXT: .cfi_def_cfa_offset 8 6203; CHECK-NEXT: retq 6204; 6205; AVX-LABEL: constrained_vector_round_v2f64_var: 6206; AVX: # %bb.0: # %entry 6207; AVX-NEXT: subq $40, %rsp 6208; AVX-NEXT: .cfi_def_cfa_offset 48 6209; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero 6210; AVX-NEXT: vmovsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill 6211; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero 6212; AVX-NEXT: callq round@PLT 6213; AVX-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 6214; AVX-NEXT: vmovsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte Reload 6215; AVX-NEXT: # xmm0 = mem[0],zero 6216; AVX-NEXT: callq round@PLT 6217; AVX-NEXT: vunpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload 6218; AVX-NEXT: # xmm0 = xmm0[0],mem[0] 6219; AVX-NEXT: addq $40, %rsp 6220; AVX-NEXT: .cfi_def_cfa_offset 8 6221; AVX-NEXT: retq 6222entry: 6223 %b = load <2 x double>, ptr %a 6224 %round = call <2 x double> @llvm.experimental.constrained.round.v2f64( 6225 <2 x double> %b, 6226 metadata !"fpexcept.strict") #0 6227 ret <2 x double> %round 6228} 6229 6230define <3 x float> @constrained_vector_round_v3f32_var(ptr %a) #0 { 6231; CHECK-LABEL: constrained_vector_round_v3f32_var: 6232; CHECK: # %bb.0: # %entry 6233; CHECK-NEXT: subq $56, %rsp 6234; CHECK-NEXT: .cfi_def_cfa_offset 64 
6235; CHECK-NEXT: movaps (%rdi), %xmm0 6236; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill 6237; CHECK-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1] 6238; CHECK-NEXT: callq roundf@PLT 6239; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 6240; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload 6241; CHECK-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1] 6242; CHECK-NEXT: callq roundf@PLT 6243; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 6244; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload 6245; CHECK-NEXT: callq roundf@PLT 6246; CHECK-NEXT: unpcklps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload 6247; CHECK-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1] 6248; CHECK-NEXT: unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload 6249; CHECK-NEXT: # xmm0 = xmm0[0],mem[0] 6250; CHECK-NEXT: addq $56, %rsp 6251; CHECK-NEXT: .cfi_def_cfa_offset 8 6252; CHECK-NEXT: retq 6253; 6254; AVX-LABEL: constrained_vector_round_v3f32_var: 6255; AVX: # %bb.0: # %entry 6256; AVX-NEXT: pushq %rbx 6257; AVX-NEXT: .cfi_def_cfa_offset 16 6258; AVX-NEXT: subq $48, %rsp 6259; AVX-NEXT: .cfi_def_cfa_offset 64 6260; AVX-NEXT: .cfi_offset %rbx, -16 6261; AVX-NEXT: movq %rdi, %rbx 6262; AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero 6263; AVX-NEXT: callq roundf@PLT 6264; AVX-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 6265; AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero 6266; AVX-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero 6267; AVX-NEXT: vmovss %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill 6268; AVX-NEXT: callq roundf@PLT 6269; AVX-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 6270; AVX-NEXT: vmovss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload 6271; AVX-NEXT: # xmm0 = mem[0],zero,zero,zero 6272; AVX-NEXT: callq roundf@PLT 6273; AVX-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload 6274; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3] 6275; AVX-NEXT: vinsertps 
$32, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload 6276; AVX-NEXT: # xmm0 = xmm0[0,1],mem[0],xmm0[3] 6277; AVX-NEXT: addq $48, %rsp 6278; AVX-NEXT: .cfi_def_cfa_offset 16 6279; AVX-NEXT: popq %rbx 6280; AVX-NEXT: .cfi_def_cfa_offset 8 6281; AVX-NEXT: retq 6282entry: 6283 %b = load <3 x float>, ptr %a 6284 %round = call <3 x float> @llvm.experimental.constrained.round.v3f32( 6285 <3 x float> %b, 6286 metadata !"fpexcept.strict") #0 6287 ret <3 x float> %round 6288} 6289 6290 6291define <3 x double> @constrained_vector_round_v3f64_var(ptr %a) #0 { 6292; CHECK-LABEL: constrained_vector_round_v3f64_var: 6293; CHECK: # %bb.0: # %entry 6294; CHECK-NEXT: subq $40, %rsp 6295; CHECK-NEXT: .cfi_def_cfa_offset 48 6296; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero 6297; CHECK-NEXT: movsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill 6298; CHECK-NEXT: movaps (%rdi), %xmm0 6299; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill 6300; CHECK-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1] 6301; CHECK-NEXT: callq round@PLT 6302; CHECK-NEXT: movsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill 6303; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload 6304; CHECK-NEXT: callq round@PLT 6305; CHECK-NEXT: movsd %xmm0, (%rsp) # 8-byte Spill 6306; CHECK-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte Reload 6307; CHECK-NEXT: # xmm0 = mem[0],zero 6308; CHECK-NEXT: callq round@PLT 6309; CHECK-NEXT: movsd %xmm0, {{[0-9]+}}(%rsp) 6310; CHECK-NEXT: fldl {{[0-9]+}}(%rsp) 6311; CHECK-NEXT: wait 6312; CHECK-NEXT: movsd (%rsp), %xmm0 # 8-byte Reload 6313; CHECK-NEXT: # xmm0 = mem[0],zero 6314; CHECK-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 8-byte Reload 6315; CHECK-NEXT: # xmm1 = mem[0],zero 6316; CHECK-NEXT: addq $40, %rsp 6317; CHECK-NEXT: .cfi_def_cfa_offset 8 6318; CHECK-NEXT: retq 6319; 6320; AVX-LABEL: constrained_vector_round_v3f64_var: 6321; AVX: # %bb.0: # %entry 6322; AVX-NEXT: pushq %rbx 6323; AVX-NEXT: .cfi_def_cfa_offset 16 6324; AVX-NEXT: subq $48, %rsp 6325; 
AVX-NEXT: .cfi_def_cfa_offset 64 6326; AVX-NEXT: .cfi_offset %rbx, -16 6327; AVX-NEXT: movq %rdi, %rbx 6328; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero 6329; AVX-NEXT: vmovsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill 6330; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero 6331; AVX-NEXT: callq round@PLT 6332; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill 6333; AVX-NEXT: vmovsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte Reload 6334; AVX-NEXT: # xmm0 = mem[0],zero 6335; AVX-NEXT: callq round@PLT 6336; AVX-NEXT: vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload 6337; AVX-NEXT: # xmm0 = xmm0[0],mem[0] 6338; AVX-NEXT: vmovups %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill 6339; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero 6340; AVX-NEXT: vzeroupper 6341; AVX-NEXT: callq round@PLT 6342; AVX-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm1 # 32-byte Reload 6343; AVX-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 6344; AVX-NEXT: addq $48, %rsp 6345; AVX-NEXT: .cfi_def_cfa_offset 16 6346; AVX-NEXT: popq %rbx 6347; AVX-NEXT: .cfi_def_cfa_offset 8 6348; AVX-NEXT: retq 6349entry: 6350 %b = load <3 x double>, ptr %a 6351 %round = call <3 x double> @llvm.experimental.constrained.round.v3f64( 6352 <3 x double> %b, 6353 metadata !"fpexcept.strict") #0 6354 ret <3 x double> %round 6355} 6356 6357define <1 x float> @constrained_vector_trunc_v1f32_var(ptr %a) #0 { 6358; CHECK-LABEL: constrained_vector_trunc_v1f32_var: 6359; CHECK: # %bb.0: # %entry 6360; CHECK-NEXT: pushq %rax 6361; CHECK-NEXT: .cfi_def_cfa_offset 16 6362; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero 6363; CHECK-NEXT: callq truncf@PLT 6364; CHECK-NEXT: popq %rax 6365; CHECK-NEXT: .cfi_def_cfa_offset 8 6366; CHECK-NEXT: retq 6367; 6368; AVX-LABEL: constrained_vector_trunc_v1f32_var: 6369; AVX: # %bb.0: # %entry 6370; AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero 6371; AVX-NEXT: vroundss $11, %xmm0, %xmm0, %xmm0 6372; AVX-NEXT: retq 6373entry: 6374 %b = load <1 x float>, ptr %a 6375 %trunc = call <1 
x float> @llvm.experimental.constrained.trunc.v1f32( 6376 <1 x float> %b, 6377 metadata !"fpexcept.strict") #0 6378 ret <1 x float> %trunc 6379} 6380 6381define <2 x double> @constrained_vector_trunc_v2f64_var(ptr %a) #0 { 6382; CHECK-LABEL: constrained_vector_trunc_v2f64_var: 6383; CHECK: # %bb.0: # %entry 6384; CHECK-NEXT: subq $40, %rsp 6385; CHECK-NEXT: .cfi_def_cfa_offset 48 6386; CHECK-NEXT: movaps (%rdi), %xmm0 6387; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill 6388; CHECK-NEXT: callq trunc@PLT 6389; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 6390; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload 6391; CHECK-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1] 6392; CHECK-NEXT: callq trunc@PLT 6393; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload 6394; CHECK-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0] 6395; CHECK-NEXT: movaps %xmm1, %xmm0 6396; CHECK-NEXT: addq $40, %rsp 6397; CHECK-NEXT: .cfi_def_cfa_offset 8 6398; CHECK-NEXT: retq 6399; 6400; AVX-LABEL: constrained_vector_trunc_v2f64_var: 6401; AVX: # %bb.0: # %entry 6402; AVX-NEXT: vroundpd $11, (%rdi), %xmm0 6403; AVX-NEXT: retq 6404entry: 6405 %b = load <2 x double>, ptr %a 6406 %trunc = call <2 x double> @llvm.experimental.constrained.trunc.v2f64( 6407 <2 x double> %b, 6408 metadata !"fpexcept.strict") #0 6409 ret <2 x double> %trunc 6410} 6411 6412define <3 x float> @constrained_vector_trunc_v3f32_var(ptr %a) #0 { 6413; CHECK-LABEL: constrained_vector_trunc_v3f32_var: 6414; CHECK: # %bb.0: # %entry 6415; CHECK-NEXT: subq $56, %rsp 6416; CHECK-NEXT: .cfi_def_cfa_offset 64 6417; CHECK-NEXT: movaps (%rdi), %xmm0 6418; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill 6419; CHECK-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1] 6420; CHECK-NEXT: callq truncf@PLT 6421; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 6422; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload 6423; CHECK-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1] 6424; CHECK-NEXT: callq truncf@PLT 
6425; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 6426; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload 6427; CHECK-NEXT: callq truncf@PLT 6428; CHECK-NEXT: unpcklps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload 6429; CHECK-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1] 6430; CHECK-NEXT: unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload 6431; CHECK-NEXT: # xmm0 = xmm0[0],mem[0] 6432; CHECK-NEXT: addq $56, %rsp 6433; CHECK-NEXT: .cfi_def_cfa_offset 8 6434; CHECK-NEXT: retq 6435; 6436; AVX-LABEL: constrained_vector_trunc_v3f32_var: 6437; AVX: # %bb.0: # %entry 6438; AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero 6439; AVX-NEXT: vroundss $11, %xmm0, %xmm0, %xmm0 6440; AVX-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero 6441; AVX-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero 6442; AVX-NEXT: vroundss $11, %xmm1, %xmm1, %xmm1 6443; AVX-NEXT: vroundss $11, %xmm2, %xmm2, %xmm2 6444; AVX-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[2,3] 6445; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3] 6446; AVX-NEXT: retq 6447entry: 6448 %b = load <3 x float>, ptr %a 6449 %trunc = call <3 x float> @llvm.experimental.constrained.trunc.v3f32( 6450 <3 x float> %b, 6451 metadata !"fpexcept.strict") #0 6452 ret <3 x float> %trunc 6453} 6454 6455define <3 x double> @constrained_vector_trunc_v3f64_var(ptr %a) #0 { 6456; CHECK-LABEL: constrained_vector_trunc_v3f64_var: 6457; CHECK: # %bb.0: # %entry 6458; CHECK-NEXT: subq $40, %rsp 6459; CHECK-NEXT: .cfi_def_cfa_offset 48 6460; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero 6461; CHECK-NEXT: movsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill 6462; CHECK-NEXT: movaps (%rdi), %xmm0 6463; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill 6464; CHECK-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1] 6465; CHECK-NEXT: callq trunc@PLT 6466; CHECK-NEXT: movsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill 6467; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload 6468; 
CHECK-NEXT: callq trunc@PLT 6469; CHECK-NEXT: movsd %xmm0, (%rsp) # 8-byte Spill 6470; CHECK-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte Reload 6471; CHECK-NEXT: # xmm0 = mem[0],zero 6472; CHECK-NEXT: callq trunc@PLT 6473; CHECK-NEXT: movsd %xmm0, {{[0-9]+}}(%rsp) 6474; CHECK-NEXT: fldl {{[0-9]+}}(%rsp) 6475; CHECK-NEXT: wait 6476; CHECK-NEXT: movsd (%rsp), %xmm0 # 8-byte Reload 6477; CHECK-NEXT: # xmm0 = mem[0],zero 6478; CHECK-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 8-byte Reload 6479; CHECK-NEXT: # xmm1 = mem[0],zero 6480; CHECK-NEXT: addq $40, %rsp 6481; CHECK-NEXT: .cfi_def_cfa_offset 8 6482; CHECK-NEXT: retq 6483; 6484; AVX-LABEL: constrained_vector_trunc_v3f64_var: 6485; AVX: # %bb.0: # %entry 6486; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero 6487; AVX-NEXT: vroundsd $11, %xmm0, %xmm0, %xmm0 6488; AVX-NEXT: vroundpd $11, (%rdi), %xmm1 6489; AVX-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 6490; AVX-NEXT: retq 6491entry: 6492 %b = load <3 x double>, ptr %a 6493 %trunc = call <3 x double> @llvm.experimental.constrained.trunc.v3f64( 6494 <3 x double> %b, 6495 metadata !"fpexcept.strict") #0 6496 ret <3 x double> %trunc 6497} 6498 6499define <1 x double> @constrained_vector_sitofp_v1f64_v1i32(<1 x i32> %x) #0 { 6500; CHECK-LABEL: constrained_vector_sitofp_v1f64_v1i32: 6501; CHECK: # %bb.0: # %entry 6502; CHECK-NEXT: cvtsi2sd %edi, %xmm0 6503; CHECK-NEXT: retq 6504; 6505; AVX-LABEL: constrained_vector_sitofp_v1f64_v1i32: 6506; AVX: # %bb.0: # %entry 6507; AVX-NEXT: vcvtsi2sd %edi, %xmm0, %xmm0 6508; AVX-NEXT: retq 6509entry: 6510 %result = call <1 x double> 6511 @llvm.experimental.constrained.sitofp.v1f64.v1i32(<1 x i32> %x, 6512 metadata !"round.dynamic", 6513 metadata !"fpexcept.strict") #0 6514 ret <1 x double> %result 6515} 6516 6517define <1 x float> @constrained_vector_sitofp_v1f32_v1i32(<1 x i32> %x) #0 { 6518; CHECK-LABEL: constrained_vector_sitofp_v1f32_v1i32: 6519; CHECK: # %bb.0: # %entry 6520; CHECK-NEXT: cvtsi2ss %edi, %xmm0 6521; 
CHECK-NEXT: retq 6522; 6523; AVX-LABEL: constrained_vector_sitofp_v1f32_v1i32: 6524; AVX: # %bb.0: # %entry 6525; AVX-NEXT: vcvtsi2ss %edi, %xmm0, %xmm0 6526; AVX-NEXT: retq 6527entry: 6528 %result = call <1 x float> 6529 @llvm.experimental.constrained.sitofp.v1f32.v1i32(<1 x i32> %x, 6530 metadata !"round.dynamic", 6531 metadata !"fpexcept.strict") #0 6532 ret <1 x float> %result 6533} 6534 6535define <1 x double> @constrained_vector_sitofp_v1f64_v1i64(<1 x i64> %x) #0 { 6536; CHECK-LABEL: constrained_vector_sitofp_v1f64_v1i64: 6537; CHECK: # %bb.0: # %entry 6538; CHECK-NEXT: cvtsi2sd %rdi, %xmm0 6539; CHECK-NEXT: retq 6540; 6541; AVX-LABEL: constrained_vector_sitofp_v1f64_v1i64: 6542; AVX: # %bb.0: # %entry 6543; AVX-NEXT: vcvtsi2sd %rdi, %xmm0, %xmm0 6544; AVX-NEXT: retq 6545entry: 6546 %result = call <1 x double> 6547 @llvm.experimental.constrained.sitofp.v1f64.v1i64(<1 x i64> %x, 6548 metadata !"round.dynamic", 6549 metadata !"fpexcept.strict") #0 6550 ret <1 x double> %result 6551} 6552 6553define <1 x float> @constrained_vector_sitofp_v1f32_v1i64(<1 x i64> %x) #0 { 6554; CHECK-LABEL: constrained_vector_sitofp_v1f32_v1i64: 6555; CHECK: # %bb.0: # %entry 6556; CHECK-NEXT: cvtsi2ss %rdi, %xmm0 6557; CHECK-NEXT: retq 6558; 6559; AVX-LABEL: constrained_vector_sitofp_v1f32_v1i64: 6560; AVX: # %bb.0: # %entry 6561; AVX-NEXT: vcvtsi2ss %rdi, %xmm0, %xmm0 6562; AVX-NEXT: retq 6563entry: 6564 %result = call <1 x float> 6565 @llvm.experimental.constrained.sitofp.v1f32.v1i64(<1 x i64> %x, 6566 metadata !"round.dynamic", 6567 metadata !"fpexcept.strict") #0 6568 ret <1 x float> %result 6569} 6570 6571define <2 x double> @constrained_vector_sitofp_v2f64_v2i32(<2 x i32> %x) #0 { 6572; CHECK-LABEL: constrained_vector_sitofp_v2f64_v2i32: 6573; CHECK: # %bb.0: # %entry 6574; CHECK-NEXT: cvtdq2pd %xmm0, %xmm0 6575; CHECK-NEXT: retq 6576; 6577; AVX-LABEL: constrained_vector_sitofp_v2f64_v2i32: 6578; AVX: # %bb.0: # %entry 6579; AVX-NEXT: vcvtdq2pd %xmm0, %xmm0 6580; AVX-NEXT: 
retq 6581entry: 6582 %result = call <2 x double> 6583 @llvm.experimental.constrained.sitofp.v2f64.v2i32(<2 x i32> %x, 6584 metadata !"round.dynamic", 6585 metadata !"fpexcept.strict") #0 6586 ret <2 x double> %result 6587} 6588 6589define <2 x float> @constrained_vector_sitofp_v2f32_v2i32(<2 x i32> %x) #0 { 6590; CHECK-LABEL: constrained_vector_sitofp_v2f32_v2i32: 6591; CHECK: # %bb.0: # %entry 6592; CHECK-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero 6593; CHECK-NEXT: cvtdq2ps %xmm0, %xmm0 6594; CHECK-NEXT: retq 6595; 6596; AVX-LABEL: constrained_vector_sitofp_v2f32_v2i32: 6597; AVX: # %bb.0: # %entry 6598; AVX-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero 6599; AVX-NEXT: vcvtdq2ps %xmm0, %xmm0 6600; AVX-NEXT: retq 6601entry: 6602 %result = call <2 x float> 6603 @llvm.experimental.constrained.sitofp.v2f32.v2i32(<2 x i32> %x, 6604 metadata !"round.dynamic", 6605 metadata !"fpexcept.strict") #0 6606 ret <2 x float> %result 6607} 6608 6609define <2 x double> @constrained_vector_sitofp_v2f64_v2i64(<2 x i64> %x) #0 { 6610; CHECK-LABEL: constrained_vector_sitofp_v2f64_v2i64: 6611; CHECK: # %bb.0: # %entry 6612; CHECK-NEXT: movq %xmm0, %rax 6613; CHECK-NEXT: cvtsi2sd %rax, %xmm1 6614; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] 6615; CHECK-NEXT: movq %xmm0, %rax 6616; CHECK-NEXT: xorps %xmm0, %xmm0 6617; CHECK-NEXT: cvtsi2sd %rax, %xmm0 6618; CHECK-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0] 6619; CHECK-NEXT: movapd %xmm1, %xmm0 6620; CHECK-NEXT: retq 6621; 6622; AVX1-LABEL: constrained_vector_sitofp_v2f64_v2i64: 6623; AVX1: # %bb.0: # %entry 6624; AVX1-NEXT: vpextrq $1, %xmm0, %rax 6625; AVX1-NEXT: vcvtsi2sd %rax, %xmm1, %xmm1 6626; AVX1-NEXT: vmovq %xmm0, %rax 6627; AVX1-NEXT: vcvtsi2sd %rax, %xmm2, %xmm0 6628; AVX1-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] 6629; AVX1-NEXT: retq 6630; 6631; AVX512F-LABEL: constrained_vector_sitofp_v2f64_v2i64: 6632; AVX512F: # %bb.0: # %entry 6633; AVX512F-NEXT: vpextrq $1, %xmm0, %rax 6634; AVX512F-NEXT: vcvtsi2sd %rax, %xmm1, 
%xmm1 6635; AVX512F-NEXT: vmovq %xmm0, %rax 6636; AVX512F-NEXT: vcvtsi2sd %rax, %xmm2, %xmm0 6637; AVX512F-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] 6638; AVX512F-NEXT: retq 6639; 6640; AVX512DQ-LABEL: constrained_vector_sitofp_v2f64_v2i64: 6641; AVX512DQ: # %bb.0: # %entry 6642; AVX512DQ-NEXT: vmovaps %xmm0, %xmm0 6643; AVX512DQ-NEXT: vcvtqq2pd %zmm0, %zmm0 6644; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 6645; AVX512DQ-NEXT: vzeroupper 6646; AVX512DQ-NEXT: retq 6647entry: 6648 %result = call <2 x double> 6649 @llvm.experimental.constrained.sitofp.v2f64.v2i64(<2 x i64> %x, 6650 metadata !"round.dynamic", 6651 metadata !"fpexcept.strict") #0 6652 ret <2 x double> %result 6653} 6654 6655define <2 x float> @constrained_vector_sitofp_v2f32_v2i64(<2 x i64> %x) #0 { 6656; CHECK-LABEL: constrained_vector_sitofp_v2f32_v2i64: 6657; CHECK: # %bb.0: # %entry 6658; CHECK-NEXT: movq %xmm0, %rax 6659; CHECK-NEXT: cvtsi2ss %rax, %xmm1 6660; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] 6661; CHECK-NEXT: movq %xmm0, %rax 6662; CHECK-NEXT: xorps %xmm0, %xmm0 6663; CHECK-NEXT: cvtsi2ss %rax, %xmm0 6664; CHECK-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] 6665; CHECK-NEXT: movaps %xmm1, %xmm0 6666; CHECK-NEXT: retq 6667; 6668; AVX-LABEL: constrained_vector_sitofp_v2f32_v2i64: 6669; AVX: # %bb.0: # %entry 6670; AVX-NEXT: vpextrq $1, %xmm0, %rax 6671; AVX-NEXT: vcvtsi2ss %rax, %xmm1, %xmm1 6672; AVX-NEXT: vmovq %xmm0, %rax 6673; AVX-NEXT: vcvtsi2ss %rax, %xmm2, %xmm0 6674; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3] 6675; AVX-NEXT: retq 6676entry: 6677 %result = call <2 x float> 6678 @llvm.experimental.constrained.sitofp.v2f32.v2i64(<2 x i64> %x, 6679 metadata !"round.dynamic", 6680 metadata !"fpexcept.strict") #0 6681 ret <2 x float> %result 6682} 6683 6684define <3 x double> @constrained_vector_sitofp_v3f64_v3i32(<3 x i32> %x) #0 { 6685; CHECK-LABEL: constrained_vector_sitofp_v3f64_v3i32: 6686; CHECK: # %bb.0: # %entry 
6687; CHECK-NEXT: movd %xmm0, %eax 6688; CHECK-NEXT: cvtsi2sd %eax, %xmm2 6689; CHECK-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] 6690; CHECK-NEXT: movd %xmm1, %eax 6691; CHECK-NEXT: xorps %xmm1, %xmm1 6692; CHECK-NEXT: cvtsi2sd %eax, %xmm1 6693; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] 6694; CHECK-NEXT: movd %xmm0, %eax 6695; CHECK-NEXT: xorps %xmm0, %xmm0 6696; CHECK-NEXT: cvtsi2sd %eax, %xmm0 6697; CHECK-NEXT: movsd %xmm0, -{{[0-9]+}}(%rsp) 6698; CHECK-NEXT: fldl -{{[0-9]+}}(%rsp) 6699; CHECK-NEXT: wait 6700; CHECK-NEXT: movapd %xmm2, %xmm0 6701; CHECK-NEXT: retq 6702; 6703; AVX-LABEL: constrained_vector_sitofp_v3f64_v3i32: 6704; AVX: # %bb.0: # %entry 6705; AVX-NEXT: vextractps $1, %xmm0, %eax 6706; AVX-NEXT: vcvtsi2sd %eax, %xmm1, %xmm1 6707; AVX-NEXT: vmovd %xmm0, %eax 6708; AVX-NEXT: vcvtsi2sd %eax, %xmm2, %xmm2 6709; AVX-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm2[0],xmm1[0] 6710; AVX-NEXT: vpextrd $2, %xmm0, %eax 6711; AVX-NEXT: vcvtsi2sd %eax, %xmm3, %xmm0 6712; AVX-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 6713; AVX-NEXT: retq 6714entry: 6715 %result = call <3 x double> 6716 @llvm.experimental.constrained.sitofp.v3f64.v3i32(<3 x i32> %x, 6717 metadata !"round.dynamic", 6718 metadata !"fpexcept.strict") #0 6719 ret <3 x double> %result 6720} 6721 6722define <3 x float> @constrained_vector_sitofp_v3f32_v3i32(<3 x i32> %x) #0 { 6723; CHECK-LABEL: constrained_vector_sitofp_v3f32_v3i32: 6724; CHECK: # %bb.0: # %entry 6725; CHECK-NEXT: movd %xmm0, %eax 6726; CHECK-NEXT: cvtsi2ss %eax, %xmm1 6727; CHECK-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,1,1] 6728; CHECK-NEXT: movd %xmm2, %eax 6729; CHECK-NEXT: xorps %xmm2, %xmm2 6730; CHECK-NEXT: cvtsi2ss %eax, %xmm2 6731; CHECK-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1] 6732; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] 6733; CHECK-NEXT: movd %xmm0, %eax 6734; CHECK-NEXT: xorps %xmm0, %xmm0 6735; CHECK-NEXT: cvtsi2ss %eax, %xmm0 6736; CHECK-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0] 6737; 
CHECK-NEXT: movaps %xmm1, %xmm0 6738; CHECK-NEXT: retq 6739; 6740; AVX-LABEL: constrained_vector_sitofp_v3f32_v3i32: 6741; AVX: # %bb.0: # %entry 6742; AVX-NEXT: vextractps $1, %xmm0, %eax 6743; AVX-NEXT: vcvtsi2ss %eax, %xmm1, %xmm1 6744; AVX-NEXT: vmovd %xmm0, %eax 6745; AVX-NEXT: vcvtsi2ss %eax, %xmm2, %xmm2 6746; AVX-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3] 6747; AVX-NEXT: vpextrd $2, %xmm0, %eax 6748; AVX-NEXT: vcvtsi2ss %eax, %xmm3, %xmm0 6749; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3] 6750; AVX-NEXT: retq 6751entry: 6752 %result = call <3 x float> 6753 @llvm.experimental.constrained.sitofp.v3f32.v3i32(<3 x i32> %x, 6754 metadata !"round.dynamic", 6755 metadata !"fpexcept.strict") #0 6756 ret <3 x float> %result 6757} 6758 6759define <3 x double> @constrained_vector_sitofp_v3f64_v3i64(<3 x i64> %x) #0 { 6760; CHECK-LABEL: constrained_vector_sitofp_v3f64_v3i64: 6761; CHECK: # %bb.0: # %entry 6762; CHECK-NEXT: cvtsi2sd %rsi, %xmm1 6763; CHECK-NEXT: cvtsi2sd %rdi, %xmm0 6764; CHECK-NEXT: cvtsi2sd %rdx, %xmm2 6765; CHECK-NEXT: movsd %xmm2, -{{[0-9]+}}(%rsp) 6766; CHECK-NEXT: fldl -{{[0-9]+}}(%rsp) 6767; CHECK-NEXT: wait 6768; CHECK-NEXT: retq 6769; 6770; AVX1-LABEL: constrained_vector_sitofp_v3f64_v3i64: 6771; AVX1: # %bb.0: # %entry 6772; AVX1-NEXT: vpextrq $1, %xmm0, %rax 6773; AVX1-NEXT: vcvtsi2sd %rax, %xmm1, %xmm1 6774; AVX1-NEXT: vmovq %xmm0, %rax 6775; AVX1-NEXT: vcvtsi2sd %rax, %xmm2, %xmm2 6776; AVX1-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm2[0],xmm1[0] 6777; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 6778; AVX1-NEXT: vmovq %xmm0, %rax 6779; AVX1-NEXT: vcvtsi2sd %rax, %xmm3, %xmm0 6780; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 6781; AVX1-NEXT: retq 6782; 6783; AVX512-LABEL: constrained_vector_sitofp_v3f64_v3i64: 6784; AVX512: # %bb.0: # %entry 6785; AVX512-NEXT: vpextrq $1, %xmm0, %rax 6786; AVX512-NEXT: vcvtsi2sd %rax, %xmm1, %xmm1 6787; AVX512-NEXT: vmovq %xmm0, %rax 6788; AVX512-NEXT: vcvtsi2sd %rax, %xmm2, %xmm2 
6789; AVX512-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm2[0],xmm1[0] 6790; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm0 6791; AVX512-NEXT: vmovq %xmm0, %rax 6792; AVX512-NEXT: vcvtsi2sd %rax, %xmm3, %xmm0 6793; AVX512-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 6794; AVX512-NEXT: retq 6795entry: 6796 %result = call <3 x double> 6797 @llvm.experimental.constrained.sitofp.v3f64.v3i64(<3 x i64> %x, 6798 metadata !"round.dynamic", 6799 metadata !"fpexcept.strict") #0 6800 ret <3 x double> %result 6801} 6802 6803define <3 x float> @constrained_vector_sitofp_v3f32_v3i64(<3 x i64> %x) #0 { 6804; CHECK-LABEL: constrained_vector_sitofp_v3f32_v3i64: 6805; CHECK: # %bb.0: # %entry 6806; CHECK-NEXT: cvtsi2ss %rsi, %xmm1 6807; CHECK-NEXT: cvtsi2ss %rdi, %xmm0 6808; CHECK-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 6809; CHECK-NEXT: xorps %xmm1, %xmm1 6810; CHECK-NEXT: cvtsi2ss %rdx, %xmm1 6811; CHECK-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] 6812; CHECK-NEXT: retq 6813; 6814; AVX1-LABEL: constrained_vector_sitofp_v3f32_v3i64: 6815; AVX1: # %bb.0: # %entry 6816; AVX1-NEXT: vpextrq $1, %xmm0, %rax 6817; AVX1-NEXT: vcvtsi2ss %rax, %xmm1, %xmm1 6818; AVX1-NEXT: vmovq %xmm0, %rax 6819; AVX1-NEXT: vcvtsi2ss %rax, %xmm2, %xmm2 6820; AVX1-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3] 6821; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 6822; AVX1-NEXT: vmovq %xmm0, %rax 6823; AVX1-NEXT: vcvtsi2ss %rax, %xmm3, %xmm0 6824; AVX1-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3] 6825; AVX1-NEXT: vzeroupper 6826; AVX1-NEXT: retq 6827; 6828; AVX512-LABEL: constrained_vector_sitofp_v3f32_v3i64: 6829; AVX512: # %bb.0: # %entry 6830; AVX512-NEXT: vpextrq $1, %xmm0, %rax 6831; AVX512-NEXT: vcvtsi2ss %rax, %xmm1, %xmm1 6832; AVX512-NEXT: vmovq %xmm0, %rax 6833; AVX512-NEXT: vcvtsi2ss %rax, %xmm2, %xmm2 6834; AVX512-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3] 6835; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm0 6836; AVX512-NEXT: vmovq %xmm0, %rax 6837; 
AVX512-NEXT: vcvtsi2ss %rax, %xmm3, %xmm0 6838; AVX512-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3] 6839; AVX512-NEXT: vzeroupper 6840; AVX512-NEXT: retq 6841entry: 6842 %result = call <3 x float> 6843 @llvm.experimental.constrained.sitofp.v3f32.v3i64(<3 x i64> %x, 6844 metadata !"round.dynamic", 6845 metadata !"fpexcept.strict") #0 6846 ret <3 x float> %result 6847} 6848 6849define <4 x double> @constrained_vector_sitofp_v4f64_v4i32(<4 x i32> %x) #0 { 6850; CHECK-LABEL: constrained_vector_sitofp_v4f64_v4i32: 6851; CHECK: # %bb.0: # %entry 6852; CHECK-NEXT: cvtdq2pd %xmm0, %xmm2 6853; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] 6854; CHECK-NEXT: cvtdq2pd %xmm0, %xmm1 6855; CHECK-NEXT: movaps %xmm2, %xmm0 6856; CHECK-NEXT: retq 6857; 6858; AVX-LABEL: constrained_vector_sitofp_v4f64_v4i32: 6859; AVX: # %bb.0: # %entry 6860; AVX-NEXT: vcvtdq2pd %xmm0, %ymm0 6861; AVX-NEXT: retq 6862entry: 6863 %result = call <4 x double> 6864 @llvm.experimental.constrained.sitofp.v4f64.v4i32(<4 x i32> %x, 6865 metadata !"round.dynamic", 6866 metadata !"fpexcept.strict") #0 6867 ret <4 x double> %result 6868} 6869 6870define <4 x float> @constrained_vector_sitofp_v4f32_v4i32(<4 x i32> %x) #0 { 6871; CHECK-LABEL: constrained_vector_sitofp_v4f32_v4i32: 6872; CHECK: # %bb.0: # %entry 6873; CHECK-NEXT: cvtdq2ps %xmm0, %xmm0 6874; CHECK-NEXT: retq 6875; 6876; AVX-LABEL: constrained_vector_sitofp_v4f32_v4i32: 6877; AVX: # %bb.0: # %entry 6878; AVX-NEXT: vcvtdq2ps %xmm0, %xmm0 6879; AVX-NEXT: retq 6880entry: 6881 %result = call <4 x float> 6882 @llvm.experimental.constrained.sitofp.v4f32.v4i32(<4 x i32> %x, 6883 metadata !"round.dynamic", 6884 metadata !"fpexcept.strict") #0 6885 ret <4 x float> %result 6886} 6887 6888define <4 x double> @constrained_vector_sitofp_v4f64_v4i64(<4 x i64> %x) #0 { 6889; CHECK-LABEL: constrained_vector_sitofp_v4f64_v4i64: 6890; CHECK: # %bb.0: # %entry 6891; CHECK-NEXT: movq %xmm0, %rax 6892; CHECK-NEXT: cvtsi2sd %rax, %xmm2 6893; CHECK-NEXT: 
pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] 6894; CHECK-NEXT: movq %xmm0, %rax 6895; CHECK-NEXT: xorps %xmm0, %xmm0 6896; CHECK-NEXT: cvtsi2sd %rax, %xmm0 6897; CHECK-NEXT: unpcklpd {{.*#+}} xmm2 = xmm2[0],xmm0[0] 6898; CHECK-NEXT: movq %xmm1, %rax 6899; CHECK-NEXT: cvtsi2sd %rax, %xmm3 6900; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3] 6901; CHECK-NEXT: movq %xmm0, %rax 6902; CHECK-NEXT: xorps %xmm0, %xmm0 6903; CHECK-NEXT: cvtsi2sd %rax, %xmm0 6904; CHECK-NEXT: unpcklpd {{.*#+}} xmm3 = xmm3[0],xmm0[0] 6905; CHECK-NEXT: movapd %xmm2, %xmm0 6906; CHECK-NEXT: movapd %xmm3, %xmm1 6907; CHECK-NEXT: retq 6908; 6909; AVX1-LABEL: constrained_vector_sitofp_v4f64_v4i64: 6910; AVX1: # %bb.0: # %entry 6911; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 6912; AVX1-NEXT: vpextrq $1, %xmm1, %rax 6913; AVX1-NEXT: vcvtsi2sd %rax, %xmm2, %xmm2 6914; AVX1-NEXT: vmovq %xmm1, %rax 6915; AVX1-NEXT: vcvtsi2sd %rax, %xmm3, %xmm1 6916; AVX1-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0] 6917; AVX1-NEXT: vpextrq $1, %xmm0, %rax 6918; AVX1-NEXT: vcvtsi2sd %rax, %xmm3, %xmm2 6919; AVX1-NEXT: vmovq %xmm0, %rax 6920; AVX1-NEXT: vcvtsi2sd %rax, %xmm3, %xmm0 6921; AVX1-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0] 6922; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 6923; AVX1-NEXT: retq 6924; 6925; AVX512F-LABEL: constrained_vector_sitofp_v4f64_v4i64: 6926; AVX512F: # %bb.0: # %entry 6927; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm1 6928; AVX512F-NEXT: vpextrq $1, %xmm1, %rax 6929; AVX512F-NEXT: vcvtsi2sd %rax, %xmm2, %xmm2 6930; AVX512F-NEXT: vmovq %xmm1, %rax 6931; AVX512F-NEXT: vcvtsi2sd %rax, %xmm3, %xmm1 6932; AVX512F-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0] 6933; AVX512F-NEXT: vpextrq $1, %xmm0, %rax 6934; AVX512F-NEXT: vcvtsi2sd %rax, %xmm3, %xmm2 6935; AVX512F-NEXT: vmovq %xmm0, %rax 6936; AVX512F-NEXT: vcvtsi2sd %rax, %xmm3, %xmm0 6937; AVX512F-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0] 6938; AVX512F-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 6939; AVX512F-NEXT: retq 6940; 6941; 
AVX512DQ-LABEL: constrained_vector_sitofp_v4f64_v4i64: 6942; AVX512DQ: # %bb.0: # %entry 6943; AVX512DQ-NEXT: vmovaps %ymm0, %ymm0 6944; AVX512DQ-NEXT: vcvtqq2pd %zmm0, %zmm0 6945; AVX512DQ-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 6946; AVX512DQ-NEXT: retq 6947entry: 6948 %result = call <4 x double> 6949 @llvm.experimental.constrained.sitofp.v4f64.v4i64(<4 x i64> %x, 6950 metadata !"round.dynamic", 6951 metadata !"fpexcept.strict") #0 6952 ret <4 x double> %result 6953} 6954 6955define <4 x float> @constrained_vector_sitofp_v4f32_v4i64(<4 x i64> %x) #0 { 6956; CHECK-LABEL: constrained_vector_sitofp_v4f32_v4i64: 6957; CHECK: # %bb.0: # %entry 6958; CHECK-NEXT: movq %xmm1, %rax 6959; CHECK-NEXT: cvtsi2ss %rax, %xmm2 6960; CHECK-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3] 6961; CHECK-NEXT: movq %xmm1, %rax 6962; CHECK-NEXT: xorps %xmm1, %xmm1 6963; CHECK-NEXT: cvtsi2ss %rax, %xmm1 6964; CHECK-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1] 6965; CHECK-NEXT: movq %xmm0, %rax 6966; CHECK-NEXT: xorps %xmm1, %xmm1 6967; CHECK-NEXT: cvtsi2ss %rax, %xmm1 6968; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] 6969; CHECK-NEXT: movq %xmm0, %rax 6970; CHECK-NEXT: xorps %xmm0, %xmm0 6971; CHECK-NEXT: cvtsi2ss %rax, %xmm0 6972; CHECK-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] 6973; CHECK-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm2[0] 6974; CHECK-NEXT: movaps %xmm1, %xmm0 6975; CHECK-NEXT: retq 6976; 6977; AVX1-LABEL: constrained_vector_sitofp_v4f32_v4i64: 6978; AVX1: # %bb.0: # %entry 6979; AVX1-NEXT: vpextrq $1, %xmm0, %rax 6980; AVX1-NEXT: vcvtsi2ss %rax, %xmm1, %xmm1 6981; AVX1-NEXT: vmovq %xmm0, %rax 6982; AVX1-NEXT: vcvtsi2ss %rax, %xmm2, %xmm2 6983; AVX1-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3] 6984; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 6985; AVX1-NEXT: vmovq %xmm0, %rax 6986; AVX1-NEXT: vcvtsi2ss %rax, %xmm3, %xmm2 6987; AVX1-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0],xmm1[3] 6988; AVX1-NEXT: vpextrq 
$1, %xmm0, %rax 6989; AVX1-NEXT: vcvtsi2ss %rax, %xmm3, %xmm0 6990; AVX1-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0] 6991; AVX1-NEXT: vzeroupper 6992; AVX1-NEXT: retq 6993; 6994; AVX512F-LABEL: constrained_vector_sitofp_v4f32_v4i64: 6995; AVX512F: # %bb.0: # %entry 6996; AVX512F-NEXT: vpextrq $1, %xmm0, %rax 6997; AVX512F-NEXT: vcvtsi2ss %rax, %xmm1, %xmm1 6998; AVX512F-NEXT: vmovq %xmm0, %rax 6999; AVX512F-NEXT: vcvtsi2ss %rax, %xmm2, %xmm2 7000; AVX512F-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3] 7001; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm0 7002; AVX512F-NEXT: vmovq %xmm0, %rax 7003; AVX512F-NEXT: vcvtsi2ss %rax, %xmm3, %xmm2 7004; AVX512F-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0],xmm1[3] 7005; AVX512F-NEXT: vpextrq $1, %xmm0, %rax 7006; AVX512F-NEXT: vcvtsi2ss %rax, %xmm3, %xmm0 7007; AVX512F-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0] 7008; AVX512F-NEXT: vzeroupper 7009; AVX512F-NEXT: retq 7010; 7011; AVX512DQ-LABEL: constrained_vector_sitofp_v4f32_v4i64: 7012; AVX512DQ: # %bb.0: # %entry 7013; AVX512DQ-NEXT: vmovaps %ymm0, %ymm0 7014; AVX512DQ-NEXT: vcvtqq2ps %zmm0, %ymm0 7015; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 7016; AVX512DQ-NEXT: vzeroupper 7017; AVX512DQ-NEXT: retq 7018entry: 7019 %result = call <4 x float> 7020 @llvm.experimental.constrained.sitofp.v4f32.v4i64(<4 x i64> %x, 7021 metadata !"round.dynamic", 7022 metadata !"fpexcept.strict") #0 7023 ret <4 x float> %result 7024} 7025 7026define <1 x double> @constrained_vector_uitofp_v1f64_v1i32(<1 x i32> %x) #0 { 7027; CHECK-LABEL: constrained_vector_uitofp_v1f64_v1i32: 7028; CHECK: # %bb.0: # %entry 7029; CHECK-NEXT: movl %edi, %eax 7030; CHECK-NEXT: cvtsi2sd %rax, %xmm0 7031; CHECK-NEXT: retq 7032; 7033; AVX1-LABEL: constrained_vector_uitofp_v1f64_v1i32: 7034; AVX1: # %bb.0: # %entry 7035; AVX1-NEXT: movl %edi, %eax 7036; AVX1-NEXT: vcvtsi2sd %rax, %xmm0, %xmm0 7037; AVX1-NEXT: retq 7038; 7039; AVX512-LABEL: 
constrained_vector_uitofp_v1f64_v1i32: 7040; AVX512: # %bb.0: # %entry 7041; AVX512-NEXT: vcvtusi2sd %edi, %xmm0, %xmm0 7042; AVX512-NEXT: retq 7043entry: 7044 %result = call <1 x double> 7045 @llvm.experimental.constrained.uitofp.v1f64.v1i32(<1 x i32> %x, 7046 metadata !"round.dynamic", 7047 metadata !"fpexcept.strict") #0 7048 ret <1 x double> %result 7049} 7050 7051define <1 x float> @constrained_vector_uitofp_v1f32_v1i32(<1 x i32> %x) #0 { 7052; CHECK-LABEL: constrained_vector_uitofp_v1f32_v1i32: 7053; CHECK: # %bb.0: # %entry 7054; CHECK-NEXT: movl %edi, %eax 7055; CHECK-NEXT: cvtsi2ss %rax, %xmm0 7056; CHECK-NEXT: retq 7057; 7058; AVX1-LABEL: constrained_vector_uitofp_v1f32_v1i32: 7059; AVX1: # %bb.0: # %entry 7060; AVX1-NEXT: movl %edi, %eax 7061; AVX1-NEXT: vcvtsi2ss %rax, %xmm0, %xmm0 7062; AVX1-NEXT: retq 7063; 7064; AVX512-LABEL: constrained_vector_uitofp_v1f32_v1i32: 7065; AVX512: # %bb.0: # %entry 7066; AVX512-NEXT: vcvtusi2ss %edi, %xmm0, %xmm0 7067; AVX512-NEXT: retq 7068entry: 7069 %result = call <1 x float> 7070 @llvm.experimental.constrained.uitofp.v1f32.v1i32(<1 x i32> %x, 7071 metadata !"round.dynamic", 7072 metadata !"fpexcept.strict") #0 7073 ret <1 x float> %result 7074} 7075 7076define <1 x double> @constrained_vector_uitofp_v1f64_v1i64(<1 x i64> %x) #0 { 7077; CHECK-LABEL: constrained_vector_uitofp_v1f64_v1i64: 7078; CHECK: # %bb.0: # %entry 7079; CHECK-NEXT: movq %rdi, %rax 7080; CHECK-NEXT: shrq %rax 7081; CHECK-NEXT: movl %edi, %ecx 7082; CHECK-NEXT: andl $1, %ecx 7083; CHECK-NEXT: orq %rax, %rcx 7084; CHECK-NEXT: testq %rdi, %rdi 7085; CHECK-NEXT: cmovnsq %rdi, %rcx 7086; CHECK-NEXT: cvtsi2sd %rcx, %xmm0 7087; CHECK-NEXT: jns .LBB175_2 7088; CHECK-NEXT: # %bb.1: 7089; CHECK-NEXT: addsd %xmm0, %xmm0 7090; CHECK-NEXT: .LBB175_2: # %entry 7091; CHECK-NEXT: retq 7092; 7093; AVX1-LABEL: constrained_vector_uitofp_v1f64_v1i64: 7094; AVX1: # %bb.0: # %entry 7095; AVX1-NEXT: movq %rdi, %rax 7096; AVX1-NEXT: shrq %rax 7097; AVX1-NEXT: movl %edi, 
%ecx 7098; AVX1-NEXT: andl $1, %ecx 7099; AVX1-NEXT: orq %rax, %rcx 7100; AVX1-NEXT: testq %rdi, %rdi 7101; AVX1-NEXT: cmovnsq %rdi, %rcx 7102; AVX1-NEXT: vcvtsi2sd %rcx, %xmm0, %xmm0 7103; AVX1-NEXT: jns .LBB175_2 7104; AVX1-NEXT: # %bb.1: 7105; AVX1-NEXT: vaddsd %xmm0, %xmm0, %xmm0 7106; AVX1-NEXT: .LBB175_2: # %entry 7107; AVX1-NEXT: retq 7108; 7109; AVX512-LABEL: constrained_vector_uitofp_v1f64_v1i64: 7110; AVX512: # %bb.0: # %entry 7111; AVX512-NEXT: vcvtusi2sd %rdi, %xmm0, %xmm0 7112; AVX512-NEXT: retq 7113entry: 7114 %result = call <1 x double> 7115 @llvm.experimental.constrained.uitofp.v1f64.v1i64(<1 x i64> %x, 7116 metadata !"round.dynamic", 7117 metadata !"fpexcept.strict") #0 7118 ret <1 x double> %result 7119} 7120 7121define <1 x float> @constrained_vector_uitofp_v1f32_v1i64(<1 x i64> %x) #0 { 7122; CHECK-LABEL: constrained_vector_uitofp_v1f32_v1i64: 7123; CHECK: # %bb.0: # %entry 7124; CHECK-NEXT: movq %rdi, %rax 7125; CHECK-NEXT: shrq %rax 7126; CHECK-NEXT: movl %edi, %ecx 7127; CHECK-NEXT: andl $1, %ecx 7128; CHECK-NEXT: orq %rax, %rcx 7129; CHECK-NEXT: testq %rdi, %rdi 7130; CHECK-NEXT: cmovnsq %rdi, %rcx 7131; CHECK-NEXT: cvtsi2ss %rcx, %xmm0 7132; CHECK-NEXT: jns .LBB176_2 7133; CHECK-NEXT: # %bb.1: 7134; CHECK-NEXT: addss %xmm0, %xmm0 7135; CHECK-NEXT: .LBB176_2: # %entry 7136; CHECK-NEXT: retq 7137; 7138; AVX1-LABEL: constrained_vector_uitofp_v1f32_v1i64: 7139; AVX1: # %bb.0: # %entry 7140; AVX1-NEXT: movq %rdi, %rax 7141; AVX1-NEXT: shrq %rax 7142; AVX1-NEXT: movl %edi, %ecx 7143; AVX1-NEXT: andl $1, %ecx 7144; AVX1-NEXT: orq %rax, %rcx 7145; AVX1-NEXT: testq %rdi, %rdi 7146; AVX1-NEXT: cmovnsq %rdi, %rcx 7147; AVX1-NEXT: vcvtsi2ss %rcx, %xmm0, %xmm0 7148; AVX1-NEXT: jns .LBB176_2 7149; AVX1-NEXT: # %bb.1: 7150; AVX1-NEXT: vaddss %xmm0, %xmm0, %xmm0 7151; AVX1-NEXT: .LBB176_2: # %entry 7152; AVX1-NEXT: retq 7153; 7154; AVX512-LABEL: constrained_vector_uitofp_v1f32_v1i64: 7155; AVX512: # %bb.0: # %entry 7156; AVX512-NEXT: vcvtusi2ss %rdi, 
%xmm0, %xmm0 7157; AVX512-NEXT: retq 7158entry: 7159 %result = call <1 x float> 7160 @llvm.experimental.constrained.uitofp.v1f32.v1i64(<1 x i64> %x, 7161 metadata !"round.dynamic", 7162 metadata !"fpexcept.strict") #0 7163 ret <1 x float> %result 7164} 7165 7166define <2 x double> @constrained_vector_uitofp_v2f64_v2i32(<2 x i32> %x) #0 { 7167; CHECK-LABEL: constrained_vector_uitofp_v2f64_v2i32: 7168; CHECK: # %bb.0: # %entry 7169; CHECK-NEXT: xorpd %xmm1, %xmm1 7170; CHECK-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 7171; CHECK-NEXT: movapd {{.*#+}} xmm1 = [4.503599627370496E+15,4.503599627370496E+15] 7172; CHECK-NEXT: orpd %xmm1, %xmm0 7173; CHECK-NEXT: subpd %xmm1, %xmm0 7174; CHECK-NEXT: retq 7175; 7176; AVX1-LABEL: constrained_vector_uitofp_v2f64_v2i32: 7177; AVX1: # %bb.0: # %entry 7178; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero 7179; AVX1-NEXT: vmovddup {{.*#+}} xmm1 = [4.503599627370496E+15,4.503599627370496E+15] 7180; AVX1-NEXT: # xmm1 = mem[0,0] 7181; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0 7182; AVX1-NEXT: vsubpd %xmm1, %xmm0, %xmm0 7183; AVX1-NEXT: retq 7184; 7185; AVX512-LABEL: constrained_vector_uitofp_v2f64_v2i32: 7186; AVX512: # %bb.0: # %entry 7187; AVX512-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero 7188; AVX512-NEXT: vcvtudq2pd %ymm0, %zmm0 7189; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 7190; AVX512-NEXT: vzeroupper 7191; AVX512-NEXT: retq 7192entry: 7193 %result = call <2 x double> 7194 @llvm.experimental.constrained.uitofp.v2f64.v2i32(<2 x i32> %x, 7195 metadata !"round.dynamic", 7196 metadata !"fpexcept.strict") #0 7197 ret <2 x double> %result 7198} 7199 7200define <2 x float> @constrained_vector_uitofp_v2f32_v2i32(<2 x i32> %x) #0 { 7201; CHECK-LABEL: constrained_vector_uitofp_v2f32_v2i32: 7202; CHECK: # %bb.0: # %entry 7203; CHECK-NEXT: xorpd %xmm1, %xmm1 7204; CHECK-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 7205; CHECK-NEXT: movapd {{.*#+}} xmm1 = 
[4.503599627370496E+15,4.503599627370496E+15] 7206; CHECK-NEXT: orpd %xmm1, %xmm0 7207; CHECK-NEXT: subpd %xmm1, %xmm0 7208; CHECK-NEXT: cvtpd2ps %xmm0, %xmm0 7209; CHECK-NEXT: retq 7210; 7211; AVX1-LABEL: constrained_vector_uitofp_v2f32_v2i32: 7212; AVX1: # %bb.0: # %entry 7213; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero 7214; AVX1-NEXT: vmovddup {{.*#+}} xmm1 = [4.503599627370496E+15,4.503599627370496E+15] 7215; AVX1-NEXT: # xmm1 = mem[0,0] 7216; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0 7217; AVX1-NEXT: vsubpd %xmm1, %xmm0, %xmm0 7218; AVX1-NEXT: vcvtpd2ps %xmm0, %xmm0 7219; AVX1-NEXT: retq 7220; 7221; AVX512-LABEL: constrained_vector_uitofp_v2f32_v2i32: 7222; AVX512: # %bb.0: # %entry 7223; AVX512-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero 7224; AVX512-NEXT: vcvtudq2ps %zmm0, %zmm0 7225; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 7226; AVX512-NEXT: vzeroupper 7227; AVX512-NEXT: retq 7228entry: 7229 %result = call <2 x float> 7230 @llvm.experimental.constrained.uitofp.v2f32.v2i32(<2 x i32> %x, 7231 metadata !"round.dynamic", 7232 metadata !"fpexcept.strict") #0 7233 ret <2 x float> %result 7234} 7235 7236define <2 x double> @constrained_vector_uitofp_v2f64_v2i64(<2 x i64> %x) #0 { 7237; CHECK-LABEL: constrained_vector_uitofp_v2f64_v2i64: 7238; CHECK: # %bb.0: # %entry 7239; CHECK-NEXT: movdqa %xmm0, %xmm1 7240; CHECK-NEXT: movq %xmm0, %rax 7241; CHECK-NEXT: movq %rax, %rcx 7242; CHECK-NEXT: shrq %rcx 7243; CHECK-NEXT: movl %eax, %edx 7244; CHECK-NEXT: andl $1, %edx 7245; CHECK-NEXT: orq %rcx, %rdx 7246; CHECK-NEXT: testq %rax, %rax 7247; CHECK-NEXT: cmovnsq %rax, %rdx 7248; CHECK-NEXT: xorps %xmm0, %xmm0 7249; CHECK-NEXT: cvtsi2sd %rdx, %xmm0 7250; CHECK-NEXT: jns .LBB179_2 7251; CHECK-NEXT: # %bb.1: 7252; CHECK-NEXT: addsd %xmm0, %xmm0 7253; CHECK-NEXT: .LBB179_2: # %entry 7254; CHECK-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3] 7255; CHECK-NEXT: movq %xmm1, %rax 7256; CHECK-NEXT: movq %rax, %rcx 7257; CHECK-NEXT: shrq %rcx 7258; 
CHECK-NEXT: movl %eax, %edx 7259; CHECK-NEXT: andl $1, %edx 7260; CHECK-NEXT: orq %rcx, %rdx 7261; CHECK-NEXT: testq %rax, %rax 7262; CHECK-NEXT: cmovnsq %rax, %rdx 7263; CHECK-NEXT: xorps %xmm1, %xmm1 7264; CHECK-NEXT: cvtsi2sd %rdx, %xmm1 7265; CHECK-NEXT: jns .LBB179_4 7266; CHECK-NEXT: # %bb.3: 7267; CHECK-NEXT: addsd %xmm1, %xmm1 7268; CHECK-NEXT: .LBB179_4: # %entry 7269; CHECK-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] 7270; CHECK-NEXT: retq 7271; 7272; AVX1-LABEL: constrained_vector_uitofp_v2f64_v2i64: 7273; AVX1: # %bb.0: # %entry 7274; AVX1-NEXT: vpextrq $1, %xmm0, %rax 7275; AVX1-NEXT: movq %rax, %rcx 7276; AVX1-NEXT: shrq %rcx 7277; AVX1-NEXT: movl %eax, %edx 7278; AVX1-NEXT: andl $1, %edx 7279; AVX1-NEXT: orq %rcx, %rdx 7280; AVX1-NEXT: testq %rax, %rax 7281; AVX1-NEXT: cmovnsq %rax, %rdx 7282; AVX1-NEXT: vcvtsi2sd %rdx, %xmm1, %xmm1 7283; AVX1-NEXT: jns .LBB179_2 7284; AVX1-NEXT: # %bb.1: 7285; AVX1-NEXT: vaddsd %xmm1, %xmm1, %xmm1 7286; AVX1-NEXT: .LBB179_2: # %entry 7287; AVX1-NEXT: vmovq %xmm0, %rax 7288; AVX1-NEXT: movq %rax, %rcx 7289; AVX1-NEXT: shrq %rcx 7290; AVX1-NEXT: movl %eax, %edx 7291; AVX1-NEXT: andl $1, %edx 7292; AVX1-NEXT: orq %rcx, %rdx 7293; AVX1-NEXT: testq %rax, %rax 7294; AVX1-NEXT: cmovnsq %rax, %rdx 7295; AVX1-NEXT: vcvtsi2sd %rdx, %xmm2, %xmm0 7296; AVX1-NEXT: jns .LBB179_4 7297; AVX1-NEXT: # %bb.3: 7298; AVX1-NEXT: vaddsd %xmm0, %xmm0, %xmm0 7299; AVX1-NEXT: .LBB179_4: # %entry 7300; AVX1-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] 7301; AVX1-NEXT: retq 7302; 7303; AVX512F-LABEL: constrained_vector_uitofp_v2f64_v2i64: 7304; AVX512F: # %bb.0: # %entry 7305; AVX512F-NEXT: vpextrq $1, %xmm0, %rax 7306; AVX512F-NEXT: vcvtusi2sd %rax, %xmm1, %xmm1 7307; AVX512F-NEXT: vmovq %xmm0, %rax 7308; AVX512F-NEXT: vcvtusi2sd %rax, %xmm2, %xmm0 7309; AVX512F-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] 7310; AVX512F-NEXT: retq 7311; 7312; AVX512DQ-LABEL: constrained_vector_uitofp_v2f64_v2i64: 7313; AVX512DQ: # %bb.0: # %entry 
7314; AVX512DQ-NEXT: vmovaps %xmm0, %xmm0 7315; AVX512DQ-NEXT: vcvtuqq2pd %zmm0, %zmm0 7316; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 7317; AVX512DQ-NEXT: vzeroupper 7318; AVX512DQ-NEXT: retq 7319entry: 7320 %result = call <2 x double> 7321 @llvm.experimental.constrained.uitofp.v2f64.v2i64(<2 x i64> %x, 7322 metadata !"round.dynamic", 7323 metadata !"fpexcept.strict") #0 7324 ret <2 x double> %result 7325} 7326 7327define <2 x float> @constrained_vector_uitofp_v2f32_v2i64(<2 x i64> %x) #0 { 7328; CHECK-LABEL: constrained_vector_uitofp_v2f32_v2i64: 7329; CHECK: # %bb.0: # %entry 7330; CHECK-NEXT: movdqa %xmm0, %xmm1 7331; CHECK-NEXT: movq %xmm0, %rax 7332; CHECK-NEXT: movq %rax, %rcx 7333; CHECK-NEXT: shrq %rcx 7334; CHECK-NEXT: movl %eax, %edx 7335; CHECK-NEXT: andl $1, %edx 7336; CHECK-NEXT: orq %rcx, %rdx 7337; CHECK-NEXT: testq %rax, %rax 7338; CHECK-NEXT: cmovnsq %rax, %rdx 7339; CHECK-NEXT: xorps %xmm0, %xmm0 7340; CHECK-NEXT: cvtsi2ss %rdx, %xmm0 7341; CHECK-NEXT: jns .LBB180_2 7342; CHECK-NEXT: # %bb.1: 7343; CHECK-NEXT: addss %xmm0, %xmm0 7344; CHECK-NEXT: .LBB180_2: # %entry 7345; CHECK-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3] 7346; CHECK-NEXT: movq %xmm1, %rax 7347; CHECK-NEXT: movq %rax, %rcx 7348; CHECK-NEXT: shrq %rcx 7349; CHECK-NEXT: movl %eax, %edx 7350; CHECK-NEXT: andl $1, %edx 7351; CHECK-NEXT: orq %rcx, %rdx 7352; CHECK-NEXT: testq %rax, %rax 7353; CHECK-NEXT: cmovnsq %rax, %rdx 7354; CHECK-NEXT: xorps %xmm1, %xmm1 7355; CHECK-NEXT: cvtsi2ss %rdx, %xmm1 7356; CHECK-NEXT: jns .LBB180_4 7357; CHECK-NEXT: # %bb.3: 7358; CHECK-NEXT: addss %xmm1, %xmm1 7359; CHECK-NEXT: .LBB180_4: # %entry 7360; CHECK-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 7361; CHECK-NEXT: retq 7362; 7363; AVX1-LABEL: constrained_vector_uitofp_v2f32_v2i64: 7364; AVX1: # %bb.0: # %entry 7365; AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 7366; AVX1-NEXT: vpsrlq $1, %xmm0, %xmm2 7367; AVX1-NEXT: vpor %xmm1, %xmm2, %xmm1 7368; 
AVX1-NEXT: vblendvpd %xmm0, %xmm1, %xmm0, %xmm1 7369; AVX1-NEXT: vpextrq $1, %xmm1, %rax 7370; AVX1-NEXT: vcvtsi2ss %rax, %xmm3, %xmm2 7371; AVX1-NEXT: vmovq %xmm1, %rax 7372; AVX1-NEXT: vcvtsi2ss %rax, %xmm3, %xmm1 7373; AVX1-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0],xmm2[0],zero,zero 7374; AVX1-NEXT: vaddps %xmm1, %xmm1, %xmm2 7375; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3 7376; AVX1-NEXT: vpcmpgtq %xmm0, %xmm3, %xmm0 7377; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,3,2,3] 7378; AVX1-NEXT: vblendvps %xmm0, %xmm2, %xmm1, %xmm0 7379; AVX1-NEXT: retq 7380; 7381; AVX512-LABEL: constrained_vector_uitofp_v2f32_v2i64: 7382; AVX512: # %bb.0: # %entry 7383; AVX512-NEXT: vpextrq $1, %xmm0, %rax 7384; AVX512-NEXT: vcvtusi2ss %rax, %xmm1, %xmm1 7385; AVX512-NEXT: vmovq %xmm0, %rax 7386; AVX512-NEXT: vcvtusi2ss %rax, %xmm2, %xmm0 7387; AVX512-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3] 7388; AVX512-NEXT: retq 7389entry: 7390 %result = call <2 x float> 7391 @llvm.experimental.constrained.uitofp.v2f32.v2i64(<2 x i64> %x, 7392 metadata !"round.dynamic", 7393 metadata !"fpexcept.strict") #0 7394 ret <2 x float> %result 7395} 7396 7397define <3 x double> @constrained_vector_uitofp_v3f64_v3i32(<3 x i32> %x) #0 { 7398; CHECK-LABEL: constrained_vector_uitofp_v3f64_v3i32: 7399; CHECK: # %bb.0: # %entry 7400; CHECK-NEXT: movd %xmm0, %eax 7401; CHECK-NEXT: cvtsi2sd %rax, %xmm2 7402; CHECK-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] 7403; CHECK-NEXT: movd %xmm1, %eax 7404; CHECK-NEXT: xorps %xmm1, %xmm1 7405; CHECK-NEXT: cvtsi2sd %rax, %xmm1 7406; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] 7407; CHECK-NEXT: movd %xmm0, %eax 7408; CHECK-NEXT: xorps %xmm0, %xmm0 7409; CHECK-NEXT: cvtsi2sd %rax, %xmm0 7410; CHECK-NEXT: movsd %xmm0, -{{[0-9]+}}(%rsp) 7411; CHECK-NEXT: fldl -{{[0-9]+}}(%rsp) 7412; CHECK-NEXT: wait 7413; CHECK-NEXT: movapd %xmm2, %xmm0 7414; CHECK-NEXT: retq 7415; 7416; AVX1-LABEL: constrained_vector_uitofp_v3f64_v3i32: 7417; AVX1: # %bb.0: # %entry 7418; 
AVX1-NEXT: vextractps $1, %xmm0, %eax 7419; AVX1-NEXT: vcvtsi2sd %rax, %xmm1, %xmm1 7420; AVX1-NEXT: vmovd %xmm0, %eax 7421; AVX1-NEXT: vcvtsi2sd %rax, %xmm2, %xmm2 7422; AVX1-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm2[0],xmm1[0] 7423; AVX1-NEXT: vpextrd $2, %xmm0, %eax 7424; AVX1-NEXT: vcvtsi2sd %rax, %xmm3, %xmm0 7425; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 7426; AVX1-NEXT: retq 7427; 7428; AVX512-LABEL: constrained_vector_uitofp_v3f64_v3i32: 7429; AVX512: # %bb.0: # %entry 7430; AVX512-NEXT: vextractps $1, %xmm0, %eax 7431; AVX512-NEXT: vcvtusi2sd %eax, %xmm1, %xmm1 7432; AVX512-NEXT: vmovd %xmm0, %eax 7433; AVX512-NEXT: vcvtusi2sd %eax, %xmm2, %xmm2 7434; AVX512-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm2[0],xmm1[0] 7435; AVX512-NEXT: vpextrd $2, %xmm0, %eax 7436; AVX512-NEXT: vcvtusi2sd %eax, %xmm3, %xmm0 7437; AVX512-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 7438; AVX512-NEXT: retq 7439entry: 7440 %result = call <3 x double> 7441 @llvm.experimental.constrained.uitofp.v3f64.v3i32(<3 x i32> %x, 7442 metadata !"round.dynamic", 7443 metadata !"fpexcept.strict") #0 7444 ret <3 x double> %result 7445} 7446 7447define <3 x float> @constrained_vector_uitofp_v3f32_v3i32(<3 x i32> %x) #0 { 7448; CHECK-LABEL: constrained_vector_uitofp_v3f32_v3i32: 7449; CHECK: # %bb.0: # %entry 7450; CHECK-NEXT: movd %xmm0, %eax 7451; CHECK-NEXT: cvtsi2ss %rax, %xmm1 7452; CHECK-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,1,1] 7453; CHECK-NEXT: movd %xmm2, %eax 7454; CHECK-NEXT: xorps %xmm2, %xmm2 7455; CHECK-NEXT: cvtsi2ss %rax, %xmm2 7456; CHECK-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1] 7457; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] 7458; CHECK-NEXT: movd %xmm0, %eax 7459; CHECK-NEXT: xorps %xmm0, %xmm0 7460; CHECK-NEXT: cvtsi2ss %rax, %xmm0 7461; CHECK-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0] 7462; CHECK-NEXT: movaps %xmm1, %xmm0 7463; CHECK-NEXT: retq 7464; 7465; AVX1-LABEL: constrained_vector_uitofp_v3f32_v3i32: 7466; AVX1: # %bb.0: # %entry 7467; AVX1-NEXT: 
vextractps $1, %xmm0, %eax 7468; AVX1-NEXT: vcvtsi2ss %rax, %xmm1, %xmm1 7469; AVX1-NEXT: vmovd %xmm0, %eax 7470; AVX1-NEXT: vcvtsi2ss %rax, %xmm2, %xmm2 7471; AVX1-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3] 7472; AVX1-NEXT: vpextrd $2, %xmm0, %eax 7473; AVX1-NEXT: vcvtsi2ss %rax, %xmm3, %xmm0 7474; AVX1-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3] 7475; AVX1-NEXT: retq 7476; 7477; AVX512-LABEL: constrained_vector_uitofp_v3f32_v3i32: 7478; AVX512: # %bb.0: # %entry 7479; AVX512-NEXT: vextractps $1, %xmm0, %eax 7480; AVX512-NEXT: vcvtusi2ss %eax, %xmm1, %xmm1 7481; AVX512-NEXT: vmovd %xmm0, %eax 7482; AVX512-NEXT: vcvtusi2ss %eax, %xmm2, %xmm2 7483; AVX512-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3] 7484; AVX512-NEXT: vpextrd $2, %xmm0, %eax 7485; AVX512-NEXT: vcvtusi2ss %eax, %xmm3, %xmm0 7486; AVX512-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3] 7487; AVX512-NEXT: retq 7488entry: 7489 %result = call <3 x float> 7490 @llvm.experimental.constrained.uitofp.v3f32.v3i32(<3 x i32> %x, 7491 metadata !"round.dynamic", 7492 metadata !"fpexcept.strict") #0 7493 ret <3 x float> %result 7494} 7495 7496define <3 x double> @constrained_vector_uitofp_v3f64_v3i64(<3 x i64> %x) #0 { 7497; CHECK-LABEL: constrained_vector_uitofp_v3f64_v3i64: 7498; CHECK: # %bb.0: # %entry 7499; CHECK-NEXT: movq %rdi, %rax 7500; CHECK-NEXT: shrq %rax 7501; CHECK-NEXT: movl %edi, %ecx 7502; CHECK-NEXT: andl $1, %ecx 7503; CHECK-NEXT: orq %rax, %rcx 7504; CHECK-NEXT: testq %rdi, %rdi 7505; CHECK-NEXT: cmovnsq %rdi, %rcx 7506; CHECK-NEXT: cvtsi2sd %rcx, %xmm0 7507; CHECK-NEXT: jns .LBB183_2 7508; CHECK-NEXT: # %bb.1: 7509; CHECK-NEXT: addsd %xmm0, %xmm0 7510; CHECK-NEXT: .LBB183_2: # %entry 7511; CHECK-NEXT: movq %rsi, %rax 7512; CHECK-NEXT: shrq %rax 7513; CHECK-NEXT: movl %esi, %ecx 7514; CHECK-NEXT: andl $1, %ecx 7515; CHECK-NEXT: orq %rax, %rcx 7516; CHECK-NEXT: testq %rsi, %rsi 7517; CHECK-NEXT: cmovnsq %rsi, %rcx 7518; CHECK-NEXT: cvtsi2sd 
%rcx, %xmm1 7519; CHECK-NEXT: jns .LBB183_4 7520; CHECK-NEXT: # %bb.3: 7521; CHECK-NEXT: addsd %xmm1, %xmm1 7522; CHECK-NEXT: .LBB183_4: # %entry 7523; CHECK-NEXT: movq %rdx, %rax 7524; CHECK-NEXT: shrq %rax 7525; CHECK-NEXT: movl %edx, %ecx 7526; CHECK-NEXT: andl $1, %ecx 7527; CHECK-NEXT: orq %rax, %rcx 7528; CHECK-NEXT: testq %rdx, %rdx 7529; CHECK-NEXT: cmovnsq %rdx, %rcx 7530; CHECK-NEXT: cvtsi2sd %rcx, %xmm2 7531; CHECK-NEXT: jns .LBB183_6 7532; CHECK-NEXT: # %bb.5: 7533; CHECK-NEXT: addsd %xmm2, %xmm2 7534; CHECK-NEXT: .LBB183_6: # %entry 7535; CHECK-NEXT: movsd %xmm2, -{{[0-9]+}}(%rsp) 7536; CHECK-NEXT: fldl -{{[0-9]+}}(%rsp) 7537; CHECK-NEXT: wait 7538; CHECK-NEXT: retq 7539; 7540; AVX1-LABEL: constrained_vector_uitofp_v3f64_v3i64: 7541; AVX1: # %bb.0: # %entry 7542; AVX1-NEXT: vpextrq $1, %xmm0, %rax 7543; AVX1-NEXT: movq %rax, %rcx 7544; AVX1-NEXT: shrq %rcx 7545; AVX1-NEXT: movl %eax, %edx 7546; AVX1-NEXT: andl $1, %edx 7547; AVX1-NEXT: orq %rcx, %rdx 7548; AVX1-NEXT: testq %rax, %rax 7549; AVX1-NEXT: cmovnsq %rax, %rdx 7550; AVX1-NEXT: vcvtsi2sd %rdx, %xmm1, %xmm1 7551; AVX1-NEXT: jns .LBB183_2 7552; AVX1-NEXT: # %bb.1: 7553; AVX1-NEXT: vaddsd %xmm1, %xmm1, %xmm1 7554; AVX1-NEXT: .LBB183_2: # %entry 7555; AVX1-NEXT: vmovq %xmm0, %rax 7556; AVX1-NEXT: movq %rax, %rcx 7557; AVX1-NEXT: shrq %rcx 7558; AVX1-NEXT: movl %eax, %edx 7559; AVX1-NEXT: andl $1, %edx 7560; AVX1-NEXT: orq %rcx, %rdx 7561; AVX1-NEXT: testq %rax, %rax 7562; AVX1-NEXT: cmovnsq %rax, %rdx 7563; AVX1-NEXT: vcvtsi2sd %rdx, %xmm2, %xmm2 7564; AVX1-NEXT: jns .LBB183_4 7565; AVX1-NEXT: # %bb.3: 7566; AVX1-NEXT: vaddsd %xmm2, %xmm2, %xmm2 7567; AVX1-NEXT: .LBB183_4: # %entry 7568; AVX1-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm2[0],xmm1[0] 7569; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 7570; AVX1-NEXT: vmovq %xmm0, %rax 7571; AVX1-NEXT: movq %rax, %rcx 7572; AVX1-NEXT: shrq %rcx 7573; AVX1-NEXT: movl %eax, %edx 7574; AVX1-NEXT: andl $1, %edx 7575; AVX1-NEXT: orq %rcx, %rdx 7576; AVX1-NEXT: testq 
%rax, %rax 7577; AVX1-NEXT: cmovnsq %rax, %rdx 7578; AVX1-NEXT: vcvtsi2sd %rdx, %xmm3, %xmm0 7579; AVX1-NEXT: jns .LBB183_6 7580; AVX1-NEXT: # %bb.5: 7581; AVX1-NEXT: vaddsd %xmm0, %xmm0, %xmm0 7582; AVX1-NEXT: .LBB183_6: # %entry 7583; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 7584; AVX1-NEXT: retq 7585; 7586; AVX512-LABEL: constrained_vector_uitofp_v3f64_v3i64: 7587; AVX512: # %bb.0: # %entry 7588; AVX512-NEXT: vpextrq $1, %xmm0, %rax 7589; AVX512-NEXT: vcvtusi2sd %rax, %xmm1, %xmm1 7590; AVX512-NEXT: vmovq %xmm0, %rax 7591; AVX512-NEXT: vcvtusi2sd %rax, %xmm2, %xmm2 7592; AVX512-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm2[0],xmm1[0] 7593; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm0 7594; AVX512-NEXT: vmovq %xmm0, %rax 7595; AVX512-NEXT: vcvtusi2sd %rax, %xmm3, %xmm0 7596; AVX512-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 7597; AVX512-NEXT: retq 7598entry: 7599 %result = call <3 x double> 7600 @llvm.experimental.constrained.uitofp.v3f64.v3i64(<3 x i64> %x, 7601 metadata !"round.dynamic", 7602 metadata !"fpexcept.strict") #0 7603 ret <3 x double> %result 7604} 7605 7606define <3 x float> @constrained_vector_uitofp_v3f32_v3i64(<3 x i64> %x) #0 { 7607; CHECK-LABEL: constrained_vector_uitofp_v3f32_v3i64: 7608; CHECK: # %bb.0: # %entry 7609; CHECK-NEXT: movq %rsi, %rax 7610; CHECK-NEXT: shrq %rax 7611; CHECK-NEXT: movl %esi, %ecx 7612; CHECK-NEXT: andl $1, %ecx 7613; CHECK-NEXT: orq %rax, %rcx 7614; CHECK-NEXT: testq %rsi, %rsi 7615; CHECK-NEXT: cmovnsq %rsi, %rcx 7616; CHECK-NEXT: cvtsi2ss %rcx, %xmm1 7617; CHECK-NEXT: jns .LBB184_2 7618; CHECK-NEXT: # %bb.1: 7619; CHECK-NEXT: addss %xmm1, %xmm1 7620; CHECK-NEXT: .LBB184_2: # %entry 7621; CHECK-NEXT: movq %rdi, %rax 7622; CHECK-NEXT: shrq %rax 7623; CHECK-NEXT: movl %edi, %ecx 7624; CHECK-NEXT: andl $1, %ecx 7625; CHECK-NEXT: orq %rax, %rcx 7626; CHECK-NEXT: testq %rdi, %rdi 7627; CHECK-NEXT: cmovnsq %rdi, %rcx 7628; CHECK-NEXT: cvtsi2ss %rcx, %xmm0 7629; CHECK-NEXT: jns .LBB184_4 7630; CHECK-NEXT: # %bb.3: 7631; 
CHECK-NEXT: addss %xmm0, %xmm0 7632; CHECK-NEXT: .LBB184_4: # %entry 7633; CHECK-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 7634; CHECK-NEXT: movq %rdx, %rax 7635; CHECK-NEXT: shrq %rax 7636; CHECK-NEXT: movl %edx, %ecx 7637; CHECK-NEXT: andl $1, %ecx 7638; CHECK-NEXT: orq %rax, %rcx 7639; CHECK-NEXT: testq %rdx, %rdx 7640; CHECK-NEXT: cmovnsq %rdx, %rcx 7641; CHECK-NEXT: xorps %xmm1, %xmm1 7642; CHECK-NEXT: cvtsi2ss %rcx, %xmm1 7643; CHECK-NEXT: jns .LBB184_6 7644; CHECK-NEXT: # %bb.5: 7645; CHECK-NEXT: addss %xmm1, %xmm1 7646; CHECK-NEXT: .LBB184_6: # %entry 7647; CHECK-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] 7648; CHECK-NEXT: retq 7649; 7650; AVX1-LABEL: constrained_vector_uitofp_v3f32_v3i64: 7651; AVX1: # %bb.0: # %entry 7652; AVX1-NEXT: vpextrq $1, %xmm0, %rax 7653; AVX1-NEXT: movq %rax, %rcx 7654; AVX1-NEXT: shrq %rcx 7655; AVX1-NEXT: movl %eax, %edx 7656; AVX1-NEXT: andl $1, %edx 7657; AVX1-NEXT: orq %rcx, %rdx 7658; AVX1-NEXT: testq %rax, %rax 7659; AVX1-NEXT: cmovnsq %rax, %rdx 7660; AVX1-NEXT: vcvtsi2ss %rdx, %xmm1, %xmm1 7661; AVX1-NEXT: jns .LBB184_2 7662; AVX1-NEXT: # %bb.1: 7663; AVX1-NEXT: vaddss %xmm1, %xmm1, %xmm1 7664; AVX1-NEXT: .LBB184_2: # %entry 7665; AVX1-NEXT: vmovq %xmm0, %rax 7666; AVX1-NEXT: movq %rax, %rcx 7667; AVX1-NEXT: shrq %rcx 7668; AVX1-NEXT: movl %eax, %edx 7669; AVX1-NEXT: andl $1, %edx 7670; AVX1-NEXT: orq %rcx, %rdx 7671; AVX1-NEXT: testq %rax, %rax 7672; AVX1-NEXT: cmovnsq %rax, %rdx 7673; AVX1-NEXT: vcvtsi2ss %rdx, %xmm2, %xmm2 7674; AVX1-NEXT: jns .LBB184_4 7675; AVX1-NEXT: # %bb.3: 7676; AVX1-NEXT: vaddss %xmm2, %xmm2, %xmm2 7677; AVX1-NEXT: .LBB184_4: # %entry 7678; AVX1-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3] 7679; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 7680; AVX1-NEXT: vmovq %xmm0, %rax 7681; AVX1-NEXT: movq %rax, %rcx 7682; AVX1-NEXT: shrq %rcx 7683; AVX1-NEXT: movl %eax, %edx 7684; AVX1-NEXT: andl $1, %edx 7685; AVX1-NEXT: orq %rcx, %rdx 7686; AVX1-NEXT: testq %rax, %rax 
7687; AVX1-NEXT: cmovnsq %rax, %rdx 7688; AVX1-NEXT: vcvtsi2ss %rdx, %xmm3, %xmm0 7689; AVX1-NEXT: jns .LBB184_6 7690; AVX1-NEXT: # %bb.5: 7691; AVX1-NEXT: vaddss %xmm0, %xmm0, %xmm0 7692; AVX1-NEXT: .LBB184_6: # %entry 7693; AVX1-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3] 7694; AVX1-NEXT: vzeroupper 7695; AVX1-NEXT: retq 7696; 7697; AVX512-LABEL: constrained_vector_uitofp_v3f32_v3i64: 7698; AVX512: # %bb.0: # %entry 7699; AVX512-NEXT: vpextrq $1, %xmm0, %rax 7700; AVX512-NEXT: vcvtusi2ss %rax, %xmm1, %xmm1 7701; AVX512-NEXT: vmovq %xmm0, %rax 7702; AVX512-NEXT: vcvtusi2ss %rax, %xmm2, %xmm2 7703; AVX512-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3] 7704; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm0 7705; AVX512-NEXT: vmovq %xmm0, %rax 7706; AVX512-NEXT: vcvtusi2ss %rax, %xmm3, %xmm0 7707; AVX512-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3] 7708; AVX512-NEXT: vzeroupper 7709; AVX512-NEXT: retq 7710entry: 7711 %result = call <3 x float> 7712 @llvm.experimental.constrained.uitofp.v3f32.v3i64(<3 x i64> %x, 7713 metadata !"round.dynamic", 7714 metadata !"fpexcept.strict") #0 7715 ret <3 x float> %result 7716} 7717 7718define <4 x double> @constrained_vector_uitofp_v4f64_v4i32(<4 x i32> %x) #0 { 7719; CHECK-LABEL: constrained_vector_uitofp_v4f64_v4i32: 7720; CHECK: # %bb.0: # %entry 7721; CHECK-NEXT: xorpd %xmm2, %xmm2 7722; CHECK-NEXT: movapd %xmm0, %xmm1 7723; CHECK-NEXT: unpckhps {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3] 7724; CHECK-NEXT: movapd {{.*#+}} xmm3 = [4.503599627370496E+15,4.503599627370496E+15] 7725; CHECK-NEXT: orpd %xmm3, %xmm1 7726; CHECK-NEXT: subpd %xmm3, %xmm1 7727; CHECK-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] 7728; CHECK-NEXT: orpd %xmm3, %xmm0 7729; CHECK-NEXT: subpd %xmm3, %xmm0 7730; CHECK-NEXT: retq 7731; 7732; AVX1-LABEL: constrained_vector_uitofp_v4f64_v4i32: 7733; AVX1: # %bb.0: # %entry 7734; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 7735; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm1 = 
xmm0[2],xmm1[2],xmm0[3],xmm1[3] 7736; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero 7737; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 7738; AVX1-NEXT: vbroadcastsd {{.*#+}} ymm1 = [4.503599627370496E+15,4.503599627370496E+15,4.503599627370496E+15,4.503599627370496E+15] 7739; AVX1-NEXT: vorpd %ymm1, %ymm0, %ymm0 7740; AVX1-NEXT: vsubpd %ymm1, %ymm0, %ymm0 7741; AVX1-NEXT: retq 7742; 7743; AVX512-LABEL: constrained_vector_uitofp_v4f64_v4i32: 7744; AVX512: # %bb.0: # %entry 7745; AVX512-NEXT: vmovaps %xmm0, %xmm0 7746; AVX512-NEXT: vcvtudq2pd %ymm0, %zmm0 7747; AVX512-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 7748; AVX512-NEXT: retq 7749entry: 7750 %result = call <4 x double> 7751 @llvm.experimental.constrained.uitofp.v4f64.v4i32(<4 x i32> %x, 7752 metadata !"round.dynamic", 7753 metadata !"fpexcept.strict") #0 7754 ret <4 x double> %result 7755} 7756 7757define <4 x float> @constrained_vector_uitofp_v4f32_v4i32(<4 x i32> %x) #0 { 7758; CHECK-LABEL: constrained_vector_uitofp_v4f32_v4i32: 7759; CHECK: # %bb.0: # %entry 7760; CHECK-NEXT: movdqa {{.*#+}} xmm1 = [65535,65535,65535,65535] 7761; CHECK-NEXT: pand %xmm0, %xmm1 7762; CHECK-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 7763; CHECK-NEXT: psrld $16, %xmm0 7764; CHECK-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 7765; CHECK-NEXT: subps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 7766; CHECK-NEXT: addps %xmm1, %xmm0 7767; CHECK-NEXT: retq 7768; 7769; AVX1-LABEL: constrained_vector_uitofp_v4f32_v4i32: 7770; AVX1: # %bb.0: # %entry 7771; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0],mem[1],xmm0[2],mem[3],xmm0[4],mem[5],xmm0[6],mem[7] 7772; AVX1-NEXT: vpsrld $16, %xmm0, %xmm0 7773; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],mem[1],xmm0[2],mem[3],xmm0[4],mem[5],xmm0[6],mem[7] 7774; AVX1-NEXT: vsubps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 7775; AVX1-NEXT: vaddps %xmm0, %xmm1, %xmm0 7776; AVX1-NEXT: retq 7777; 7778; AVX512-LABEL: constrained_vector_uitofp_v4f32_v4i32: 7779; AVX512: # 
%bb.0: # %entry 7780; AVX512-NEXT: vmovaps %xmm0, %xmm0 7781; AVX512-NEXT: vcvtudq2ps %zmm0, %zmm0 7782; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 7783; AVX512-NEXT: vzeroupper 7784; AVX512-NEXT: retq 7785entry: 7786 %result = call <4 x float> 7787 @llvm.experimental.constrained.uitofp.v4f32.v4i32(<4 x i32> %x, 7788 metadata !"round.dynamic", 7789 metadata !"fpexcept.strict") #0 7790 ret <4 x float> %result 7791} 7792 7793define <4 x double> @constrained_vector_uitofp_v4f64_v4i64(<4 x i64> %x) #0 { 7794; CHECK-LABEL: constrained_vector_uitofp_v4f64_v4i64: 7795; CHECK: # %bb.0: # %entry 7796; CHECK-NEXT: movdqa %xmm0, %xmm2 7797; CHECK-NEXT: movq %xmm0, %rax 7798; CHECK-NEXT: movq %rax, %rcx 7799; CHECK-NEXT: shrq %rcx 7800; CHECK-NEXT: movl %eax, %edx 7801; CHECK-NEXT: andl $1, %edx 7802; CHECK-NEXT: orq %rcx, %rdx 7803; CHECK-NEXT: testq %rax, %rax 7804; CHECK-NEXT: cmovnsq %rax, %rdx 7805; CHECK-NEXT: xorps %xmm0, %xmm0 7806; CHECK-NEXT: cvtsi2sd %rdx, %xmm0 7807; CHECK-NEXT: jns .LBB187_2 7808; CHECK-NEXT: # %bb.1: 7809; CHECK-NEXT: addsd %xmm0, %xmm0 7810; CHECK-NEXT: .LBB187_2: # %entry 7811; CHECK-NEXT: pshufd {{.*#+}} xmm2 = xmm2[2,3,2,3] 7812; CHECK-NEXT: movq %xmm2, %rax 7813; CHECK-NEXT: movq %rax, %rcx 7814; CHECK-NEXT: shrq %rcx 7815; CHECK-NEXT: movl %eax, %edx 7816; CHECK-NEXT: andl $1, %edx 7817; CHECK-NEXT: orq %rcx, %rdx 7818; CHECK-NEXT: testq %rax, %rax 7819; CHECK-NEXT: cmovnsq %rax, %rdx 7820; CHECK-NEXT: cvtsi2sd %rdx, %xmm3 7821; CHECK-NEXT: jns .LBB187_4 7822; CHECK-NEXT: # %bb.3: 7823; CHECK-NEXT: addsd %xmm3, %xmm3 7824; CHECK-NEXT: .LBB187_4: # %entry 7825; CHECK-NEXT: movq %xmm1, %rax 7826; CHECK-NEXT: movq %rax, %rcx 7827; CHECK-NEXT: shrq %rcx 7828; CHECK-NEXT: movl %eax, %edx 7829; CHECK-NEXT: andl $1, %edx 7830; CHECK-NEXT: orq %rcx, %rdx 7831; CHECK-NEXT: testq %rax, %rax 7832; CHECK-NEXT: cmovnsq %rax, %rdx 7833; CHECK-NEXT: xorps %xmm2, %xmm2 7834; CHECK-NEXT: cvtsi2sd %rdx, %xmm2 7835; CHECK-NEXT: jns .LBB187_6 
7836; CHECK-NEXT: # %bb.5: 7837; CHECK-NEXT: addsd %xmm2, %xmm2 7838; CHECK-NEXT: .LBB187_6: # %entry 7839; CHECK-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm3[0] 7840; CHECK-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3] 7841; CHECK-NEXT: movq %xmm1, %rax 7842; CHECK-NEXT: movq %rax, %rcx 7843; CHECK-NEXT: shrq %rcx 7844; CHECK-NEXT: movl %eax, %edx 7845; CHECK-NEXT: andl $1, %edx 7846; CHECK-NEXT: orq %rcx, %rdx 7847; CHECK-NEXT: testq %rax, %rax 7848; CHECK-NEXT: cmovnsq %rax, %rdx 7849; CHECK-NEXT: xorps %xmm1, %xmm1 7850; CHECK-NEXT: cvtsi2sd %rdx, %xmm1 7851; CHECK-NEXT: jns .LBB187_8 7852; CHECK-NEXT: # %bb.7: 7853; CHECK-NEXT: addsd %xmm1, %xmm1 7854; CHECK-NEXT: .LBB187_8: # %entry 7855; CHECK-NEXT: unpcklpd {{.*#+}} xmm2 = xmm2[0],xmm1[0] 7856; CHECK-NEXT: movapd %xmm2, %xmm1 7857; CHECK-NEXT: retq 7858; 7859; AVX1-LABEL: constrained_vector_uitofp_v4f64_v4i64: 7860; AVX1: # %bb.0: # %entry 7861; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 7862; AVX1-NEXT: vpextrd $2, %xmm1, %eax 7863; AVX1-NEXT: vcvtsi2sd %rax, %xmm2, %xmm2 7864; AVX1-NEXT: vmovd %xmm1, %eax 7865; AVX1-NEXT: vcvtsi2sd %rax, %xmm3, %xmm3 7866; AVX1-NEXT: vunpcklpd {{.*#+}} xmm2 = xmm3[0],xmm2[0] 7867; AVX1-NEXT: vextractps $2, %xmm0, %eax 7868; AVX1-NEXT: vcvtsi2sd %rax, %xmm4, %xmm3 7869; AVX1-NEXT: vmovq %xmm0, %rax 7870; AVX1-NEXT: movl %eax, %eax 7871; AVX1-NEXT: vcvtsi2sd %rax, %xmm4, %xmm4 7872; AVX1-NEXT: vunpcklpd {{.*#+}} xmm3 = xmm4[0],xmm3[0] 7873; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm2 7874; AVX1-NEXT: vpextrd $3, %xmm1, %eax 7875; AVX1-NEXT: vcvtsi2sd %rax, %xmm5, %xmm3 7876; AVX1-NEXT: vpextrd $1, %xmm1, %eax 7877; AVX1-NEXT: vcvtsi2sd %rax, %xmm5, %xmm1 7878; AVX1-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm3[0] 7879; AVX1-NEXT: vpextrd $3, %xmm0, %eax 7880; AVX1-NEXT: vcvtsi2sd %rax, %xmm5, %xmm3 7881; AVX1-NEXT: vpextrd $1, %xmm0, %eax 7882; AVX1-NEXT: vcvtsi2sd %rax, %xmm5, %xmm0 7883; AVX1-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm3[0] 7884; AVX1-NEXT: vinsertf128 $1, 
%xmm1, %ymm0, %ymm0 7885; AVX1-NEXT: vmulpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 7886; AVX1-NEXT: vaddpd %ymm2, %ymm0, %ymm0 7887; AVX1-NEXT: retq 7888; 7889; AVX512F-LABEL: constrained_vector_uitofp_v4f64_v4i64: 7890; AVX512F: # %bb.0: # %entry 7891; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm1 7892; AVX512F-NEXT: vpextrq $1, %xmm1, %rax 7893; AVX512F-NEXT: vcvtusi2sd %rax, %xmm2, %xmm2 7894; AVX512F-NEXT: vmovq %xmm1, %rax 7895; AVX512F-NEXT: vcvtusi2sd %rax, %xmm3, %xmm1 7896; AVX512F-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0] 7897; AVX512F-NEXT: vpextrq $1, %xmm0, %rax 7898; AVX512F-NEXT: vcvtusi2sd %rax, %xmm3, %xmm2 7899; AVX512F-NEXT: vmovq %xmm0, %rax 7900; AVX512F-NEXT: vcvtusi2sd %rax, %xmm3, %xmm0 7901; AVX512F-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0] 7902; AVX512F-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 7903; AVX512F-NEXT: retq 7904; 7905; AVX512DQ-LABEL: constrained_vector_uitofp_v4f64_v4i64: 7906; AVX512DQ: # %bb.0: # %entry 7907; AVX512DQ-NEXT: vmovaps %ymm0, %ymm0 7908; AVX512DQ-NEXT: vcvtuqq2pd %zmm0, %zmm0 7909; AVX512DQ-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 7910; AVX512DQ-NEXT: retq 7911entry: 7912 %result = call <4 x double> 7913 @llvm.experimental.constrained.uitofp.v4f64.v4i64(<4 x i64> %x, 7914 metadata !"round.dynamic", 7915 metadata !"fpexcept.strict") #0 7916 ret <4 x double> %result 7917} 7918 7919define <4 x float> @constrained_vector_uitofp_v4f32_v4i64(<4 x i64> %x) #0 { 7920; CHECK-LABEL: constrained_vector_uitofp_v4f32_v4i64: 7921; CHECK: # %bb.0: # %entry 7922; CHECK-NEXT: movq %xmm1, %rax 7923; CHECK-NEXT: movq %rax, %rcx 7924; CHECK-NEXT: shrq %rcx 7925; CHECK-NEXT: movl %eax, %edx 7926; CHECK-NEXT: andl $1, %edx 7927; CHECK-NEXT: orq %rcx, %rdx 7928; CHECK-NEXT: testq %rax, %rax 7929; CHECK-NEXT: cmovnsq %rax, %rdx 7930; CHECK-NEXT: cvtsi2ss %rdx, %xmm2 7931; CHECK-NEXT: jns .LBB188_2 7932; CHECK-NEXT: # %bb.1: 7933; CHECK-NEXT: addss %xmm2, %xmm2 7934; CHECK-NEXT: .LBB188_2: # %entry 7935; 
CHECK-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3] 7936; CHECK-NEXT: movq %xmm1, %rax 7937; CHECK-NEXT: movq %rax, %rcx 7938; CHECK-NEXT: shrq %rcx 7939; CHECK-NEXT: movl %eax, %edx 7940; CHECK-NEXT: andl $1, %edx 7941; CHECK-NEXT: orq %rcx, %rdx 7942; CHECK-NEXT: testq %rax, %rax 7943; CHECK-NEXT: cmovnsq %rax, %rdx 7944; CHECK-NEXT: cvtsi2ss %rdx, %xmm3 7945; CHECK-NEXT: jns .LBB188_4 7946; CHECK-NEXT: # %bb.3: 7947; CHECK-NEXT: addss %xmm3, %xmm3 7948; CHECK-NEXT: .LBB188_4: # %entry 7949; CHECK-NEXT: movq %xmm0, %rax 7950; CHECK-NEXT: movq %rax, %rcx 7951; CHECK-NEXT: shrq %rcx 7952; CHECK-NEXT: movl %eax, %edx 7953; CHECK-NEXT: andl $1, %edx 7954; CHECK-NEXT: orq %rcx, %rdx 7955; CHECK-NEXT: testq %rax, %rax 7956; CHECK-NEXT: cmovnsq %rax, %rdx 7957; CHECK-NEXT: xorps %xmm1, %xmm1 7958; CHECK-NEXT: cvtsi2ss %rdx, %xmm1 7959; CHECK-NEXT: jns .LBB188_6 7960; CHECK-NEXT: # %bb.5: 7961; CHECK-NEXT: addss %xmm1, %xmm1 7962; CHECK-NEXT: .LBB188_6: # %entry 7963; CHECK-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1] 7964; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] 7965; CHECK-NEXT: movq %xmm0, %rax 7966; CHECK-NEXT: movq %rax, %rcx 7967; CHECK-NEXT: shrq %rcx 7968; CHECK-NEXT: movl %eax, %edx 7969; CHECK-NEXT: andl $1, %edx 7970; CHECK-NEXT: orq %rcx, %rdx 7971; CHECK-NEXT: testq %rax, %rax 7972; CHECK-NEXT: cmovnsq %rax, %rdx 7973; CHECK-NEXT: xorps %xmm0, %xmm0 7974; CHECK-NEXT: cvtsi2ss %rdx, %xmm0 7975; CHECK-NEXT: jns .LBB188_8 7976; CHECK-NEXT: # %bb.7: 7977; CHECK-NEXT: addss %xmm0, %xmm0 7978; CHECK-NEXT: .LBB188_8: # %entry 7979; CHECK-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] 7980; CHECK-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm2[0] 7981; CHECK-NEXT: movaps %xmm1, %xmm0 7982; CHECK-NEXT: retq 7983; 7984; AVX1-LABEL: constrained_vector_uitofp_v4f32_v4i64: 7985; AVX1: # %bb.0: # %entry 7986; AVX1-NEXT: vpsrlq $1, %xmm0, %xmm1 7987; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 7988; AVX1-NEXT: vpsrlq $1, %xmm2, %xmm3 7989; 
AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm1, %ymm1 7990; AVX1-NEXT: vandpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm3 7991; AVX1-NEXT: vorpd %ymm3, %ymm1, %ymm1 7992; AVX1-NEXT: vblendvpd %ymm0, %ymm1, %ymm0, %ymm1 7993; AVX1-NEXT: vpextrq $1, %xmm1, %rax 7994; AVX1-NEXT: vcvtsi2ss %rax, %xmm4, %xmm3 7995; AVX1-NEXT: vmovq %xmm1, %rax 7996; AVX1-NEXT: vcvtsi2ss %rax, %xmm4, %xmm4 7997; AVX1-NEXT: vinsertps {{.*#+}} xmm3 = xmm4[0],xmm3[0],xmm4[2,3] 7998; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1 7999; AVX1-NEXT: vmovq %xmm1, %rax 8000; AVX1-NEXT: vcvtsi2ss %rax, %xmm5, %xmm4 8001; AVX1-NEXT: vinsertps {{.*#+}} xmm3 = xmm3[0,1],xmm4[0],xmm3[3] 8002; AVX1-NEXT: vpextrq $1, %xmm1, %rax 8003; AVX1-NEXT: vcvtsi2ss %rax, %xmm5, %xmm1 8004; AVX1-NEXT: vinsertps {{.*#+}} xmm1 = xmm3[0,1,2],xmm1[0] 8005; AVX1-NEXT: vaddps %xmm1, %xmm1, %xmm3 8006; AVX1-NEXT: vpackssdw %xmm2, %xmm0, %xmm0 8007; AVX1-NEXT: vblendvps %xmm0, %xmm3, %xmm1, %xmm0 8008; AVX1-NEXT: vzeroupper 8009; AVX1-NEXT: retq 8010; 8011; AVX512F-LABEL: constrained_vector_uitofp_v4f32_v4i64: 8012; AVX512F: # %bb.0: # %entry 8013; AVX512F-NEXT: vpextrq $1, %xmm0, %rax 8014; AVX512F-NEXT: vcvtusi2ss %rax, %xmm1, %xmm1 8015; AVX512F-NEXT: vmovq %xmm0, %rax 8016; AVX512F-NEXT: vcvtusi2ss %rax, %xmm2, %xmm2 8017; AVX512F-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3] 8018; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm0 8019; AVX512F-NEXT: vmovq %xmm0, %rax 8020; AVX512F-NEXT: vcvtusi2ss %rax, %xmm3, %xmm2 8021; AVX512F-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0],xmm1[3] 8022; AVX512F-NEXT: vpextrq $1, %xmm0, %rax 8023; AVX512F-NEXT: vcvtusi2ss %rax, %xmm3, %xmm0 8024; AVX512F-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0] 8025; AVX512F-NEXT: vzeroupper 8026; AVX512F-NEXT: retq 8027; 8028; AVX512DQ-LABEL: constrained_vector_uitofp_v4f32_v4i64: 8029; AVX512DQ: # %bb.0: # %entry 8030; AVX512DQ-NEXT: vmovaps %ymm0, %ymm0 8031; AVX512DQ-NEXT: vcvtuqq2ps %zmm0, %ymm0 8032; AVX512DQ-NEXT: # kill: def $xmm0 
killed $xmm0 killed $ymm0 8033; AVX512DQ-NEXT: vzeroupper 8034; AVX512DQ-NEXT: retq 8035entry: 8036 %result = call <4 x float> 8037 @llvm.experimental.constrained.uitofp.v4f32.v4i64(<4 x i64> %x, 8038 metadata !"round.dynamic", 8039 metadata !"fpexcept.strict") #0 8040 ret <4 x float> %result 8041} 8042 8043; Simple test to make sure we don't fuse vselect+strict_fadd into a masked operation. 8044define <16 x float> @vpaddd_mask_test(<16 x float> %i, <16 x float> %j, <16 x i32> %mask1) nounwind readnone strictfp { 8045; CHECK-LABEL: vpaddd_mask_test: 8046; CHECK: # %bb.0: 8047; CHECK-NEXT: pxor %xmm10, %xmm10 8048; CHECK-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm8 8049; CHECK-NEXT: pcmpeqd %xmm10, %xmm8 8050; CHECK-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm9 8051; CHECK-NEXT: pcmpeqd %xmm10, %xmm9 8052; CHECK-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm11 8053; CHECK-NEXT: pcmpeqd %xmm10, %xmm11 8054; CHECK-NEXT: pcmpeqd {{[0-9]+}}(%rsp), %xmm10 8055; CHECK-NEXT: addps %xmm3, %xmm7 8056; CHECK-NEXT: addps %xmm2, %xmm6 8057; CHECK-NEXT: addps %xmm1, %xmm5 8058; CHECK-NEXT: addps %xmm0, %xmm4 8059; CHECK-NEXT: andps %xmm10, %xmm0 8060; CHECK-NEXT: andnps %xmm4, %xmm10 8061; CHECK-NEXT: orps %xmm10, %xmm0 8062; CHECK-NEXT: andps %xmm11, %xmm1 8063; CHECK-NEXT: andnps %xmm5, %xmm11 8064; CHECK-NEXT: orps %xmm11, %xmm1 8065; CHECK-NEXT: andps %xmm9, %xmm2 8066; CHECK-NEXT: andnps %xmm6, %xmm9 8067; CHECK-NEXT: orps %xmm9, %xmm2 8068; CHECK-NEXT: andps %xmm8, %xmm3 8069; CHECK-NEXT: andnps %xmm7, %xmm8 8070; CHECK-NEXT: orps %xmm8, %xmm3 8071; CHECK-NEXT: retq 8072; 8073; AVX1-LABEL: vpaddd_mask_test: 8074; AVX1: # %bb.0: 8075; AVX1-NEXT: vextractf128 $1, %ymm5, %xmm6 8076; AVX1-NEXT: vpxor %xmm7, %xmm7, %xmm7 8077; AVX1-NEXT: vpcmpeqd %xmm7, %xmm6, %xmm6 8078; AVX1-NEXT: vpcmpeqd %xmm7, %xmm5, %xmm5 8079; AVX1-NEXT: vinsertf128 $1, %xmm6, %ymm5, %ymm5 8080; AVX1-NEXT: vextractf128 $1, %ymm4, %xmm6 8081; AVX1-NEXT: vpcmpeqd %xmm7, %xmm6, %xmm6 8082; AVX1-NEXT: vpcmpeqd %xmm7, %xmm4, %xmm4 8083; 
AVX1-NEXT: vinsertf128 $1, %xmm6, %ymm4, %ymm4 8084; AVX1-NEXT: vaddps %ymm3, %ymm1, %ymm3 8085; AVX1-NEXT: vaddps %ymm2, %ymm0, %ymm2 8086; AVX1-NEXT: vblendvps %ymm4, %ymm0, %ymm2, %ymm0 8087; AVX1-NEXT: vblendvps %ymm5, %ymm1, %ymm3, %ymm1 8088; AVX1-NEXT: retq 8089; 8090; AVX512-LABEL: vpaddd_mask_test: 8091; AVX512: # %bb.0: 8092; AVX512-NEXT: vptestmd %zmm2, %zmm2, %k1 8093; AVX512-NEXT: vaddps %zmm1, %zmm0, %zmm1 8094; AVX512-NEXT: vmovaps %zmm1, %zmm0 {%k1} 8095; AVX512-NEXT: retq 8096 %mask = icmp ne <16 x i32> %mask1, zeroinitializer 8097 %x = call <16 x float> @llvm.experimental.constrained.fadd.v16f32(<16 x float> %i, <16 x float> %j, metadata !"round.dynamic", metadata !"fpexcept.strict") #0 8098 %r = select <16 x i1> %mask, <16 x float> %x, <16 x float> %i 8099 ret <16 x float> %r 8100} 8101 8102define <1 x float> @constrained_vector_tan_v1f32() #0 { 8103; CHECK-LABEL: constrained_vector_tan_v1f32: 8104; CHECK: # %bb.0: # %entry 8105; CHECK-NEXT: pushq %rax 8106; CHECK-NEXT: .cfi_def_cfa_offset 16 8107; CHECK-NEXT: movss {{.*#+}} xmm0 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0] 8108; CHECK-NEXT: callq tanf@PLT 8109; CHECK-NEXT: popq %rax 8110; CHECK-NEXT: .cfi_def_cfa_offset 8 8111; CHECK-NEXT: retq 8112; 8113; AVX-LABEL: constrained_vector_tan_v1f32: 8114; AVX: # %bb.0: # %entry 8115; AVX-NEXT: pushq %rax 8116; AVX-NEXT: .cfi_def_cfa_offset 16 8117; AVX-NEXT: vmovss {{.*#+}} xmm0 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0] 8118; AVX-NEXT: callq tanf@PLT 8119; AVX-NEXT: popq %rax 8120; AVX-NEXT: .cfi_def_cfa_offset 8 8121; AVX-NEXT: retq 8122entry: 8123 %tan = call <1 x float> @llvm.experimental.constrained.tan.v1f32( 8124 <1 x float> <float 42.0>, 8125 metadata !"round.dynamic", 8126 metadata !"fpexcept.strict") #0 8127 ret <1 x float> %tan 8128} 8129 8130define <2 x double> @constrained_vector_tan_v2f64() #0 { 8131; CHECK-LABEL: constrained_vector_tan_v2f64: 8132; CHECK: # %bb.0: # %entry 8133; CHECK-NEXT: subq $24, %rsp 8134; CHECK-NEXT: .cfi_def_cfa_offset 32 8135; 
CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0] 8136; CHECK-NEXT: callq tan@PLT 8137; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill 8138; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0] 8139; CHECK-NEXT: callq tan@PLT 8140; CHECK-NEXT: unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload 8141; CHECK-NEXT: # xmm0 = xmm0[0],mem[0] 8142; CHECK-NEXT: addq $24, %rsp 8143; CHECK-NEXT: .cfi_def_cfa_offset 8 8144; CHECK-NEXT: retq 8145; 8146; AVX-LABEL: constrained_vector_tan_v2f64: 8147; AVX: # %bb.0: # %entry 8148; AVX-NEXT: subq $24, %rsp 8149; AVX-NEXT: .cfi_def_cfa_offset 32 8150; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0] 8151; AVX-NEXT: callq tan@PLT 8152; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill 8153; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0] 8154; AVX-NEXT: callq tan@PLT 8155; AVX-NEXT: vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload 8156; AVX-NEXT: # xmm0 = xmm0[0],mem[0] 8157; AVX-NEXT: addq $24, %rsp 8158; AVX-NEXT: .cfi_def_cfa_offset 8 8159; AVX-NEXT: retq 8160entry: 8161 %tan = call <2 x double> @llvm.experimental.constrained.tan.v2f64( 8162 <2 x double> <double 42.0, double 42.1>, 8163 metadata !"round.dynamic", 8164 metadata !"fpexcept.strict") #0 8165 ret <2 x double> %tan 8166} 8167 8168define <3 x float> @constrained_vector_tan_v3f32() #0 { 8169; CHECK-LABEL: constrained_vector_tan_v3f32: 8170; CHECK: # %bb.0: # %entry 8171; CHECK-NEXT: subq $40, %rsp 8172; CHECK-NEXT: .cfi_def_cfa_offset 48 8173; CHECK-NEXT: movss {{.*#+}} xmm0 = [4.4E+1,0.0E+0,0.0E+0,0.0E+0] 8174; CHECK-NEXT: callq tanf@PLT 8175; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 8176; CHECK-NEXT: movss {{.*#+}} xmm0 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0] 8177; CHECK-NEXT: callq tanf@PLT 8178; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill 8179; CHECK-NEXT: movss {{.*#+}} xmm0 = [4.3E+1,0.0E+0,0.0E+0,0.0E+0] 8180; CHECK-NEXT: callq tanf@PLT 8181; CHECK-NEXT: movaps (%rsp), %xmm1 # 16-byte Reload 8182; CHECK-NEXT: 
unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] 8183; CHECK-NEXT: unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload 8184; CHECK-NEXT: # xmm1 = xmm1[0],mem[0] 8185; CHECK-NEXT: movaps %xmm1, %xmm0 8186; CHECK-NEXT: addq $40, %rsp 8187; CHECK-NEXT: .cfi_def_cfa_offset 8 8188; CHECK-NEXT: retq 8189; 8190; AVX-LABEL: constrained_vector_tan_v3f32: 8191; AVX: # %bb.0: # %entry 8192; AVX-NEXT: subq $40, %rsp 8193; AVX-NEXT: .cfi_def_cfa_offset 48 8194; AVX-NEXT: vmovss {{.*#+}} xmm0 = [4.4E+1,0.0E+0,0.0E+0,0.0E+0] 8195; AVX-NEXT: callq tanf@PLT 8196; AVX-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 8197; AVX-NEXT: vmovss {{.*#+}} xmm0 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0] 8198; AVX-NEXT: callq tanf@PLT 8199; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill 8200; AVX-NEXT: vmovss {{.*#+}} xmm0 = [4.3E+1,0.0E+0,0.0E+0,0.0E+0] 8201; AVX-NEXT: callq tanf@PLT 8202; AVX-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload 8203; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3] 8204; AVX-NEXT: vinsertps $32, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload 8205; AVX-NEXT: # xmm0 = xmm0[0,1],mem[0],xmm0[3] 8206; AVX-NEXT: addq $40, %rsp 8207; AVX-NEXT: .cfi_def_cfa_offset 8 8208; AVX-NEXT: retq 8209entry: 8210 %tan = call <3 x float> @llvm.experimental.constrained.tan.v3f32( 8211 <3 x float> <float 42.0, float 43.0, float 44.0>, 8212 metadata !"round.dynamic", 8213 metadata !"fpexcept.strict") #0 8214 ret <3 x float> %tan 8215} 8216 8217define <3 x double> @constrained_vector_tan_v3f64() #0 { 8218; CHECK-LABEL: constrained_vector_tan_v3f64: 8219; CHECK: # %bb.0: # %entry 8220; CHECK-NEXT: subq $24, %rsp 8221; CHECK-NEXT: .cfi_def_cfa_offset 32 8222; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0] 8223; CHECK-NEXT: callq tan@PLT 8224; CHECK-NEXT: movsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill 8225; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0] 8226; CHECK-NEXT: callq tan@PLT 8227; CHECK-NEXT: movsd 
%xmm0, (%rsp) # 8-byte Spill 8228; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2200000000000003E+1,0.0E+0] 8229; CHECK-NEXT: callq tan@PLT 8230; CHECK-NEXT: movsd %xmm0, {{[0-9]+}}(%rsp) 8231; CHECK-NEXT: fldl {{[0-9]+}}(%rsp) 8232; CHECK-NEXT: wait 8233; CHECK-NEXT: movsd (%rsp), %xmm0 # 8-byte Reload 8234; CHECK-NEXT: # xmm0 = mem[0],zero 8235; CHECK-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 8-byte Reload 8236; CHECK-NEXT: # xmm1 = mem[0],zero 8237; CHECK-NEXT: addq $24, %rsp 8238; CHECK-NEXT: .cfi_def_cfa_offset 8 8239; CHECK-NEXT: retq 8240; 8241; AVX-LABEL: constrained_vector_tan_v3f64: 8242; AVX: # %bb.0: # %entry 8243; AVX-NEXT: subq $40, %rsp 8244; AVX-NEXT: .cfi_def_cfa_offset 48 8245; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0] 8246; AVX-NEXT: callq tan@PLT 8247; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill 8248; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0] 8249; AVX-NEXT: callq tan@PLT 8250; AVX-NEXT: vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload 8251; AVX-NEXT: # xmm0 = xmm0[0],mem[0] 8252; AVX-NEXT: vmovups %ymm0, (%rsp) # 32-byte Spill 8253; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2200000000000003E+1,0.0E+0] 8254; AVX-NEXT: vzeroupper 8255; AVX-NEXT: callq tan@PLT 8256; AVX-NEXT: vmovups (%rsp), %ymm1 # 32-byte Reload 8257; AVX-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 8258; AVX-NEXT: addq $40, %rsp 8259; AVX-NEXT: .cfi_def_cfa_offset 8 8260; AVX-NEXT: retq 8261entry: 8262 %tan = call <3 x double> @llvm.experimental.constrained.tan.v3f64( 8263 <3 x double> <double 42.0, double 42.1, double 42.2>, 8264 metadata !"round.dynamic", 8265 metadata !"fpexcept.strict") #0 8266 ret <3 x double> %tan 8267} 8268 8269define <4 x double> @constrained_vector_tan_v4f64() #0 { 8270; CHECK-LABEL: constrained_vector_tan_v4f64: 8271; CHECK: # %bb.0: # %entry 8272; CHECK-NEXT: subq $40, %rsp 8273; CHECK-NEXT: .cfi_def_cfa_offset 48 8274; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0] 8275; CHECK-NEXT: callq tan@PLT 8276; 
CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill 8277; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0] 8278; CHECK-NEXT: callq tan@PLT 8279; CHECK-NEXT: unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload 8280; CHECK-NEXT: # xmm0 = xmm0[0],mem[0] 8281; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill 8282; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2299999999999997E+1,0.0E+0] 8283; CHECK-NEXT: callq tan@PLT 8284; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 8285; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2200000000000003E+1,0.0E+0] 8286; CHECK-NEXT: callq tan@PLT 8287; CHECK-NEXT: movaps %xmm0, %xmm1 8288; CHECK-NEXT: unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload 8289; CHECK-NEXT: # xmm1 = xmm1[0],mem[0] 8290; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload 8291; CHECK-NEXT: addq $40, %rsp 8292; CHECK-NEXT: .cfi_def_cfa_offset 8 8293; CHECK-NEXT: retq 8294; 8295; AVX-LABEL: constrained_vector_tan_v4f64: 8296; AVX: # %bb.0: # %entry 8297; AVX-NEXT: subq $40, %rsp 8298; AVX-NEXT: .cfi_def_cfa_offset 48 8299; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2299999999999997E+1,0.0E+0] 8300; AVX-NEXT: callq tan@PLT 8301; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill 8302; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2200000000000003E+1,0.0E+0] 8303; AVX-NEXT: callq tan@PLT 8304; AVX-NEXT: vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload 8305; AVX-NEXT: # xmm0 = xmm0[0],mem[0] 8306; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill 8307; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0] 8308; AVX-NEXT: callq tan@PLT 8309; AVX-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 8310; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0] 8311; AVX-NEXT: callq tan@PLT 8312; AVX-NEXT: vunpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload 8313; AVX-NEXT: # xmm0 = xmm0[0],mem[0] 8314; AVX-NEXT: vinsertf128 $1, (%rsp), %ymm0, %ymm0 # 16-byte Folded Reload 8315; AVX-NEXT: addq $40, %rsp 8316; AVX-NEXT: .cfi_def_cfa_offset 8 
8317; AVX-NEXT: retq 8318entry: 8319 %tan = call <4 x double> @llvm.experimental.constrained.tan.v4f64( 8320 <4 x double> <double 42.0, double 42.1, 8321 double 42.2, double 42.3>, 8322 metadata !"round.dynamic", 8323 metadata !"fpexcept.strict") #0 8324 ret <4 x double> %tan 8325} 8326 8327define <1 x float> @constrained_vector_acos_v1f32() #0 { 8328; CHECK-LABEL: constrained_vector_acos_v1f32: 8329; CHECK: # %bb.0: # %entry 8330; CHECK-NEXT: pushq %rax 8331; CHECK-NEXT: .cfi_def_cfa_offset 16 8332; CHECK-NEXT: movss {{.*#+}} xmm0 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0] 8333; CHECK-NEXT: callq acosf@PLT 8334; CHECK-NEXT: popq %rax 8335; CHECK-NEXT: .cfi_def_cfa_offset 8 8336; CHECK-NEXT: retq 8337; 8338; AVX-LABEL: constrained_vector_acos_v1f32: 8339; AVX: # %bb.0: # %entry 8340; AVX-NEXT: pushq %rax 8341; AVX-NEXT: .cfi_def_cfa_offset 16 8342; AVX-NEXT: vmovss {{.*#+}} xmm0 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0] 8343; AVX-NEXT: callq acosf@PLT 8344; AVX-NEXT: popq %rax 8345; AVX-NEXT: .cfi_def_cfa_offset 8 8346; AVX-NEXT: retq 8347entry: 8348 %acos = call <1 x float> @llvm.experimental.constrained.acos.v1f32( 8349 <1 x float> <float 42.0>, 8350 metadata !"round.dynamic", 8351 metadata !"fpexcept.strict") #0 8352 ret <1 x float> %acos 8353} 8354 8355define <2 x double> @constrained_vector_acos_v2f64() #0 { 8356; CHECK-LABEL: constrained_vector_acos_v2f64: 8357; CHECK: # %bb.0: # %entry 8358; CHECK-NEXT: subq $24, %rsp 8359; CHECK-NEXT: .cfi_def_cfa_offset 32 8360; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0] 8361; CHECK-NEXT: callq acos@PLT 8362; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill 8363; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0] 8364; CHECK-NEXT: callq acos@PLT 8365; CHECK-NEXT: unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload 8366; CHECK-NEXT: # xmm0 = xmm0[0],mem[0] 8367; CHECK-NEXT: addq $24, %rsp 8368; CHECK-NEXT: .cfi_def_cfa_offset 8 8369; CHECK-NEXT: retq 8370; 8371; AVX-LABEL: constrained_vector_acos_v2f64: 8372; AVX: # %bb.0: # 
%entry 8373; AVX-NEXT: subq $24, %rsp 8374; AVX-NEXT: .cfi_def_cfa_offset 32 8375; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0] 8376; AVX-NEXT: callq acos@PLT 8377; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill 8378; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0] 8379; AVX-NEXT: callq acos@PLT 8380; AVX-NEXT: vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload 8381; AVX-NEXT: # xmm0 = xmm0[0],mem[0] 8382; AVX-NEXT: addq $24, %rsp 8383; AVX-NEXT: .cfi_def_cfa_offset 8 8384; AVX-NEXT: retq 8385entry: 8386 %acos = call <2 x double> @llvm.experimental.constrained.acos.v2f64( 8387 <2 x double> <double 42.0, double 42.1>, 8388 metadata !"round.dynamic", 8389 metadata !"fpexcept.strict") #0 8390 ret <2 x double> %acos 8391} 8392 8393define <3 x float> @constrained_vector_acos_v3f32() #0 { 8394; CHECK-LABEL: constrained_vector_acos_v3f32: 8395; CHECK: # %bb.0: # %entry 8396; CHECK-NEXT: subq $40, %rsp 8397; CHECK-NEXT: .cfi_def_cfa_offset 48 8398; CHECK-NEXT: movss {{.*#+}} xmm0 = [4.4E+1,0.0E+0,0.0E+0,0.0E+0] 8399; CHECK-NEXT: callq acosf@PLT 8400; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 8401; CHECK-NEXT: movss {{.*#+}} xmm0 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0] 8402; CHECK-NEXT: callq acosf@PLT 8403; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill 8404; CHECK-NEXT: movss {{.*#+}} xmm0 = [4.3E+1,0.0E+0,0.0E+0,0.0E+0] 8405; CHECK-NEXT: callq acosf@PLT 8406; CHECK-NEXT: movaps (%rsp), %xmm1 # 16-byte Reload 8407; CHECK-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] 8408; CHECK-NEXT: unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload 8409; CHECK-NEXT: # xmm1 = xmm1[0],mem[0] 8410; CHECK-NEXT: movaps %xmm1, %xmm0 8411; CHECK-NEXT: addq $40, %rsp 8412; CHECK-NEXT: .cfi_def_cfa_offset 8 8413; CHECK-NEXT: retq 8414; 8415; AVX-LABEL: constrained_vector_acos_v3f32: 8416; AVX: # %bb.0: # %entry 8417; AVX-NEXT: subq $40, %rsp 8418; AVX-NEXT: .cfi_def_cfa_offset 48 8419; AVX-NEXT: vmovss {{.*#+}} xmm0 = 
[4.4E+1,0.0E+0,0.0E+0,0.0E+0] 8420; AVX-NEXT: callq acosf@PLT 8421; AVX-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 8422; AVX-NEXT: vmovss {{.*#+}} xmm0 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0] 8423; AVX-NEXT: callq acosf@PLT 8424; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill 8425; AVX-NEXT: vmovss {{.*#+}} xmm0 = [4.3E+1,0.0E+0,0.0E+0,0.0E+0] 8426; AVX-NEXT: callq acosf@PLT 8427; AVX-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload 8428; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3] 8429; AVX-NEXT: vinsertps $32, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload 8430; AVX-NEXT: # xmm0 = xmm0[0,1],mem[0],xmm0[3] 8431; AVX-NEXT: addq $40, %rsp 8432; AVX-NEXT: .cfi_def_cfa_offset 8 8433; AVX-NEXT: retq 8434entry: 8435 %acos = call <3 x float> @llvm.experimental.constrained.acos.v3f32( 8436 <3 x float> <float 42.0, float 43.0, float 44.0>, 8437 metadata !"round.dynamic", 8438 metadata !"fpexcept.strict") #0 8439 ret <3 x float> %acos 8440} 8441 8442define <3 x double> @constrained_vector_acos_v3f64() #0 { 8443; CHECK-LABEL: constrained_vector_acos_v3f64: 8444; CHECK: # %bb.0: # %entry 8445; CHECK-NEXT: subq $24, %rsp 8446; CHECK-NEXT: .cfi_def_cfa_offset 32 8447; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0] 8448; CHECK-NEXT: callq acos@PLT 8449; CHECK-NEXT: movsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill 8450; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0] 8451; CHECK-NEXT: callq acos@PLT 8452; CHECK-NEXT: movsd %xmm0, (%rsp) # 8-byte Spill 8453; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2200000000000003E+1,0.0E+0] 8454; CHECK-NEXT: callq acos@PLT 8455; CHECK-NEXT: movsd %xmm0, {{[0-9]+}}(%rsp) 8456; CHECK-NEXT: fldl {{[0-9]+}}(%rsp) 8457; CHECK-NEXT: wait 8458; CHECK-NEXT: movsd (%rsp), %xmm0 # 8-byte Reload 8459; CHECK-NEXT: # xmm0 = mem[0],zero 8460; CHECK-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 8-byte Reload 8461; CHECK-NEXT: # xmm1 = mem[0],zero 8462; CHECK-NEXT: addq $24, %rsp 8463; CHECK-NEXT: 
.cfi_def_cfa_offset 8 8464; CHECK-NEXT: retq 8465; 8466; AVX-LABEL: constrained_vector_acos_v3f64: 8467; AVX: # %bb.0: # %entry 8468; AVX-NEXT: subq $40, %rsp 8469; AVX-NEXT: .cfi_def_cfa_offset 48 8470; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0] 8471; AVX-NEXT: callq acos@PLT 8472; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill 8473; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0] 8474; AVX-NEXT: callq acos@PLT 8475; AVX-NEXT: vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload 8476; AVX-NEXT: # xmm0 = xmm0[0],mem[0] 8477; AVX-NEXT: vmovups %ymm0, (%rsp) # 32-byte Spill 8478; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2200000000000003E+1,0.0E+0] 8479; AVX-NEXT: vzeroupper 8480; AVX-NEXT: callq acos@PLT 8481; AVX-NEXT: vmovups (%rsp), %ymm1 # 32-byte Reload 8482; AVX-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 8483; AVX-NEXT: addq $40, %rsp 8484; AVX-NEXT: .cfi_def_cfa_offset 8 8485; AVX-NEXT: retq 8486entry: 8487 %acos = call <3 x double> @llvm.experimental.constrained.acos.v3f64( 8488 <3 x double> <double 42.0, double 42.1, double 42.2>, 8489 metadata !"round.dynamic", 8490 metadata !"fpexcept.strict") #0 8491 ret <3 x double> %acos 8492} 8493 8494define <4 x double> @constrained_vector_acos_v4f64() #0 { 8495; CHECK-LABEL: constrained_vector_acos_v4f64: 8496; CHECK: # %bb.0: # %entry 8497; CHECK-NEXT: subq $40, %rsp 8498; CHECK-NEXT: .cfi_def_cfa_offset 48 8499; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0] 8500; CHECK-NEXT: callq acos@PLT 8501; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill 8502; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0] 8503; CHECK-NEXT: callq acos@PLT 8504; CHECK-NEXT: unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload 8505; CHECK-NEXT: # xmm0 = xmm0[0],mem[0] 8506; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill 8507; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2299999999999997E+1,0.0E+0] 8508; CHECK-NEXT: callq acos@PLT 8509; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 8510; 
CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2200000000000003E+1,0.0E+0] 8511; CHECK-NEXT: callq acos@PLT 8512; CHECK-NEXT: movaps %xmm0, %xmm1 8513; CHECK-NEXT: unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload 8514; CHECK-NEXT: # xmm1 = xmm1[0],mem[0] 8515; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload 8516; CHECK-NEXT: addq $40, %rsp 8517; CHECK-NEXT: .cfi_def_cfa_offset 8 8518; CHECK-NEXT: retq 8519; 8520; AVX-LABEL: constrained_vector_acos_v4f64: 8521; AVX: # %bb.0: # %entry 8522; AVX-NEXT: subq $40, %rsp 8523; AVX-NEXT: .cfi_def_cfa_offset 48 8524; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2299999999999997E+1,0.0E+0] 8525; AVX-NEXT: callq acos@PLT 8526; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill 8527; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2200000000000003E+1,0.0E+0] 8528; AVX-NEXT: callq acos@PLT 8529; AVX-NEXT: vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload 8530; AVX-NEXT: # xmm0 = xmm0[0],mem[0] 8531; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill 8532; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0] 8533; AVX-NEXT: callq acos@PLT 8534; AVX-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 8535; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0] 8536; AVX-NEXT: callq acos@PLT 8537; AVX-NEXT: vunpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload 8538; AVX-NEXT: # xmm0 = xmm0[0],mem[0] 8539; AVX-NEXT: vinsertf128 $1, (%rsp), %ymm0, %ymm0 # 16-byte Folded Reload 8540; AVX-NEXT: addq $40, %rsp 8541; AVX-NEXT: .cfi_def_cfa_offset 8 8542; AVX-NEXT: retq 8543entry: 8544 %acos = call <4 x double> @llvm.experimental.constrained.acos.v4f64( 8545 <4 x double> <double 42.0, double 42.1, 8546 double 42.2, double 42.3>, 8547 metadata !"round.dynamic", 8548 metadata !"fpexcept.strict") #0 8549 ret <4 x double> %acos 8550} 8551 8552define <1 x float> @constrained_vector_asin_v1f32() #0 { 8553; CHECK-LABEL: constrained_vector_asin_v1f32: 8554; CHECK: # %bb.0: # %entry 8555; CHECK-NEXT: pushq %rax 8556; 
CHECK-NEXT: .cfi_def_cfa_offset 16 8557; CHECK-NEXT: movss {{.*#+}} xmm0 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0] 8558; CHECK-NEXT: callq asinf@PLT 8559; CHECK-NEXT: popq %rax 8560; CHECK-NEXT: .cfi_def_cfa_offset 8 8561; CHECK-NEXT: retq 8562; 8563; AVX-LABEL: constrained_vector_asin_v1f32: 8564; AVX: # %bb.0: # %entry 8565; AVX-NEXT: pushq %rax 8566; AVX-NEXT: .cfi_def_cfa_offset 16 8567; AVX-NEXT: vmovss {{.*#+}} xmm0 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0] 8568; AVX-NEXT: callq asinf@PLT 8569; AVX-NEXT: popq %rax 8570; AVX-NEXT: .cfi_def_cfa_offset 8 8571; AVX-NEXT: retq 8572entry: 8573 %asin = call <1 x float> @llvm.experimental.constrained.asin.v1f32( 8574 <1 x float> <float 42.0>, 8575 metadata !"round.dynamic", 8576 metadata !"fpexcept.strict") #0 8577 ret <1 x float> %asin 8578} 8579 8580define <2 x double> @constrained_vector_asin_v2f64() #0 { 8581; CHECK-LABEL: constrained_vector_asin_v2f64: 8582; CHECK: # %bb.0: # %entry 8583; CHECK-NEXT: subq $24, %rsp 8584; CHECK-NEXT: .cfi_def_cfa_offset 32 8585; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0] 8586; CHECK-NEXT: callq asin@PLT 8587; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill 8588; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0] 8589; CHECK-NEXT: callq asin@PLT 8590; CHECK-NEXT: unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload 8591; CHECK-NEXT: # xmm0 = xmm0[0],mem[0] 8592; CHECK-NEXT: addq $24, %rsp 8593; CHECK-NEXT: .cfi_def_cfa_offset 8 8594; CHECK-NEXT: retq 8595; 8596; AVX-LABEL: constrained_vector_asin_v2f64: 8597; AVX: # %bb.0: # %entry 8598; AVX-NEXT: subq $24, %rsp 8599; AVX-NEXT: .cfi_def_cfa_offset 32 8600; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0] 8601; AVX-NEXT: callq asin@PLT 8602; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill 8603; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0] 8604; AVX-NEXT: callq asin@PLT 8605; AVX-NEXT: vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload 8606; AVX-NEXT: # xmm0 = xmm0[0],mem[0] 8607; AVX-NEXT: addq $24, %rsp 8608; 
AVX-NEXT: .cfi_def_cfa_offset 8 8609; AVX-NEXT: retq 8610entry: 8611 %asin = call <2 x double> @llvm.experimental.constrained.asin.v2f64( 8612 <2 x double> <double 42.0, double 42.1>, 8613 metadata !"round.dynamic", 8614 metadata !"fpexcept.strict") #0 8615 ret <2 x double> %asin 8616} 8617 8618define <3 x float> @constrained_vector_asin_v3f32() #0 { 8619; CHECK-LABEL: constrained_vector_asin_v3f32: 8620; CHECK: # %bb.0: # %entry 8621; CHECK-NEXT: subq $40, %rsp 8622; CHECK-NEXT: .cfi_def_cfa_offset 48 8623; CHECK-NEXT: movss {{.*#+}} xmm0 = [4.4E+1,0.0E+0,0.0E+0,0.0E+0] 8624; CHECK-NEXT: callq asinf@PLT 8625; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 8626; CHECK-NEXT: movss {{.*#+}} xmm0 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0] 8627; CHECK-NEXT: callq asinf@PLT 8628; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill 8629; CHECK-NEXT: movss {{.*#+}} xmm0 = [4.3E+1,0.0E+0,0.0E+0,0.0E+0] 8630; CHECK-NEXT: callq asinf@PLT 8631; CHECK-NEXT: movaps (%rsp), %xmm1 # 16-byte Reload 8632; CHECK-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] 8633; CHECK-NEXT: unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload 8634; CHECK-NEXT: # xmm1 = xmm1[0],mem[0] 8635; CHECK-NEXT: movaps %xmm1, %xmm0 8636; CHECK-NEXT: addq $40, %rsp 8637; CHECK-NEXT: .cfi_def_cfa_offset 8 8638; CHECK-NEXT: retq 8639; 8640; AVX-LABEL: constrained_vector_asin_v3f32: 8641; AVX: # %bb.0: # %entry 8642; AVX-NEXT: subq $40, %rsp 8643; AVX-NEXT: .cfi_def_cfa_offset 48 8644; AVX-NEXT: vmovss {{.*#+}} xmm0 = [4.4E+1,0.0E+0,0.0E+0,0.0E+0] 8645; AVX-NEXT: callq asinf@PLT 8646; AVX-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 8647; AVX-NEXT: vmovss {{.*#+}} xmm0 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0] 8648; AVX-NEXT: callq asinf@PLT 8649; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill 8650; AVX-NEXT: vmovss {{.*#+}} xmm0 = [4.3E+1,0.0E+0,0.0E+0,0.0E+0] 8651; AVX-NEXT: callq asinf@PLT 8652; AVX-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload 8653; AVX-NEXT: 
vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3] 8654; AVX-NEXT: vinsertps $32, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload 8655; AVX-NEXT: # xmm0 = xmm0[0,1],mem[0],xmm0[3] 8656; AVX-NEXT: addq $40, %rsp 8657; AVX-NEXT: .cfi_def_cfa_offset 8 8658; AVX-NEXT: retq 8659entry: 8660 %asin = call <3 x float> @llvm.experimental.constrained.asin.v3f32( 8661 <3 x float> <float 42.0, float 43.0, float 44.0>, 8662 metadata !"round.dynamic", 8663 metadata !"fpexcept.strict") #0 8664 ret <3 x float> %asin 8665} 8666 8667define <3 x double> @constrained_vector_asin_v3f64() #0 { 8668; CHECK-LABEL: constrained_vector_asin_v3f64: 8669; CHECK: # %bb.0: # %entry 8670; CHECK-NEXT: subq $24, %rsp 8671; CHECK-NEXT: .cfi_def_cfa_offset 32 8672; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0] 8673; CHECK-NEXT: callq asin@PLT 8674; CHECK-NEXT: movsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill 8675; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0] 8676; CHECK-NEXT: callq asin@PLT 8677; CHECK-NEXT: movsd %xmm0, (%rsp) # 8-byte Spill 8678; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2200000000000003E+1,0.0E+0] 8679; CHECK-NEXT: callq asin@PLT 8680; CHECK-NEXT: movsd %xmm0, {{[0-9]+}}(%rsp) 8681; CHECK-NEXT: fldl {{[0-9]+}}(%rsp) 8682; CHECK-NEXT: wait 8683; CHECK-NEXT: movsd (%rsp), %xmm0 # 8-byte Reload 8684; CHECK-NEXT: # xmm0 = mem[0],zero 8685; CHECK-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 8-byte Reload 8686; CHECK-NEXT: # xmm1 = mem[0],zero 8687; CHECK-NEXT: addq $24, %rsp 8688; CHECK-NEXT: .cfi_def_cfa_offset 8 8689; CHECK-NEXT: retq 8690; 8691; AVX-LABEL: constrained_vector_asin_v3f64: 8692; AVX: # %bb.0: # %entry 8693; AVX-NEXT: subq $40, %rsp 8694; AVX-NEXT: .cfi_def_cfa_offset 48 8695; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0] 8696; AVX-NEXT: callq asin@PLT 8697; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill 8698; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0] 8699; AVX-NEXT: callq asin@PLT 8700; AVX-NEXT: 
vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload 8701; AVX-NEXT: # xmm0 = xmm0[0],mem[0] 8702; AVX-NEXT: vmovups %ymm0, (%rsp) # 32-byte Spill 8703; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2200000000000003E+1,0.0E+0] 8704; AVX-NEXT: vzeroupper 8705; AVX-NEXT: callq asin@PLT 8706; AVX-NEXT: vmovups (%rsp), %ymm1 # 32-byte Reload 8707; AVX-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 8708; AVX-NEXT: addq $40, %rsp 8709; AVX-NEXT: .cfi_def_cfa_offset 8 8710; AVX-NEXT: retq 8711entry: 8712 %asin = call <3 x double> @llvm.experimental.constrained.asin.v3f64( 8713 <3 x double> <double 42.0, double 42.1, double 42.2>, 8714 metadata !"round.dynamic", 8715 metadata !"fpexcept.strict") #0 8716 ret <3 x double> %asin 8717} 8718 8719define <4 x double> @constrained_vector_asin_v4f64() #0 { 8720; CHECK-LABEL: constrained_vector_asin_v4f64: 8721; CHECK: # %bb.0: # %entry 8722; CHECK-NEXT: subq $40, %rsp 8723; CHECK-NEXT: .cfi_def_cfa_offset 48 8724; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0] 8725; CHECK-NEXT: callq asin@PLT 8726; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill 8727; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0] 8728; CHECK-NEXT: callq asin@PLT 8729; CHECK-NEXT: unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload 8730; CHECK-NEXT: # xmm0 = xmm0[0],mem[0] 8731; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill 8732; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2299999999999997E+1,0.0E+0] 8733; CHECK-NEXT: callq asin@PLT 8734; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 8735; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2200000000000003E+1,0.0E+0] 8736; CHECK-NEXT: callq asin@PLT 8737; CHECK-NEXT: movaps %xmm0, %xmm1 8738; CHECK-NEXT: unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload 8739; CHECK-NEXT: # xmm1 = xmm1[0],mem[0] 8740; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload 8741; CHECK-NEXT: addq $40, %rsp 8742; CHECK-NEXT: .cfi_def_cfa_offset 8 8743; CHECK-NEXT: retq 8744; 8745; AVX-LABEL: 
constrained_vector_asin_v4f64: 8746; AVX: # %bb.0: # %entry 8747; AVX-NEXT: subq $40, %rsp 8748; AVX-NEXT: .cfi_def_cfa_offset 48 8749; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2299999999999997E+1,0.0E+0] 8750; AVX-NEXT: callq asin@PLT 8751; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill 8752; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2200000000000003E+1,0.0E+0] 8753; AVX-NEXT: callq asin@PLT 8754; AVX-NEXT: vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload 8755; AVX-NEXT: # xmm0 = xmm0[0],mem[0] 8756; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill 8757; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0] 8758; AVX-NEXT: callq asin@PLT 8759; AVX-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 8760; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0] 8761; AVX-NEXT: callq asin@PLT 8762; AVX-NEXT: vunpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload 8763; AVX-NEXT: # xmm0 = xmm0[0],mem[0] 8764; AVX-NEXT: vinsertf128 $1, (%rsp), %ymm0, %ymm0 # 16-byte Folded Reload 8765; AVX-NEXT: addq $40, %rsp 8766; AVX-NEXT: .cfi_def_cfa_offset 8 8767; AVX-NEXT: retq 8768entry: 8769 %asin = call <4 x double> @llvm.experimental.constrained.asin.v4f64( 8770 <4 x double> <double 42.0, double 42.1, 8771 double 42.2, double 42.3>, 8772 metadata !"round.dynamic", 8773 metadata !"fpexcept.strict") #0 8774 ret <4 x double> %asin 8775} 8776 8777define <1 x float> @constrained_vector_atan_v1f32() #0 { 8778; CHECK-LABEL: constrained_vector_atan_v1f32: 8779; CHECK: # %bb.0: # %entry 8780; CHECK-NEXT: pushq %rax 8781; CHECK-NEXT: .cfi_def_cfa_offset 16 8782; CHECK-NEXT: movss {{.*#+}} xmm0 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0] 8783; CHECK-NEXT: callq atanf@PLT 8784; CHECK-NEXT: popq %rax 8785; CHECK-NEXT: .cfi_def_cfa_offset 8 8786; CHECK-NEXT: retq 8787; 8788; AVX-LABEL: constrained_vector_atan_v1f32: 8789; AVX: # %bb.0: # %entry 8790; AVX-NEXT: pushq %rax 8791; AVX-NEXT: .cfi_def_cfa_offset 16 8792; AVX-NEXT: vmovss {{.*#+}} xmm0 = 
[4.2E+1,0.0E+0,0.0E+0,0.0E+0] 8793; AVX-NEXT: callq atanf@PLT 8794; AVX-NEXT: popq %rax 8795; AVX-NEXT: .cfi_def_cfa_offset 8 8796; AVX-NEXT: retq 8797entry: 8798 %atan = call <1 x float> @llvm.experimental.constrained.atan.v1f32( 8799 <1 x float> <float 42.0>, 8800 metadata !"round.dynamic", 8801 metadata !"fpexcept.strict") #0 8802 ret <1 x float> %atan 8803} 8804 8805define <2 x double> @constrained_vector_atan_v2f64() #0 { 8806; CHECK-LABEL: constrained_vector_atan_v2f64: 8807; CHECK: # %bb.0: # %entry 8808; CHECK-NEXT: subq $24, %rsp 8809; CHECK-NEXT: .cfi_def_cfa_offset 32 8810; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0] 8811; CHECK-NEXT: callq atan@PLT 8812; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill 8813; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0] 8814; CHECK-NEXT: callq atan@PLT 8815; CHECK-NEXT: unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload 8816; CHECK-NEXT: # xmm0 = xmm0[0],mem[0] 8817; CHECK-NEXT: addq $24, %rsp 8818; CHECK-NEXT: .cfi_def_cfa_offset 8 8819; CHECK-NEXT: retq 8820; 8821; AVX-LABEL: constrained_vector_atan_v2f64: 8822; AVX: # %bb.0: # %entry 8823; AVX-NEXT: subq $24, %rsp 8824; AVX-NEXT: .cfi_def_cfa_offset 32 8825; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0] 8826; AVX-NEXT: callq atan@PLT 8827; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill 8828; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0] 8829; AVX-NEXT: callq atan@PLT 8830; AVX-NEXT: vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload 8831; AVX-NEXT: # xmm0 = xmm0[0],mem[0] 8832; AVX-NEXT: addq $24, %rsp 8833; AVX-NEXT: .cfi_def_cfa_offset 8 8834; AVX-NEXT: retq 8835entry: 8836 %atan = call <2 x double> @llvm.experimental.constrained.atan.v2f64( 8837 <2 x double> <double 42.0, double 42.1>, 8838 metadata !"round.dynamic", 8839 metadata !"fpexcept.strict") #0 8840 ret <2 x double> %atan 8841} 8842 8843define <3 x float> @constrained_vector_atan_v3f32() #0 { 8844; CHECK-LABEL: constrained_vector_atan_v3f32: 8845; CHECK: # 
%bb.0: # %entry 8846; CHECK-NEXT: subq $40, %rsp 8847; CHECK-NEXT: .cfi_def_cfa_offset 48 8848; CHECK-NEXT: movss {{.*#+}} xmm0 = [4.4E+1,0.0E+0,0.0E+0,0.0E+0] 8849; CHECK-NEXT: callq atanf@PLT 8850; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 8851; CHECK-NEXT: movss {{.*#+}} xmm0 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0] 8852; CHECK-NEXT: callq atanf@PLT 8853; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill 8854; CHECK-NEXT: movss {{.*#+}} xmm0 = [4.3E+1,0.0E+0,0.0E+0,0.0E+0] 8855; CHECK-NEXT: callq atanf@PLT 8856; CHECK-NEXT: movaps (%rsp), %xmm1 # 16-byte Reload 8857; CHECK-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] 8858; CHECK-NEXT: unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload 8859; CHECK-NEXT: # xmm1 = xmm1[0],mem[0] 8860; CHECK-NEXT: movaps %xmm1, %xmm0 8861; CHECK-NEXT: addq $40, %rsp 8862; CHECK-NEXT: .cfi_def_cfa_offset 8 8863; CHECK-NEXT: retq 8864; 8865; AVX-LABEL: constrained_vector_atan_v3f32: 8866; AVX: # %bb.0: # %entry 8867; AVX-NEXT: subq $40, %rsp 8868; AVX-NEXT: .cfi_def_cfa_offset 48 8869; AVX-NEXT: vmovss {{.*#+}} xmm0 = [4.4E+1,0.0E+0,0.0E+0,0.0E+0] 8870; AVX-NEXT: callq atanf@PLT 8871; AVX-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 8872; AVX-NEXT: vmovss {{.*#+}} xmm0 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0] 8873; AVX-NEXT: callq atanf@PLT 8874; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill 8875; AVX-NEXT: vmovss {{.*#+}} xmm0 = [4.3E+1,0.0E+0,0.0E+0,0.0E+0] 8876; AVX-NEXT: callq atanf@PLT 8877; AVX-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload 8878; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3] 8879; AVX-NEXT: vinsertps $32, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload 8880; AVX-NEXT: # xmm0 = xmm0[0,1],mem[0],xmm0[3] 8881; AVX-NEXT: addq $40, %rsp 8882; AVX-NEXT: .cfi_def_cfa_offset 8 8883; AVX-NEXT: retq 8884entry: 8885 %atan = call <3 x float> @llvm.experimental.constrained.atan.v3f32( 8886 <3 x float> <float 42.0, float 43.0, float 44.0>, 
8887 metadata !"round.dynamic", 8888 metadata !"fpexcept.strict") #0 8889 ret <3 x float> %atan 8890} 8891 8892define <3 x double> @constrained_vector_atan_v3f64() #0 { 8893; CHECK-LABEL: constrained_vector_atan_v3f64: 8894; CHECK: # %bb.0: # %entry 8895; CHECK-NEXT: subq $24, %rsp 8896; CHECK-NEXT: .cfi_def_cfa_offset 32 8897; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0] 8898; CHECK-NEXT: callq atan@PLT 8899; CHECK-NEXT: movsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill 8900; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0] 8901; CHECK-NEXT: callq atan@PLT 8902; CHECK-NEXT: movsd %xmm0, (%rsp) # 8-byte Spill 8903; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2200000000000003E+1,0.0E+0] 8904; CHECK-NEXT: callq atan@PLT 8905; CHECK-NEXT: movsd %xmm0, {{[0-9]+}}(%rsp) 8906; CHECK-NEXT: fldl {{[0-9]+}}(%rsp) 8907; CHECK-NEXT: wait 8908; CHECK-NEXT: movsd (%rsp), %xmm0 # 8-byte Reload 8909; CHECK-NEXT: # xmm0 = mem[0],zero 8910; CHECK-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 8-byte Reload 8911; CHECK-NEXT: # xmm1 = mem[0],zero 8912; CHECK-NEXT: addq $24, %rsp 8913; CHECK-NEXT: .cfi_def_cfa_offset 8 8914; CHECK-NEXT: retq 8915; 8916; AVX-LABEL: constrained_vector_atan_v3f64: 8917; AVX: # %bb.0: # %entry 8918; AVX-NEXT: subq $40, %rsp 8919; AVX-NEXT: .cfi_def_cfa_offset 48 8920; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0] 8921; AVX-NEXT: callq atan@PLT 8922; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill 8923; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0] 8924; AVX-NEXT: callq atan@PLT 8925; AVX-NEXT: vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload 8926; AVX-NEXT: # xmm0 = xmm0[0],mem[0] 8927; AVX-NEXT: vmovups %ymm0, (%rsp) # 32-byte Spill 8928; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2200000000000003E+1,0.0E+0] 8929; AVX-NEXT: vzeroupper 8930; AVX-NEXT: callq atan@PLT 8931; AVX-NEXT: vmovups (%rsp), %ymm1 # 32-byte Reload 8932; AVX-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 8933; AVX-NEXT: addq $40, %rsp 8934; AVX-NEXT: 
.cfi_def_cfa_offset 8 8935; AVX-NEXT: retq 8936entry: 8937 %atan = call <3 x double> @llvm.experimental.constrained.atan.v3f64( 8938 <3 x double> <double 42.0, double 42.1, double 42.2>, 8939 metadata !"round.dynamic", 8940 metadata !"fpexcept.strict") #0 8941 ret <3 x double> %atan 8942} 8943 8944define <4 x double> @constrained_vector_atan_v4f64() #0 { 8945; CHECK-LABEL: constrained_vector_atan_v4f64: 8946; CHECK: # %bb.0: # %entry 8947; CHECK-NEXT: subq $40, %rsp 8948; CHECK-NEXT: .cfi_def_cfa_offset 48 8949; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0] 8950; CHECK-NEXT: callq atan@PLT 8951; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill 8952; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0] 8953; CHECK-NEXT: callq atan@PLT 8954; CHECK-NEXT: unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload 8955; CHECK-NEXT: # xmm0 = xmm0[0],mem[0] 8956; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill 8957; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2299999999999997E+1,0.0E+0] 8958; CHECK-NEXT: callq atan@PLT 8959; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 8960; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2200000000000003E+1,0.0E+0] 8961; CHECK-NEXT: callq atan@PLT 8962; CHECK-NEXT: movaps %xmm0, %xmm1 8963; CHECK-NEXT: unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload 8964; CHECK-NEXT: # xmm1 = xmm1[0],mem[0] 8965; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload 8966; CHECK-NEXT: addq $40, %rsp 8967; CHECK-NEXT: .cfi_def_cfa_offset 8 8968; CHECK-NEXT: retq 8969; 8970; AVX-LABEL: constrained_vector_atan_v4f64: 8971; AVX: # %bb.0: # %entry 8972; AVX-NEXT: subq $40, %rsp 8973; AVX-NEXT: .cfi_def_cfa_offset 48 8974; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2299999999999997E+1,0.0E+0] 8975; AVX-NEXT: callq atan@PLT 8976; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill 8977; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2200000000000003E+1,0.0E+0] 8978; AVX-NEXT: callq atan@PLT 8979; AVX-NEXT: vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload 
8980; AVX-NEXT: # xmm0 = xmm0[0],mem[0] 8981; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill 8982; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0] 8983; AVX-NEXT: callq atan@PLT 8984; AVX-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 8985; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0] 8986; AVX-NEXT: callq atan@PLT 8987; AVX-NEXT: vunpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload 8988; AVX-NEXT: # xmm0 = xmm0[0],mem[0] 8989; AVX-NEXT: vinsertf128 $1, (%rsp), %ymm0, %ymm0 # 16-byte Folded Reload 8990; AVX-NEXT: addq $40, %rsp 8991; AVX-NEXT: .cfi_def_cfa_offset 8 8992; AVX-NEXT: retq 8993entry: 8994 %atan = call <4 x double> @llvm.experimental.constrained.atan.v4f64( 8995 <4 x double> <double 42.0, double 42.1, 8996 double 42.2, double 42.3>, 8997 metadata !"round.dynamic", 8998 metadata !"fpexcept.strict") #0 8999 ret <4 x double> %atan 9000} 9001 9002define <1 x float> @constrained_vector_atan2_v1f32() #0 { 9003; CHECK-LABEL: constrained_vector_atan2_v1f32: 9004; CHECK: # %bb.0: # %entry 9005; CHECK-NEXT: pushq %rax 9006; CHECK-NEXT: .cfi_def_cfa_offset 16 9007; CHECK-NEXT: movss {{.*#+}} xmm0 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0] 9008; CHECK-NEXT: movss {{.*#+}} xmm1 = [2.3E+1,0.0E+0,0.0E+0,0.0E+0] 9009; CHECK-NEXT: callq atan2f@PLT 9010; CHECK-NEXT: popq %rax 9011; CHECK-NEXT: .cfi_def_cfa_offset 8 9012; CHECK-NEXT: retq 9013; 9014; AVX-LABEL: constrained_vector_atan2_v1f32: 9015; AVX: # %bb.0: # %entry 9016; AVX-NEXT: pushq %rax 9017; AVX-NEXT: .cfi_def_cfa_offset 16 9018; AVX-NEXT: vmovss {{.*#+}} xmm0 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0] 9019; AVX-NEXT: vmovss {{.*#+}} xmm1 = [2.3E+1,0.0E+0,0.0E+0,0.0E+0] 9020; AVX-NEXT: callq atan2f@PLT 9021; AVX-NEXT: popq %rax 9022; AVX-NEXT: .cfi_def_cfa_offset 8 9023; AVX-NEXT: retq 9024entry: 9025 %atan2 = call <1 x float> @llvm.experimental.constrained.atan2.v1f32( 9026 <1 x float> <float 42.0>, 9027 <1 x float> <float 23.0>, 9028 metadata !"round.dynamic", 9029 metadata 
!"fpexcept.strict") #0 9030 ret <1 x float> %atan2 9031} 9032 9033define <2 x double> @constrained_vector_atan2_v2f64() #0 { 9034; CHECK-LABEL: constrained_vector_atan2_v2f64: 9035; CHECK: # %bb.0: # %entry 9036; CHECK-NEXT: subq $24, %rsp 9037; CHECK-NEXT: .cfi_def_cfa_offset 32 9038; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0] 9039; CHECK-NEXT: movsd {{.*#+}} xmm1 = [2.3100000000000001E+1,0.0E+0] 9040; CHECK-NEXT: callq atan2@PLT 9041; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill 9042; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0] 9043; CHECK-NEXT: movsd {{.*#+}} xmm1 = [2.3E+1,0.0E+0] 9044; CHECK-NEXT: callq atan2@PLT 9045; CHECK-NEXT: unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload 9046; CHECK-NEXT: # xmm0 = xmm0[0],mem[0] 9047; CHECK-NEXT: addq $24, %rsp 9048; CHECK-NEXT: .cfi_def_cfa_offset 8 9049; CHECK-NEXT: retq 9050; 9051; AVX-LABEL: constrained_vector_atan2_v2f64: 9052; AVX: # %bb.0: # %entry 9053; AVX-NEXT: subq $24, %rsp 9054; AVX-NEXT: .cfi_def_cfa_offset 32 9055; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0] 9056; AVX-NEXT: vmovsd {{.*#+}} xmm1 = [2.3100000000000001E+1,0.0E+0] 9057; AVX-NEXT: callq atan2@PLT 9058; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill 9059; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0] 9060; AVX-NEXT: vmovsd {{.*#+}} xmm1 = [2.3E+1,0.0E+0] 9061; AVX-NEXT: callq atan2@PLT 9062; AVX-NEXT: vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload 9063; AVX-NEXT: # xmm0 = xmm0[0],mem[0] 9064; AVX-NEXT: addq $24, %rsp 9065; AVX-NEXT: .cfi_def_cfa_offset 8 9066; AVX-NEXT: retq 9067entry: 9068 %atan2 = call <2 x double> @llvm.experimental.constrained.atan2.v2f64( 9069 <2 x double> <double 42.0, double 42.1>, 9070 <2 x double> <double 23.0, double 23.1>, 9071 metadata !"round.dynamic", 9072 metadata !"fpexcept.strict") #0 9073 ret <2 x double> %atan2 9074} 9075 9076define <3 x float> @constrained_vector_atan2_v3f32() #0 { 9077; CHECK-LABEL: constrained_vector_atan2_v3f32: 9078; CHECK: # 
%bb.0: # %entry 9079; CHECK-NEXT: subq $40, %rsp 9080; CHECK-NEXT: .cfi_def_cfa_offset 48 9081; CHECK-NEXT: movss {{.*#+}} xmm0 = [4.4E+1,0.0E+0,0.0E+0,0.0E+0] 9082; CHECK-NEXT: movss {{.*#+}} xmm1 = [2.5E+1,0.0E+0,0.0E+0,0.0E+0] 9083; CHECK-NEXT: callq atan2f@PLT 9084; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 9085; CHECK-NEXT: movss {{.*#+}} xmm0 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0] 9086; CHECK-NEXT: movss {{.*#+}} xmm1 = [2.3E+1,0.0E+0,0.0E+0,0.0E+0] 9087; CHECK-NEXT: callq atan2f@PLT 9088; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill 9089; CHECK-NEXT: movss {{.*#+}} xmm0 = [4.3E+1,0.0E+0,0.0E+0,0.0E+0] 9090; CHECK-NEXT: movss {{.*#+}} xmm1 = [2.4E+1,0.0E+0,0.0E+0,0.0E+0] 9091; CHECK-NEXT: callq atan2f@PLT 9092; CHECK-NEXT: movaps (%rsp), %xmm1 # 16-byte Reload 9093; CHECK-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] 9094; CHECK-NEXT: unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload 9095; CHECK-NEXT: # xmm1 = xmm1[0],mem[0] 9096; CHECK-NEXT: movaps %xmm1, %xmm0 9097; CHECK-NEXT: addq $40, %rsp 9098; CHECK-NEXT: .cfi_def_cfa_offset 8 9099; CHECK-NEXT: retq 9100; 9101; AVX-LABEL: constrained_vector_atan2_v3f32: 9102; AVX: # %bb.0: # %entry 9103; AVX-NEXT: subq $40, %rsp 9104; AVX-NEXT: .cfi_def_cfa_offset 48 9105; AVX-NEXT: vmovss {{.*#+}} xmm0 = [4.4E+1,0.0E+0,0.0E+0,0.0E+0] 9106; AVX-NEXT: vmovss {{.*#+}} xmm1 = [2.5E+1,0.0E+0,0.0E+0,0.0E+0] 9107; AVX-NEXT: callq atan2f@PLT 9108; AVX-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 9109; AVX-NEXT: vmovss {{.*#+}} xmm0 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0] 9110; AVX-NEXT: vmovss {{.*#+}} xmm1 = [2.3E+1,0.0E+0,0.0E+0,0.0E+0] 9111; AVX-NEXT: callq atan2f@PLT 9112; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill 9113; AVX-NEXT: vmovss {{.*#+}} xmm0 = [4.3E+1,0.0E+0,0.0E+0,0.0E+0] 9114; AVX-NEXT: vmovss {{.*#+}} xmm1 = [2.4E+1,0.0E+0,0.0E+0,0.0E+0] 9115; AVX-NEXT: callq atan2f@PLT 9116; AVX-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload 9117; AVX-NEXT: 
vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3] 9118; AVX-NEXT: vinsertps $32, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload 9119; AVX-NEXT: # xmm0 = xmm0[0,1],mem[0],xmm0[3] 9120; AVX-NEXT: addq $40, %rsp 9121; AVX-NEXT: .cfi_def_cfa_offset 8 9122; AVX-NEXT: retq 9123entry: 9124 %atan2 = call <3 x float> @llvm.experimental.constrained.atan2.v3f32( 9125 <3 x float> <float 42.0, float 43.0, float 44.0>, 9126 <3 x float> <float 23.0, float 24.0, float 25.0>, 9127 metadata !"round.dynamic", 9128 metadata !"fpexcept.strict") #0 9129 ret <3 x float> %atan2 9130} 9131 9132define <3 x double> @constrained_vector_atan2_v3f64() #0 { 9133; CHECK-LABEL: constrained_vector_atan2_v3f64: 9134; CHECK: # %bb.0: # %entry 9135; CHECK-NEXT: subq $24, %rsp 9136; CHECK-NEXT: .cfi_def_cfa_offset 32 9137; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0] 9138; CHECK-NEXT: movsd {{.*#+}} xmm1 = [2.3100000000000001E+1,0.0E+0] 9139; CHECK-NEXT: callq atan2@PLT 9140; CHECK-NEXT: movsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill 9141; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0] 9142; CHECK-NEXT: movsd {{.*#+}} xmm1 = [2.3E+1,0.0E+0] 9143; CHECK-NEXT: callq atan2@PLT 9144; CHECK-NEXT: movsd %xmm0, (%rsp) # 8-byte Spill 9145; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2200000000000003E+1,0.0E+0] 9146; CHECK-NEXT: movsd {{.*#+}} xmm1 = [2.3199999999999999E+1,0.0E+0] 9147; CHECK-NEXT: callq atan2@PLT 9148; CHECK-NEXT: movsd %xmm0, {{[0-9]+}}(%rsp) 9149; CHECK-NEXT: fldl {{[0-9]+}}(%rsp) 9150; CHECK-NEXT: wait 9151; CHECK-NEXT: movsd (%rsp), %xmm0 # 8-byte Reload 9152; CHECK-NEXT: # xmm0 = mem[0],zero 9153; CHECK-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 8-byte Reload 9154; CHECK-NEXT: # xmm1 = mem[0],zero 9155; CHECK-NEXT: addq $24, %rsp 9156; CHECK-NEXT: .cfi_def_cfa_offset 8 9157; CHECK-NEXT: retq 9158; 9159; AVX-LABEL: constrained_vector_atan2_v3f64: 9160; AVX: # %bb.0: # %entry 9161; AVX-NEXT: subq $40, %rsp 9162; AVX-NEXT: .cfi_def_cfa_offset 48 
9163; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0] 9164; AVX-NEXT: vmovsd {{.*#+}} xmm1 = [2.3100000000000001E+1,0.0E+0] 9165; AVX-NEXT: callq atan2@PLT 9166; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill 9167; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0] 9168; AVX-NEXT: vmovsd {{.*#+}} xmm1 = [2.3E+1,0.0E+0] 9169; AVX-NEXT: callq atan2@PLT 9170; AVX-NEXT: vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload 9171; AVX-NEXT: # xmm0 = xmm0[0],mem[0] 9172; AVX-NEXT: vmovups %ymm0, (%rsp) # 32-byte Spill 9173; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2200000000000003E+1,0.0E+0] 9174; AVX-NEXT: vmovsd {{.*#+}} xmm1 = [2.3199999999999999E+1,0.0E+0] 9175; AVX-NEXT: vzeroupper 9176; AVX-NEXT: callq atan2@PLT 9177; AVX-NEXT: vmovups (%rsp), %ymm1 # 32-byte Reload 9178; AVX-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 9179; AVX-NEXT: addq $40, %rsp 9180; AVX-NEXT: .cfi_def_cfa_offset 8 9181; AVX-NEXT: retq 9182entry: 9183 %atan2 = call <3 x double> @llvm.experimental.constrained.atan2.v3f64( 9184 <3 x double> <double 42.0, double 42.1, double 42.2>, 9185 <3 x double> <double 23.0, double 23.1, double 23.2>, 9186 metadata !"round.dynamic", 9187 metadata !"fpexcept.strict") #0 9188 ret <3 x double> %atan2 9189} 9190 9191define <4 x double> @constrained_vector_atan2_v4f64() #0 { 9192; CHECK-LABEL: constrained_vector_atan2_v4f64: 9193; CHECK: # %bb.0: # %entry 9194; CHECK-NEXT: subq $40, %rsp 9195; CHECK-NEXT: .cfi_def_cfa_offset 48 9196; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0] 9197; CHECK-NEXT: movsd {{.*#+}} xmm1 = [2.3100000000000001E+1,0.0E+0] 9198; CHECK-NEXT: callq atan2@PLT 9199; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill 9200; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0] 9201; CHECK-NEXT: movsd {{.*#+}} xmm1 = [2.3E+1,0.0E+0] 9202; CHECK-NEXT: callq atan2@PLT 9203; CHECK-NEXT: unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload 9204; CHECK-NEXT: # xmm0 = xmm0[0],mem[0] 9205; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte 
Spill 9206; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2299999999999997E+1,0.0E+0] 9207; CHECK-NEXT: movsd {{.*#+}} xmm1 = [2.3300000000000001E+1,0.0E+0] 9208; CHECK-NEXT: callq atan2@PLT 9209; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 9210; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2200000000000003E+1,0.0E+0] 9211; CHECK-NEXT: movsd {{.*#+}} xmm1 = [2.3199999999999999E+1,0.0E+0] 9212; CHECK-NEXT: callq atan2@PLT 9213; CHECK-NEXT: movaps %xmm0, %xmm1 9214; CHECK-NEXT: unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload 9215; CHECK-NEXT: # xmm1 = xmm1[0],mem[0] 9216; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload 9217; CHECK-NEXT: addq $40, %rsp 9218; CHECK-NEXT: .cfi_def_cfa_offset 8 9219; CHECK-NEXT: retq 9220; 9221; AVX-LABEL: constrained_vector_atan2_v4f64: 9222; AVX: # %bb.0: # %entry 9223; AVX-NEXT: subq $40, %rsp 9224; AVX-NEXT: .cfi_def_cfa_offset 48 9225; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2299999999999997E+1,0.0E+0] 9226; AVX-NEXT: vmovsd {{.*#+}} xmm1 = [2.3300000000000001E+1,0.0E+0] 9227; AVX-NEXT: callq atan2@PLT 9228; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill 9229; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2200000000000003E+1,0.0E+0] 9230; AVX-NEXT: vmovsd {{.*#+}} xmm1 = [2.3199999999999999E+1,0.0E+0] 9231; AVX-NEXT: callq atan2@PLT 9232; AVX-NEXT: vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload 9233; AVX-NEXT: # xmm0 = xmm0[0],mem[0] 9234; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill 9235; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0] 9236; AVX-NEXT: vmovsd {{.*#+}} xmm1 = [2.3100000000000001E+1,0.0E+0] 9237; AVX-NEXT: callq atan2@PLT 9238; AVX-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 9239; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0] 9240; AVX-NEXT: vmovsd {{.*#+}} xmm1 = [2.3E+1,0.0E+0] 9241; AVX-NEXT: callq atan2@PLT 9242; AVX-NEXT: vunpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload 9243; AVX-NEXT: # xmm0 = xmm0[0],mem[0] 9244; AVX-NEXT: 
vinsertf128 $1, (%rsp), %ymm0, %ymm0 # 16-byte Folded Reload 9245; AVX-NEXT: addq $40, %rsp 9246; AVX-NEXT: .cfi_def_cfa_offset 8 9247; AVX-NEXT: retq 9248entry: 9249 %atan2 = call <4 x double> @llvm.experimental.constrained.atan2.v4f64( 9250 <4 x double> <double 42.0, double 42.1, 9251 double 42.2, double 42.3>, 9252 <4 x double> <double 23.0, double 23.1, 9253 double 23.2, double 23.3>, 9254 metadata !"round.dynamic", 9255 metadata !"fpexcept.strict") #0 9256 ret <4 x double> %atan2 9257} 9258 9259define <1 x float> @constrained_vector_cosh_v1f32() #0 { 9260; CHECK-LABEL: constrained_vector_cosh_v1f32: 9261; CHECK: # %bb.0: # %entry 9262; CHECK-NEXT: pushq %rax 9263; CHECK-NEXT: .cfi_def_cfa_offset 16 9264; CHECK-NEXT: movss {{.*#+}} xmm0 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0] 9265; CHECK-NEXT: callq coshf@PLT 9266; CHECK-NEXT: popq %rax 9267; CHECK-NEXT: .cfi_def_cfa_offset 8 9268; CHECK-NEXT: retq 9269; 9270; AVX-LABEL: constrained_vector_cosh_v1f32: 9271; AVX: # %bb.0: # %entry 9272; AVX-NEXT: pushq %rax 9273; AVX-NEXT: .cfi_def_cfa_offset 16 9274; AVX-NEXT: vmovss {{.*#+}} xmm0 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0] 9275; AVX-NEXT: callq coshf@PLT 9276; AVX-NEXT: popq %rax 9277; AVX-NEXT: .cfi_def_cfa_offset 8 9278; AVX-NEXT: retq 9279entry: 9280 %cosh = call <1 x float> @llvm.experimental.constrained.cosh.v1f32( 9281 <1 x float> <float 42.0>, 9282 metadata !"round.dynamic", 9283 metadata !"fpexcept.strict") #0 9284 ret <1 x float> %cosh 9285} 9286 9287define <2 x double> @constrained_vector_cosh_v2f64() #0 { 9288; CHECK-LABEL: constrained_vector_cosh_v2f64: 9289; CHECK: # %bb.0: # %entry 9290; CHECK-NEXT: subq $24, %rsp 9291; CHECK-NEXT: .cfi_def_cfa_offset 32 9292; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0] 9293; CHECK-NEXT: callq cosh@PLT 9294; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill 9295; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0] 9296; CHECK-NEXT: callq cosh@PLT 9297; CHECK-NEXT: unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload 
9298; CHECK-NEXT: # xmm0 = xmm0[0],mem[0] 9299; CHECK-NEXT: addq $24, %rsp 9300; CHECK-NEXT: .cfi_def_cfa_offset 8 9301; CHECK-NEXT: retq 9302; 9303; AVX-LABEL: constrained_vector_cosh_v2f64: 9304; AVX: # %bb.0: # %entry 9305; AVX-NEXT: subq $24, %rsp 9306; AVX-NEXT: .cfi_def_cfa_offset 32 9307; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0] 9308; AVX-NEXT: callq cosh@PLT 9309; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill 9310; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0] 9311; AVX-NEXT: callq cosh@PLT 9312; AVX-NEXT: vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload 9313; AVX-NEXT: # xmm0 = xmm0[0],mem[0] 9314; AVX-NEXT: addq $24, %rsp 9315; AVX-NEXT: .cfi_def_cfa_offset 8 9316; AVX-NEXT: retq 9317entry: 9318 %cosh = call <2 x double> @llvm.experimental.constrained.cosh.v2f64( 9319 <2 x double> <double 42.0, double 42.1>, 9320 metadata !"round.dynamic", 9321 metadata !"fpexcept.strict") #0 9322 ret <2 x double> %cosh 9323} 9324 9325define <3 x float> @constrained_vector_cosh_v3f32() #0 { 9326; CHECK-LABEL: constrained_vector_cosh_v3f32: 9327; CHECK: # %bb.0: # %entry 9328; CHECK-NEXT: subq $40, %rsp 9329; CHECK-NEXT: .cfi_def_cfa_offset 48 9330; CHECK-NEXT: movss {{.*#+}} xmm0 = [4.4E+1,0.0E+0,0.0E+0,0.0E+0] 9331; CHECK-NEXT: callq coshf@PLT 9332; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 9333; CHECK-NEXT: movss {{.*#+}} xmm0 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0] 9334; CHECK-NEXT: callq coshf@PLT 9335; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill 9336; CHECK-NEXT: movss {{.*#+}} xmm0 = [4.3E+1,0.0E+0,0.0E+0,0.0E+0] 9337; CHECK-NEXT: callq coshf@PLT 9338; CHECK-NEXT: movaps (%rsp), %xmm1 # 16-byte Reload 9339; CHECK-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] 9340; CHECK-NEXT: unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload 9341; CHECK-NEXT: # xmm1 = xmm1[0],mem[0] 9342; CHECK-NEXT: movaps %xmm1, %xmm0 9343; CHECK-NEXT: addq $40, %rsp 9344; CHECK-NEXT: .cfi_def_cfa_offset 8 
9345; CHECK-NEXT: retq 9346; 9347; AVX-LABEL: constrained_vector_cosh_v3f32: 9348; AVX: # %bb.0: # %entry 9349; AVX-NEXT: subq $40, %rsp 9350; AVX-NEXT: .cfi_def_cfa_offset 48 9351; AVX-NEXT: vmovss {{.*#+}} xmm0 = [4.4E+1,0.0E+0,0.0E+0,0.0E+0] 9352; AVX-NEXT: callq coshf@PLT 9353; AVX-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 9354; AVX-NEXT: vmovss {{.*#+}} xmm0 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0] 9355; AVX-NEXT: callq coshf@PLT 9356; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill 9357; AVX-NEXT: vmovss {{.*#+}} xmm0 = [4.3E+1,0.0E+0,0.0E+0,0.0E+0] 9358; AVX-NEXT: callq coshf@PLT 9359; AVX-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload 9360; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3] 9361; AVX-NEXT: vinsertps $32, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload 9362; AVX-NEXT: # xmm0 = xmm0[0,1],mem[0],xmm0[3] 9363; AVX-NEXT: addq $40, %rsp 9364; AVX-NEXT: .cfi_def_cfa_offset 8 9365; AVX-NEXT: retq 9366entry: 9367 %cosh = call <3 x float> @llvm.experimental.constrained.cosh.v3f32( 9368 <3 x float> <float 42.0, float 43.0, float 44.0>, 9369 metadata !"round.dynamic", 9370 metadata !"fpexcept.strict") #0 9371 ret <3 x float> %cosh 9372} 9373 9374define <3 x double> @constrained_vector_cosh_v3f64() #0 { 9375; CHECK-LABEL: constrained_vector_cosh_v3f64: 9376; CHECK: # %bb.0: # %entry 9377; CHECK-NEXT: subq $24, %rsp 9378; CHECK-NEXT: .cfi_def_cfa_offset 32 9379; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0] 9380; CHECK-NEXT: callq cosh@PLT 9381; CHECK-NEXT: movsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill 9382; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0] 9383; CHECK-NEXT: callq cosh@PLT 9384; CHECK-NEXT: movsd %xmm0, (%rsp) # 8-byte Spill 9385; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2200000000000003E+1,0.0E+0] 9386; CHECK-NEXT: callq cosh@PLT 9387; CHECK-NEXT: movsd %xmm0, {{[0-9]+}}(%rsp) 9388; CHECK-NEXT: fldl {{[0-9]+}}(%rsp) 9389; CHECK-NEXT: wait 9390; CHECK-NEXT: movsd (%rsp), %xmm0 # 
8-byte Reload 9391; CHECK-NEXT: # xmm0 = mem[0],zero 9392; CHECK-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 8-byte Reload 9393; CHECK-NEXT: # xmm1 = mem[0],zero 9394; CHECK-NEXT: addq $24, %rsp 9395; CHECK-NEXT: .cfi_def_cfa_offset 8 9396; CHECK-NEXT: retq 9397; 9398; AVX-LABEL: constrained_vector_cosh_v3f64: 9399; AVX: # %bb.0: # %entry 9400; AVX-NEXT: subq $40, %rsp 9401; AVX-NEXT: .cfi_def_cfa_offset 48 9402; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0] 9403; AVX-NEXT: callq cosh@PLT 9404; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill 9405; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0] 9406; AVX-NEXT: callq cosh@PLT 9407; AVX-NEXT: vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload 9408; AVX-NEXT: # xmm0 = xmm0[0],mem[0] 9409; AVX-NEXT: vmovups %ymm0, (%rsp) # 32-byte Spill 9410; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2200000000000003E+1,0.0E+0] 9411; AVX-NEXT: vzeroupper 9412; AVX-NEXT: callq cosh@PLT 9413; AVX-NEXT: vmovups (%rsp), %ymm1 # 32-byte Reload 9414; AVX-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 9415; AVX-NEXT: addq $40, %rsp 9416; AVX-NEXT: .cfi_def_cfa_offset 8 9417; AVX-NEXT: retq 9418entry: 9419 %cosh = call <3 x double> @llvm.experimental.constrained.cosh.v3f64( 9420 <3 x double> <double 42.0, double 42.1, double 42.2>, 9421 metadata !"round.dynamic", 9422 metadata !"fpexcept.strict") #0 9423 ret <3 x double> %cosh 9424} 9425 9426define <4 x double> @constrained_vector_cosh_v4f64() #0 { 9427; CHECK-LABEL: constrained_vector_cosh_v4f64: 9428; CHECK: # %bb.0: # %entry 9429; CHECK-NEXT: subq $40, %rsp 9430; CHECK-NEXT: .cfi_def_cfa_offset 48 9431; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0] 9432; CHECK-NEXT: callq cosh@PLT 9433; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill 9434; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0] 9435; CHECK-NEXT: callq cosh@PLT 9436; CHECK-NEXT: unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload 9437; CHECK-NEXT: # xmm0 = xmm0[0],mem[0] 9438; CHECK-NEXT: movaps %xmm0, 
(%rsp) # 16-byte Spill 9439; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2299999999999997E+1,0.0E+0] 9440; CHECK-NEXT: callq cosh@PLT 9441; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 9442; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2200000000000003E+1,0.0E+0] 9443; CHECK-NEXT: callq cosh@PLT 9444; CHECK-NEXT: movaps %xmm0, %xmm1 9445; CHECK-NEXT: unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload 9446; CHECK-NEXT: # xmm1 = xmm1[0],mem[0] 9447; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload 9448; CHECK-NEXT: addq $40, %rsp 9449; CHECK-NEXT: .cfi_def_cfa_offset 8 9450; CHECK-NEXT: retq 9451; 9452; AVX-LABEL: constrained_vector_cosh_v4f64: 9453; AVX: # %bb.0: # %entry 9454; AVX-NEXT: subq $40, %rsp 9455; AVX-NEXT: .cfi_def_cfa_offset 48 9456; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2299999999999997E+1,0.0E+0] 9457; AVX-NEXT: callq cosh@PLT 9458; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill 9459; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2200000000000003E+1,0.0E+0] 9460; AVX-NEXT: callq cosh@PLT 9461; AVX-NEXT: vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload 9462; AVX-NEXT: # xmm0 = xmm0[0],mem[0] 9463; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill 9464; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0] 9465; AVX-NEXT: callq cosh@PLT 9466; AVX-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 9467; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0] 9468; AVX-NEXT: callq cosh@PLT 9469; AVX-NEXT: vunpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload 9470; AVX-NEXT: # xmm0 = xmm0[0],mem[0] 9471; AVX-NEXT: vinsertf128 $1, (%rsp), %ymm0, %ymm0 # 16-byte Folded Reload 9472; AVX-NEXT: addq $40, %rsp 9473; AVX-NEXT: .cfi_def_cfa_offset 8 9474; AVX-NEXT: retq 9475entry: 9476 %cosh = call <4 x double> @llvm.experimental.constrained.cosh.v4f64( 9477 <4 x double> <double 42.0, double 42.1, 9478 double 42.2, double 42.3>, 9479 metadata !"round.dynamic", 9480 metadata !"fpexcept.strict") #0 9481 ret <4 x 
double> %cosh 9482} 9483 9484define <1 x float> @constrained_vector_sinh_v1f32() #0 { 9485; CHECK-LABEL: constrained_vector_sinh_v1f32: 9486; CHECK: # %bb.0: # %entry 9487; CHECK-NEXT: pushq %rax 9488; CHECK-NEXT: .cfi_def_cfa_offset 16 9489; CHECK-NEXT: movss {{.*#+}} xmm0 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0] 9490; CHECK-NEXT: callq sinhf@PLT 9491; CHECK-NEXT: popq %rax 9492; CHECK-NEXT: .cfi_def_cfa_offset 8 9493; CHECK-NEXT: retq 9494; 9495; AVX-LABEL: constrained_vector_sinh_v1f32: 9496; AVX: # %bb.0: # %entry 9497; AVX-NEXT: pushq %rax 9498; AVX-NEXT: .cfi_def_cfa_offset 16 9499; AVX-NEXT: vmovss {{.*#+}} xmm0 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0] 9500; AVX-NEXT: callq sinhf@PLT 9501; AVX-NEXT: popq %rax 9502; AVX-NEXT: .cfi_def_cfa_offset 8 9503; AVX-NEXT: retq 9504entry: 9505 %sinh = call <1 x float> @llvm.experimental.constrained.sinh.v1f32( 9506 <1 x float> <float 42.0>, 9507 metadata !"round.dynamic", 9508 metadata !"fpexcept.strict") #0 9509 ret <1 x float> %sinh 9510} 9511 9512define <2 x double> @constrained_vector_sinh_v2f64() #0 { 9513; CHECK-LABEL: constrained_vector_sinh_v2f64: 9514; CHECK: # %bb.0: # %entry 9515; CHECK-NEXT: subq $24, %rsp 9516; CHECK-NEXT: .cfi_def_cfa_offset 32 9517; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0] 9518; CHECK-NEXT: callq sinh@PLT 9519; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill 9520; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0] 9521; CHECK-NEXT: callq sinh@PLT 9522; CHECK-NEXT: unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload 9523; CHECK-NEXT: # xmm0 = xmm0[0],mem[0] 9524; CHECK-NEXT: addq $24, %rsp 9525; CHECK-NEXT: .cfi_def_cfa_offset 8 9526; CHECK-NEXT: retq 9527; 9528; AVX-LABEL: constrained_vector_sinh_v2f64: 9529; AVX: # %bb.0: # %entry 9530; AVX-NEXT: subq $24, %rsp 9531; AVX-NEXT: .cfi_def_cfa_offset 32 9532; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0] 9533; AVX-NEXT: callq sinh@PLT 9534; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill 9535; AVX-NEXT: vmovsd {{.*#+}} 
xmm0 = [4.2E+1,0.0E+0] 9536; AVX-NEXT: callq sinh@PLT 9537; AVX-NEXT: vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload 9538; AVX-NEXT: # xmm0 = xmm0[0],mem[0] 9539; AVX-NEXT: addq $24, %rsp 9540; AVX-NEXT: .cfi_def_cfa_offset 8 9541; AVX-NEXT: retq 9542entry: 9543 %sinh = call <2 x double> @llvm.experimental.constrained.sinh.v2f64( 9544 <2 x double> <double 42.0, double 42.1>, 9545 metadata !"round.dynamic", 9546 metadata !"fpexcept.strict") #0 9547 ret <2 x double> %sinh 9548} 9549 9550define <3 x float> @constrained_vector_sinh_v3f32() #0 { 9551; CHECK-LABEL: constrained_vector_sinh_v3f32: 9552; CHECK: # %bb.0: # %entry 9553; CHECK-NEXT: subq $40, %rsp 9554; CHECK-NEXT: .cfi_def_cfa_offset 48 9555; CHECK-NEXT: movss {{.*#+}} xmm0 = [4.4E+1,0.0E+0,0.0E+0,0.0E+0] 9556; CHECK-NEXT: callq sinhf@PLT 9557; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 9558; CHECK-NEXT: movss {{.*#+}} xmm0 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0] 9559; CHECK-NEXT: callq sinhf@PLT 9560; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill 9561; CHECK-NEXT: movss {{.*#+}} xmm0 = [4.3E+1,0.0E+0,0.0E+0,0.0E+0] 9562; CHECK-NEXT: callq sinhf@PLT 9563; CHECK-NEXT: movaps (%rsp), %xmm1 # 16-byte Reload 9564; CHECK-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] 9565; CHECK-NEXT: unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload 9566; CHECK-NEXT: # xmm1 = xmm1[0],mem[0] 9567; CHECK-NEXT: movaps %xmm1, %xmm0 9568; CHECK-NEXT: addq $40, %rsp 9569; CHECK-NEXT: .cfi_def_cfa_offset 8 9570; CHECK-NEXT: retq 9571; 9572; AVX-LABEL: constrained_vector_sinh_v3f32: 9573; AVX: # %bb.0: # %entry 9574; AVX-NEXT: subq $40, %rsp 9575; AVX-NEXT: .cfi_def_cfa_offset 48 9576; AVX-NEXT: vmovss {{.*#+}} xmm0 = [4.4E+1,0.0E+0,0.0E+0,0.0E+0] 9577; AVX-NEXT: callq sinhf@PLT 9578; AVX-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 9579; AVX-NEXT: vmovss {{.*#+}} xmm0 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0] 9580; AVX-NEXT: callq sinhf@PLT 9581; AVX-NEXT: vmovaps %xmm0, 
(%rsp) # 16-byte Spill
; AVX-NEXT: vmovss {{.*#+}} xmm0 = [4.3E+1,0.0E+0,0.0E+0,0.0E+0]
; AVX-NEXT: callq sinhf@PLT
; AVX-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
; AVX-NEXT: vinsertps $32, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
; AVX-NEXT: # xmm0 = xmm0[0,1],mem[0],xmm0[3]
; AVX-NEXT: addq $40, %rsp
; AVX-NEXT: .cfi_def_cfa_offset 8
; AVX-NEXT: retq
entry:
  %sinh = call <3 x float> @llvm.experimental.constrained.sinh.v3f32(
                             <3 x float> <float 42.0, float 43.0, float 44.0>,
                             metadata !"round.dynamic",
                             metadata !"fpexcept.strict") #0
  ret <3 x float> %sinh
}

; NOTE(review): Assertions in this file are autogenerated by
; utils/update_llc_test_checks.py (see file header); regenerate rather than
; hand-editing the CHECK/AVX lines below.
; Strict sinh on <3 x double> is scalarized into three sinh libcalls; on the
; CHECK (SSE) path the third element is returned on the x87 stack (fldl).
define <3 x double> @constrained_vector_sinh_v3f64() #0 {
; CHECK-LABEL: constrained_vector_sinh_v3f64:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: subq $24, %rsp
; CHECK-NEXT: .cfi_def_cfa_offset 32
; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0]
; CHECK-NEXT: callq sinh@PLT
; CHECK-NEXT: movsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0]
; CHECK-NEXT: callq sinh@PLT
; CHECK-NEXT: movsd %xmm0, (%rsp) # 8-byte Spill
; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2200000000000003E+1,0.0E+0]
; CHECK-NEXT: callq sinh@PLT
; CHECK-NEXT: movsd %xmm0, {{[0-9]+}}(%rsp)
; CHECK-NEXT: fldl {{[0-9]+}}(%rsp)
; CHECK-NEXT: wait
; CHECK-NEXT: movsd (%rsp), %xmm0 # 8-byte Reload
; CHECK-NEXT: # xmm0 = mem[0],zero
; CHECK-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 8-byte Reload
; CHECK-NEXT: # xmm1 = mem[0],zero
; CHECK-NEXT: addq $24, %rsp
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: retq
;
; AVX-LABEL: constrained_vector_sinh_v3f64:
; AVX: # %bb.0: # %entry
; AVX-NEXT: subq $40, %rsp
; AVX-NEXT: .cfi_def_cfa_offset 48
; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0]
; AVX-NEXT: callq sinh@PLT
; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0]
; AVX-NEXT: callq sinh@PLT
; AVX-NEXT: vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
; AVX-NEXT: # xmm0 = xmm0[0],mem[0]
; AVX-NEXT: vmovups %ymm0, (%rsp) # 32-byte Spill
; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2200000000000003E+1,0.0E+0]
; AVX-NEXT: vzeroupper
; AVX-NEXT: callq sinh@PLT
; AVX-NEXT: vmovups (%rsp), %ymm1 # 32-byte Reload
; AVX-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX-NEXT: addq $40, %rsp
; AVX-NEXT: .cfi_def_cfa_offset 8
; AVX-NEXT: retq
entry:
  %sinh = call <3 x double> @llvm.experimental.constrained.sinh.v3f64(
                             <3 x double> <double 42.0, double 42.1, double 42.2>,
                             metadata !"round.dynamic",
                             metadata !"fpexcept.strict") #0
  ret <3 x double> %sinh
}

; NOTE(review): <4 x double> sinh: four sinh libcalls; the AVX path rebuilds
; the 256-bit result with vunpcklpd + vinsertf128.
define <4 x double> @constrained_vector_sinh_v4f64() #0 {
; CHECK-LABEL: constrained_vector_sinh_v4f64:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: subq $40, %rsp
; CHECK-NEXT: .cfi_def_cfa_offset 48
; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0]
; CHECK-NEXT: callq sinh@PLT
; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0]
; CHECK-NEXT: callq sinh@PLT
; CHECK-NEXT: unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: # xmm0 = xmm0[0],mem[0]
; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2299999999999997E+1,0.0E+0]
; CHECK-NEXT: callq sinh@PLT
; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2200000000000003E+1,0.0E+0]
; CHECK-NEXT: callq sinh@PLT
; CHECK-NEXT: movaps %xmm0, %xmm1
; CHECK-NEXT: unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
; CHECK-NEXT: # xmm1 = xmm1[0],mem[0]
; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
; CHECK-NEXT: addq $40, %rsp
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: retq
;
; AVX-LABEL: constrained_vector_sinh_v4f64:
; AVX: # %bb.0: # %entry
; AVX-NEXT: subq $40, %rsp
; AVX-NEXT: .cfi_def_cfa_offset 48
; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2299999999999997E+1,0.0E+0]
; AVX-NEXT: callq sinh@PLT
; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2200000000000003E+1,0.0E+0]
; AVX-NEXT: callq sinh@PLT
; AVX-NEXT: vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
; AVX-NEXT: # xmm0 = xmm0[0],mem[0]
; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0]
; AVX-NEXT: callq sinh@PLT
; AVX-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0]
; AVX-NEXT: callq sinh@PLT
; AVX-NEXT: vunpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
; AVX-NEXT: # xmm0 = xmm0[0],mem[0]
; AVX-NEXT: vinsertf128 $1, (%rsp), %ymm0, %ymm0 # 16-byte Folded Reload
; AVX-NEXT: addq $40, %rsp
; AVX-NEXT: .cfi_def_cfa_offset 8
; AVX-NEXT: retq
entry:
  %sinh = call <4 x double> @llvm.experimental.constrained.sinh.v4f64(
                             <4 x double> <double 42.0, double 42.1,
                                           double 42.2, double 42.3>,
                             metadata !"round.dynamic",
                             metadata !"fpexcept.strict") #0
  ret <4 x double> %sinh
}

; NOTE(review): the tanh tests below mirror the sinh ones above; <1 x float>
; lowers to a single tanhf libcall.
define <1 x float> @constrained_vector_tanh_v1f32() #0 {
; CHECK-LABEL: constrained_vector_tanh_v1f32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: pushq %rax
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: movss {{.*#+}} xmm0 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0]
; CHECK-NEXT: callq tanhf@PLT
; CHECK-NEXT: popq %rax
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: retq
;
; AVX-LABEL: constrained_vector_tanh_v1f32:
; AVX: # %bb.0: # %entry
; AVX-NEXT: pushq %rax
; AVX-NEXT: .cfi_def_cfa_offset 16
; AVX-NEXT: vmovss {{.*#+}} xmm0 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0]
; AVX-NEXT: callq tanhf@PLT
; AVX-NEXT: popq %rax
; AVX-NEXT: .cfi_def_cfa_offset 8
; AVX-NEXT: retq
entry:
  %tanh = call <1 x float> @llvm.experimental.constrained.tanh.v1f32(
                             <1 x float> <float 42.0>,
                             metadata !"round.dynamic",
                             metadata !"fpexcept.strict") #0
  ret <1 x float> %tanh
}

; NOTE(review): <2 x double> tanh: two tanh libcalls recombined with unpcklpd.
define <2 x double> @constrained_vector_tanh_v2f64() #0 {
; CHECK-LABEL: constrained_vector_tanh_v2f64:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: subq $24, %rsp
; CHECK-NEXT: .cfi_def_cfa_offset 32
; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0]
; CHECK-NEXT: callq tanh@PLT
; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0]
; CHECK-NEXT: callq tanh@PLT
; CHECK-NEXT: unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: # xmm0 = xmm0[0],mem[0]
; CHECK-NEXT: addq $24, %rsp
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: retq
;
; AVX-LABEL: constrained_vector_tanh_v2f64:
; AVX: # %bb.0: # %entry
; AVX-NEXT: subq $24, %rsp
; AVX-NEXT: .cfi_def_cfa_offset 32
; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0]
; AVX-NEXT: callq tanh@PLT
; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0]
; AVX-NEXT: callq tanh@PLT
; AVX-NEXT: vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
; AVX-NEXT: # xmm0 = xmm0[0],mem[0]
; AVX-NEXT: addq $24, %rsp
; AVX-NEXT: .cfi_def_cfa_offset 8
; AVX-NEXT: retq
entry:
  %tanh = call <2 x double> @llvm.experimental.constrained.tanh.v2f64(
                             <2 x double> <double 42.0, double 42.1>,
                             metadata !"round.dynamic",
                             metadata !"fpexcept.strict") #0
  ret <2 x double> %tanh
}

; NOTE(review): <3 x float> tanh: three tanhf libcalls; lanes reassembled with
; unpcklps/unpcklpd (SSE) or vinsertps (AVX).
define <3 x float> @constrained_vector_tanh_v3f32() #0 {
; CHECK-LABEL: constrained_vector_tanh_v3f32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: subq $40, %rsp
; CHECK-NEXT: .cfi_def_cfa_offset 48
; CHECK-NEXT: movss {{.*#+}} xmm0 = [4.4E+1,0.0E+0,0.0E+0,0.0E+0]
; CHECK-NEXT: callq tanhf@PLT
; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: movss {{.*#+}} xmm0 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0]
; CHECK-NEXT: callq tanhf@PLT
; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT: movss {{.*#+}} xmm0 = [4.3E+1,0.0E+0,0.0E+0,0.0E+0]
; CHECK-NEXT: callq tanhf@PLT
; CHECK-NEXT: movaps (%rsp), %xmm1 # 16-byte Reload
; CHECK-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; CHECK-NEXT: unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
; CHECK-NEXT: # xmm1 = xmm1[0],mem[0]
; CHECK-NEXT: movaps %xmm1, %xmm0
; CHECK-NEXT: addq $40, %rsp
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: retq
;
; AVX-LABEL: constrained_vector_tanh_v3f32:
; AVX: # %bb.0: # %entry
; AVX-NEXT: subq $40, %rsp
; AVX-NEXT: .cfi_def_cfa_offset 48
; AVX-NEXT: vmovss {{.*#+}} xmm0 = [4.4E+1,0.0E+0,0.0E+0,0.0E+0]
; AVX-NEXT: callq tanhf@PLT
; AVX-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; AVX-NEXT: vmovss {{.*#+}} xmm0 = [4.2E+1,0.0E+0,0.0E+0,0.0E+0]
; AVX-NEXT: callq tanhf@PLT
; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
; AVX-NEXT: vmovss {{.*#+}} xmm0 = [4.3E+1,0.0E+0,0.0E+0,0.0E+0]
; AVX-NEXT: callq tanhf@PLT
; AVX-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
; AVX-NEXT: vinsertps $32, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
; AVX-NEXT: # xmm0 = xmm0[0,1],mem[0],xmm0[3]
; AVX-NEXT: addq $40, %rsp
; AVX-NEXT: .cfi_def_cfa_offset 8
; AVX-NEXT: retq
entry:
  %tanh = call <3 x float> @llvm.experimental.constrained.tanh.v3f32(
                             <3 x float> <float 42.0, float 43.0, float 44.0>,
                             metadata !"round.dynamic",
                             metadata !"fpexcept.strict") #0
  ret <3 x float> %tanh
}

; NOTE(review): <3 x double> tanh: three tanh libcalls; as with sinh above, the
; CHECK (SSE) path returns the third element on the x87 stack (fldl).
define <3 x double> @constrained_vector_tanh_v3f64() #0 {
; CHECK-LABEL: constrained_vector_tanh_v3f64:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: subq $24, %rsp
; CHECK-NEXT: .cfi_def_cfa_offset 32
; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0]
; CHECK-NEXT: callq tanh@PLT
; CHECK-NEXT: movsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0]
; CHECK-NEXT: callq tanh@PLT
; CHECK-NEXT: movsd %xmm0, (%rsp) # 8-byte Spill
; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2200000000000003E+1,0.0E+0]
; CHECK-NEXT: callq tanh@PLT
; CHECK-NEXT: movsd %xmm0, {{[0-9]+}}(%rsp)
; CHECK-NEXT: fldl {{[0-9]+}}(%rsp)
; CHECK-NEXT: wait
; CHECK-NEXT: movsd (%rsp), %xmm0 # 8-byte Reload
; CHECK-NEXT: # xmm0 = mem[0],zero
; CHECK-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 8-byte Reload
; CHECK-NEXT: # xmm1 = mem[0],zero
; CHECK-NEXT: addq $24, %rsp
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: retq
;
; AVX-LABEL: constrained_vector_tanh_v3f64:
; AVX: # %bb.0: # %entry
; AVX-NEXT: subq $40, %rsp
; AVX-NEXT: .cfi_def_cfa_offset 48
; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0]
; AVX-NEXT: callq tanh@PLT
; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0]
; AVX-NEXT: callq tanh@PLT
; AVX-NEXT: vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
; AVX-NEXT: # xmm0 = xmm0[0],mem[0]
; AVX-NEXT: vmovups %ymm0, (%rsp) # 32-byte Spill
; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2200000000000003E+1,0.0E+0]
; AVX-NEXT: vzeroupper
; AVX-NEXT: callq tanh@PLT
; AVX-NEXT: vmovups (%rsp), %ymm1 # 32-byte Reload
; AVX-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX-NEXT: addq $40, %rsp
; AVX-NEXT: .cfi_def_cfa_offset 8
; AVX-NEXT: retq
entry:
  %tanh = call <3 x double> @llvm.experimental.constrained.tanh.v3f64(
                             <3 x double> <double 42.0, double 42.1, double 42.2>,
                             metadata !"round.dynamic",
                             metadata !"fpexcept.strict") #0
  ret <3 x double> %tanh
}

; NOTE(review): <4 x double> tanh: four tanh libcalls; AVX rebuilds %ymm0 with
; vunpcklpd + vinsertf128.
define <4 x double> @constrained_vector_tanh_v4f64() #0 {
; CHECK-LABEL: constrained_vector_tanh_v4f64:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: subq $40, %rsp
; CHECK-NEXT: .cfi_def_cfa_offset 48
; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0]
; CHECK-NEXT: callq tanh@PLT
; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0]
; CHECK-NEXT: callq tanh@PLT
; CHECK-NEXT: unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT: # xmm0 = xmm0[0],mem[0]
; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2299999999999997E+1,0.0E+0]
; CHECK-NEXT: callq tanh@PLT
; CHECK-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: movsd {{.*#+}} xmm0 = [4.2200000000000003E+1,0.0E+0]
; CHECK-NEXT: callq tanh@PLT
; CHECK-NEXT: movaps %xmm0, %xmm1
; CHECK-NEXT: unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
; CHECK-NEXT: # xmm1 = xmm1[0],mem[0]
; CHECK-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
; CHECK-NEXT: addq $40, %rsp
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: retq
;
; AVX-LABEL: constrained_vector_tanh_v4f64:
; AVX: # %bb.0: # %entry
; AVX-NEXT: subq $40, %rsp
; AVX-NEXT: .cfi_def_cfa_offset 48
; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2299999999999997E+1,0.0E+0]
; AVX-NEXT: callq tanh@PLT
; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2200000000000003E+1,0.0E+0]
; AVX-NEXT: callq tanh@PLT
; AVX-NEXT: vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
; AVX-NEXT: # xmm0 = xmm0[0],mem[0]
; AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2100000000000001E+1,0.0E+0]
; AVX-NEXT: callq tanh@PLT
; AVX-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; AVX-NEXT: vmovsd {{.*#+}} xmm0 = [4.2E+1,0.0E+0]
; AVX-NEXT: callq tanh@PLT
; AVX-NEXT: vunpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
; AVX-NEXT: # xmm0 = xmm0[0],mem[0]
; AVX-NEXT: vinsertf128 $1, (%rsp), %ymm0, %ymm0 # 16-byte Folded Reload
; AVX-NEXT: addq $40, %rsp
; AVX-NEXT: .cfi_def_cfa_offset 8
; AVX-NEXT: retq
entry:
  %tanh = call <4 x double> @llvm.experimental.constrained.tanh.v4f64(
                             <4 x double> <double 42.0, double 42.1,
                                           double 42.2, double 42.3>,
                             metadata !"round.dynamic",
                             metadata !"fpexcept.strict") #0
  ret <4 x double> %tanh
}

declare <16 x float> @llvm.experimental.constrained.fadd.v16f32(<16 x float>, <16 x float>, metadata, metadata)

attributes #0 = { strictfp }

; Single width declarations
declare <2 x double> @llvm.experimental.constrained.fadd.v2f64(<2 x double>, <2 x double>, metadata, metadata)
declare <2 x double> @llvm.experimental.constrained.fsub.v2f64(<2 x double>, <2 x double>, metadata, metadata)
declare <2 x double> @llvm.experimental.constrained.fmul.v2f64(<2 x double>, <2 x double>, metadata, metadata)
declare <2 x double> @llvm.experimental.constrained.fdiv.v2f64(<2 x double>, <2 x double>, metadata, metadata)
declare <2 x double> @llvm.experimental.constrained.frem.v2f64(<2 x double>, <2 x double>, metadata, metadata)
declare <2 x double> @llvm.experimental.constrained.sqrt.v2f64(<2 x double>, metadata, metadata)
declare <2 x double>
@llvm.experimental.constrained.pow.v2f64(<2 x double>, <2 x double>, metadata, metadata) 9946declare <2 x double> @llvm.experimental.constrained.powi.v2f64(<2 x double>, i32, metadata, metadata) 9947declare <2 x double> @llvm.experimental.constrained.sin.v2f64(<2 x double>, metadata, metadata) 9948declare <2 x double> @llvm.experimental.constrained.cos.v2f64(<2 x double>, metadata, metadata) 9949declare <2 x double> @llvm.experimental.constrained.tan.v2f64(<2 x double>, metadata, metadata) 9950declare <2 x double> @llvm.experimental.constrained.asin.v2f64(<2 x double>, metadata, metadata) 9951declare <2 x double> @llvm.experimental.constrained.acos.v2f64(<2 x double>, metadata, metadata) 9952declare <2 x double> @llvm.experimental.constrained.atan.v2f64(<2 x double>, metadata, metadata) 9953declare <2 x double> @llvm.experimental.constrained.sinh.v2f64(<2 x double>, metadata, metadata) 9954declare <2 x double> @llvm.experimental.constrained.cosh.v2f64(<2 x double>, metadata, metadata) 9955declare <2 x double> @llvm.experimental.constrained.tanh.v2f64(<2 x double>, metadata, metadata) 9956declare <2 x double> @llvm.experimental.constrained.exp.v2f64(<2 x double>, metadata, metadata) 9957declare <2 x double> @llvm.experimental.constrained.exp2.v2f64(<2 x double>, metadata, metadata) 9958declare <2 x double> @llvm.experimental.constrained.log.v2f64(<2 x double>, metadata, metadata) 9959declare <2 x double> @llvm.experimental.constrained.log10.v2f64(<2 x double>, metadata, metadata) 9960declare <2 x double> @llvm.experimental.constrained.log2.v2f64(<2 x double>, metadata, metadata) 9961declare <2 x double> @llvm.experimental.constrained.rint.v2f64(<2 x double>, metadata, metadata) 9962declare <2 x double> @llvm.experimental.constrained.nearbyint.v2f64(<2 x double>, metadata, metadata) 9963declare <2 x double> @llvm.experimental.constrained.maxnum.v2f64(<2 x double>, <2 x double>, metadata) 9964declare <2 x double> @llvm.experimental.constrained.minnum.v2f64(<2 x 
double>, <2 x double>, metadata) 9965declare <2 x i32> @llvm.experimental.constrained.fptosi.v2i32.v2f32(<2 x float>, metadata) 9966declare <2 x i64> @llvm.experimental.constrained.fptosi.v2i64.v2f32(<2 x float>, metadata) 9967declare <2 x i32> @llvm.experimental.constrained.fptosi.v2i32.v2f64(<2 x double>, metadata) 9968declare <2 x i64> @llvm.experimental.constrained.fptosi.v2i64.v2f64(<2 x double>, metadata) 9969declare <2 x i32> @llvm.experimental.constrained.fptoui.v2i32.v2f32(<2 x float>, metadata) 9970declare <2 x i64> @llvm.experimental.constrained.fptoui.v2i64.v2f32(<2 x float>, metadata) 9971declare <2 x i32> @llvm.experimental.constrained.fptoui.v2i32.v2f64(<2 x double>, metadata) 9972declare <2 x i64> @llvm.experimental.constrained.fptoui.v2i64.v2f64(<2 x double>, metadata) 9973declare <2 x float> @llvm.experimental.constrained.fptrunc.v2f32.v2f64(<2 x double>, metadata, metadata) 9974declare <2 x double> @llvm.experimental.constrained.fpext.v2f64.v2f32(<2 x float>, metadata) 9975declare <2 x double> @llvm.experimental.constrained.ceil.v2f64(<2 x double>, metadata) 9976declare <2 x double> @llvm.experimental.constrained.floor.v2f64(<2 x double>, metadata) 9977declare <2 x double> @llvm.experimental.constrained.round.v2f64(<2 x double>, metadata) 9978declare <2 x double> @llvm.experimental.constrained.trunc.v2f64(<2 x double>, metadata) 9979declare <2 x double> @llvm.experimental.constrained.sitofp.v2f64.v2i32(<2 x i32>, metadata, metadata) 9980declare <2 x float> @llvm.experimental.constrained.sitofp.v2f32.v2i32(<2 x i32>, metadata, metadata) 9981declare <2 x double> @llvm.experimental.constrained.sitofp.v2f64.v2i64(<2 x i64>, metadata, metadata) 9982declare <2 x float> @llvm.experimental.constrained.sitofp.v2f32.v2i64(<2 x i64>, metadata, metadata) 9983declare <2 x double> @llvm.experimental.constrained.uitofp.v2f64.v2i32(<2 x i32>, metadata, metadata) 9984declare <2 x float> @llvm.experimental.constrained.uitofp.v2f32.v2i32(<2 x i32>, metadata, 
metadata) 9985declare <2 x double> @llvm.experimental.constrained.uitofp.v2f64.v2i64(<2 x i64>, metadata, metadata) 9986declare <2 x float> @llvm.experimental.constrained.uitofp.v2f32.v2i64(<2 x i64>, metadata, metadata) 9987 9988; Scalar width declarations 9989declare <1 x float> @llvm.experimental.constrained.fadd.v1f32(<1 x float>, <1 x float>, metadata, metadata) 9990declare <1 x float> @llvm.experimental.constrained.fsub.v1f32(<1 x float>, <1 x float>, metadata, metadata) 9991declare <1 x float> @llvm.experimental.constrained.fmul.v1f32(<1 x float>, <1 x float>, metadata, metadata) 9992declare <1 x float> @llvm.experimental.constrained.fdiv.v1f32(<1 x float>, <1 x float>, metadata, metadata) 9993declare <1 x float> @llvm.experimental.constrained.frem.v1f32(<1 x float>, <1 x float>, metadata, metadata) 9994declare <1 x float> @llvm.experimental.constrained.sqrt.v1f32(<1 x float>, metadata, metadata) 9995declare <1 x float> @llvm.experimental.constrained.pow.v1f32(<1 x float>, <1 x float>, metadata, metadata) 9996declare <1 x float> @llvm.experimental.constrained.powi.v1f32(<1 x float>, i32, metadata, metadata) 9997declare <1 x float> @llvm.experimental.constrained.sin.v1f32(<1 x float>, metadata, metadata) 9998declare <1 x float> @llvm.experimental.constrained.cos.v1f32(<1 x float>, metadata, metadata) 9999declare <1 x float> @llvm.experimental.constrained.tan.v1f32(<1 x float>, metadata, metadata) 10000declare <1 x float> @llvm.experimental.constrained.asin.v1f32(<1 x float>, metadata, metadata) 10001declare <1 x float> @llvm.experimental.constrained.acos.v1f32(<1 x float>, metadata, metadata) 10002declare <1 x float> @llvm.experimental.constrained.atan.v1f32(<1 x float>, metadata, metadata) 10003declare <1 x float> @llvm.experimental.constrained.sinh.v1f32(<1 x float>, metadata, metadata) 10004declare <1 x float> @llvm.experimental.constrained.cosh.v1f32(<1 x float>, metadata, metadata) 10005declare <1 x float> @llvm.experimental.constrained.tanh.v1f32(<1 x 
float>, metadata, metadata) 10006declare <1 x float> @llvm.experimental.constrained.exp.v1f32(<1 x float>, metadata, metadata) 10007declare <1 x float> @llvm.experimental.constrained.exp2.v1f32(<1 x float>, metadata, metadata) 10008declare <1 x float> @llvm.experimental.constrained.log.v1f32(<1 x float>, metadata, metadata) 10009declare <1 x float> @llvm.experimental.constrained.log10.v1f32(<1 x float>, metadata, metadata) 10010declare <1 x float> @llvm.experimental.constrained.log2.v1f32(<1 x float>, metadata, metadata) 10011declare <1 x float> @llvm.experimental.constrained.rint.v1f32(<1 x float>, metadata, metadata) 10012declare <1 x float> @llvm.experimental.constrained.nearbyint.v1f32(<1 x float>, metadata, metadata) 10013declare <1 x float> @llvm.experimental.constrained.maxnum.v1f32(<1 x float>, <1 x float>, metadata) 10014declare <1 x float> @llvm.experimental.constrained.minnum.v1f32(<1 x float>, <1 x float>, metadata) 10015declare <1 x i32> @llvm.experimental.constrained.fptosi.v1i32.v1f32(<1 x float>, metadata) 10016declare <1 x i64> @llvm.experimental.constrained.fptosi.v1i64.v1f32(<1 x float>, metadata) 10017declare <1 x i32> @llvm.experimental.constrained.fptosi.v1i32.v1f64(<1 x double>, metadata) 10018declare <1 x i64> @llvm.experimental.constrained.fptosi.v1i64.v1f64(<1 x double>, metadata) 10019declare <1 x i32> @llvm.experimental.constrained.fptoui.v1i32.v1f32(<1 x float>, metadata) 10020declare <1 x i64> @llvm.experimental.constrained.fptoui.v1i64.v1f32(<1 x float>, metadata) 10021declare <1 x i32> @llvm.experimental.constrained.fptoui.v1i32.v1f64(<1 x double>, metadata) 10022declare <1 x i64> @llvm.experimental.constrained.fptoui.v1i64.v1f64(<1 x double>, metadata) 10023declare <1 x float> @llvm.experimental.constrained.fptrunc.v1f32.v1f64(<1 x double>, metadata, metadata) 10024declare <1 x double> @llvm.experimental.constrained.fpext.v1f64.v1f32(<1 x float>, metadata) 10025declare <1 x float> @llvm.experimental.constrained.ceil.v1f32(<1 x 
float>, metadata) 10026declare <1 x float> @llvm.experimental.constrained.floor.v1f32(<1 x float>, metadata) 10027declare <1 x float> @llvm.experimental.constrained.round.v1f32(<1 x float>, metadata) 10028declare <1 x float> @llvm.experimental.constrained.trunc.v1f32(<1 x float>, metadata) 10029declare <1 x double> @llvm.experimental.constrained.sitofp.v1f64.v1i32(<1 x i32>, metadata, metadata) 10030declare <1 x float> @llvm.experimental.constrained.sitofp.v1f32.v1i32(<1 x i32>, metadata, metadata) 10031declare <1 x double> @llvm.experimental.constrained.sitofp.v1f64.v1i64(<1 x i64>, metadata, metadata) 10032declare <1 x float> @llvm.experimental.constrained.sitofp.v1f32.v1i64(<1 x i64>, metadata, metadata) 10033declare <1 x double> @llvm.experimental.constrained.uitofp.v1f64.v1i32(<1 x i32>, metadata, metadata) 10034declare <1 x float> @llvm.experimental.constrained.uitofp.v1f32.v1i32(<1 x i32>, metadata, metadata) 10035declare <1 x double> @llvm.experimental.constrained.uitofp.v1f64.v1i64(<1 x i64>, metadata, metadata) 10036declare <1 x float> @llvm.experimental.constrained.uitofp.v1f32.v1i64(<1 x i64>, metadata, metadata) 10037 10038; Illegal width declarations 10039declare <3 x float> @llvm.experimental.constrained.fadd.v3f32(<3 x float>, <3 x float>, metadata, metadata) 10040declare <3 x double> @llvm.experimental.constrained.fadd.v3f64(<3 x double>, <3 x double>, metadata, metadata) 10041declare <3 x float> @llvm.experimental.constrained.fsub.v3f32(<3 x float>, <3 x float>, metadata, metadata) 10042declare <3 x double> @llvm.experimental.constrained.fsub.v3f64(<3 x double>, <3 x double>, metadata, metadata) 10043declare <3 x float> @llvm.experimental.constrained.fmul.v3f32(<3 x float>, <3 x float>, metadata, metadata) 10044declare <3 x double> @llvm.experimental.constrained.fmul.v3f64(<3 x double>, <3 x double>, metadata, metadata) 10045declare <3 x float> @llvm.experimental.constrained.fdiv.v3f32(<3 x float>, <3 x float>, metadata, metadata) 10046declare <3 
x double> @llvm.experimental.constrained.fdiv.v3f64(<3 x double>, <3 x double>, metadata, metadata) 10047declare <3 x float> @llvm.experimental.constrained.frem.v3f32(<3 x float>, <3 x float>, metadata, metadata) 10048declare <3 x double> @llvm.experimental.constrained.frem.v3f64(<3 x double>, <3 x double>, metadata, metadata) 10049declare <3 x float> @llvm.experimental.constrained.sqrt.v3f32(<3 x float>, metadata, metadata) 10050declare <3 x double> @llvm.experimental.constrained.sqrt.v3f64(<3 x double>, metadata, metadata) 10051declare <3 x float> @llvm.experimental.constrained.pow.v3f32(<3 x float>, <3 x float>, metadata, metadata) 10052declare <3 x double> @llvm.experimental.constrained.pow.v3f64(<3 x double>, <3 x double>, metadata, metadata) 10053declare <3 x float> @llvm.experimental.constrained.powi.v3f32(<3 x float>, i32, metadata, metadata) 10054declare <3 x double> @llvm.experimental.constrained.powi.v3f64(<3 x double>, i32, metadata, metadata) 10055declare <3 x float> @llvm.experimental.constrained.sin.v3f32(<3 x float>, metadata, metadata) 10056declare <3 x double> @llvm.experimental.constrained.sin.v3f64(<3 x double>, metadata, metadata) 10057declare <3 x float> @llvm.experimental.constrained.cos.v3f32(<3 x float>, metadata, metadata) 10058declare <3 x double> @llvm.experimental.constrained.cos.v3f64(<3 x double>, metadata, metadata) 10059declare <3 x float> @llvm.experimental.constrained.tan.v3f32(<3 x float>, metadata, metadata) 10060declare <3 x double> @llvm.experimental.constrained.tan.v3f64(<3 x double>, metadata, metadata) 10061declare <3 x float> @llvm.experimental.constrained.asin.v3f32(<3 x float>, metadata, metadata) 10062declare <3 x double> @llvm.experimental.constrained.asin.v3f64(<3 x double>, metadata, metadata) 10063declare <3 x float> @llvm.experimental.constrained.acos.v3f32(<3 x float>, metadata, metadata) 10064declare <3 x double> @llvm.experimental.constrained.acos.v3f64(<3 x double>, metadata, metadata) 10065declare <3 x float> 
@llvm.experimental.constrained.atan.v3f32(<3 x float>, metadata, metadata) 10066declare <3 x double> @llvm.experimental.constrained.atan.v3f64(<3 x double>, metadata, metadata) 10067declare <3 x float> @llvm.experimental.constrained.sinh.v3f32(<3 x float>, metadata, metadata) 10068declare <3 x double> @llvm.experimental.constrained.sinh.v3f64(<3 x double>, metadata, metadata) 10069declare <3 x float> @llvm.experimental.constrained.cosh.v3f32(<3 x float>, metadata, metadata) 10070declare <3 x double> @llvm.experimental.constrained.cosh.v3f64(<3 x double>, metadata, metadata) 10071declare <3 x float> @llvm.experimental.constrained.tanh.v3f32(<3 x float>, metadata, metadata) 10072declare <3 x double> @llvm.experimental.constrained.tanh.v3f64(<3 x double>, metadata, metadata) 10073declare <3 x float> @llvm.experimental.constrained.exp.v3f32(<3 x float>, metadata, metadata) 10074declare <3 x double> @llvm.experimental.constrained.exp.v3f64(<3 x double>, metadata, metadata) 10075declare <3 x float> @llvm.experimental.constrained.exp2.v3f32(<3 x float>, metadata, metadata) 10076declare <3 x double> @llvm.experimental.constrained.exp2.v3f64(<3 x double>, metadata, metadata) 10077declare <3 x float> @llvm.experimental.constrained.log.v3f32(<3 x float>, metadata, metadata) 10078declare <3 x double> @llvm.experimental.constrained.log.v3f64(<3 x double>, metadata, metadata) 10079declare <3 x float> @llvm.experimental.constrained.log10.v3f32(<3 x float>, metadata, metadata) 10080declare <3 x double> @llvm.experimental.constrained.log10.v3f64(<3 x double>, metadata, metadata) 10081declare <3 x float> @llvm.experimental.constrained.log2.v3f32(<3 x float>, metadata, metadata) 10082declare <3 x double> @llvm.experimental.constrained.log2.v3f64(<3 x double>, metadata, metadata) 10083declare <3 x float> @llvm.experimental.constrained.rint.v3f32(<3 x float>, metadata, metadata) 10084declare <3 x double> @llvm.experimental.constrained.rint.v3f64(<3 x double>, metadata, metadata) 
10085declare <3 x float> @llvm.experimental.constrained.nearbyint.v3f32(<3 x float>, metadata, metadata) 10086declare <3 x double> @llvm.experimental.constrained.nearbyint.v3f64(<3 x double>, metadata, metadata) 10087declare <3 x float> @llvm.experimental.constrained.maxnum.v3f32(<3 x float>, <3 x float>, metadata) 10088declare <3 x double> @llvm.experimental.constrained.maxnum.v3f64(<3 x double>, <3 x double>, metadata) 10089declare <3 x float> @llvm.experimental.constrained.minnum.v3f32(<3 x float>, <3 x float>, metadata) 10090declare <3 x double> @llvm.experimental.constrained.minnum.v3f64(<3 x double>, <3 x double>, metadata) 10091declare <3 x i32> @llvm.experimental.constrained.fptosi.v3i32.v3f32(<3 x float>, metadata) 10092declare <3 x i64> @llvm.experimental.constrained.fptosi.v3i64.v3f32(<3 x float>, metadata) 10093declare <3 x i32> @llvm.experimental.constrained.fptosi.v3i32.v3f64(<3 x double>, metadata) 10094declare <3 x i64> @llvm.experimental.constrained.fptosi.v3i64.v3f64(<3 x double>, metadata) 10095declare <3 x i32> @llvm.experimental.constrained.fptoui.v3i32.v3f32(<3 x float>, metadata) 10096declare <3 x i64> @llvm.experimental.constrained.fptoui.v3i64.v3f32(<3 x float>, metadata) 10097declare <3 x i32> @llvm.experimental.constrained.fptoui.v3i32.v3f64(<3 x double>, metadata) 10098declare <3 x i64> @llvm.experimental.constrained.fptoui.v3i64.v3f64(<3 x double>, metadata) 10099declare <3 x float> @llvm.experimental.constrained.fptrunc.v3f32.v3f64(<3 x double>, metadata, metadata) 10100declare <3 x double> @llvm.experimental.constrained.fpext.v3f64.v3f32(<3 x float>, metadata) 10101declare <3 x float> @llvm.experimental.constrained.ceil.v3f32(<3 x float>, metadata) 10102declare <3 x double> @llvm.experimental.constrained.ceil.v3f64(<3 x double>, metadata) 10103declare <3 x float> @llvm.experimental.constrained.floor.v3f32(<3 x float>, metadata) 10104declare <3 x double> @llvm.experimental.constrained.floor.v3f64(<3 x double>, metadata) 10105declare <3 
x float> @llvm.experimental.constrained.round.v3f32(<3 x float>, metadata) 10106declare <3 x double> @llvm.experimental.constrained.round.v3f64(<3 x double>, metadata) 10107declare <3 x float> @llvm.experimental.constrained.trunc.v3f32(<3 x float>, metadata) 10108declare <3 x double> @llvm.experimental.constrained.trunc.v3f64(<3 x double>, metadata) 10109declare <3 x double> @llvm.experimental.constrained.sitofp.v3f64.v3i32(<3 x i32>, metadata, metadata) 10110declare <3 x float> @llvm.experimental.constrained.sitofp.v3f32.v3i32(<3 x i32>, metadata, metadata) 10111declare <3 x double> @llvm.experimental.constrained.sitofp.v3f64.v3i64(<3 x i64>, metadata, metadata) 10112declare <3 x float> @llvm.experimental.constrained.sitofp.v3f32.v3i64(<3 x i64>, metadata, metadata) 10113declare <3 x double> @llvm.experimental.constrained.uitofp.v3f64.v3i32(<3 x i32>, metadata, metadata) 10114declare <3 x float> @llvm.experimental.constrained.uitofp.v3f32.v3i32(<3 x i32>, metadata, metadata) 10115declare <3 x double> @llvm.experimental.constrained.uitofp.v3f64.v3i64(<3 x i64>, metadata, metadata) 10116declare <3 x float> @llvm.experimental.constrained.uitofp.v3f32.v3i64(<3 x i64>, metadata, metadata) 10117 10118; Double width declarations 10119declare <4 x double> @llvm.experimental.constrained.fadd.v4f64(<4 x double>, <4 x double>, metadata, metadata) 10120declare <4 x double> @llvm.experimental.constrained.fsub.v4f64(<4 x double>, <4 x double>, metadata, metadata) 10121declare <4 x double> @llvm.experimental.constrained.fmul.v4f64(<4 x double>, <4 x double>, metadata, metadata) 10122declare <4 x double> @llvm.experimental.constrained.fdiv.v4f64(<4 x double>, <4 x double>, metadata, metadata) 10123declare <4 x double> @llvm.experimental.constrained.frem.v4f64(<4 x double>, <4 x double>, metadata, metadata) 10124declare <4 x double> @llvm.experimental.constrained.sqrt.v4f64(<4 x double>, metadata, metadata) 10125declare <4 x double> @llvm.experimental.constrained.pow.v4f64(<4 x 
double>, <4 x double>, metadata, metadata) 10126declare <4 x double> @llvm.experimental.constrained.powi.v4f64(<4 x double>, i32, metadata, metadata) 10127declare <4 x double> @llvm.experimental.constrained.sin.v4f64(<4 x double>, metadata, metadata) 10128declare <4 x double> @llvm.experimental.constrained.cos.v4f64(<4 x double>, metadata, metadata) 10129declare <4 x double> @llvm.experimental.constrained.tan.v4f64(<4 x double>, metadata, metadata) 10130declare <4 x double> @llvm.experimental.constrained.asin.v4f64(<4 x double>, metadata, metadata) 10131declare <4 x double> @llvm.experimental.constrained.acos.v4f64(<4 x double>, metadata, metadata) 10132declare <4 x double> @llvm.experimental.constrained.atan.v4f64(<4 x double>, metadata, metadata) 10133declare <4 x double> @llvm.experimental.constrained.atan2.v4f64(<4 x double>, <4 x double>, metadata, metadata) 10134declare <4 x double> @llvm.experimental.constrained.sinh.v4f64(<4 x double>, metadata, metadata) 10135declare <4 x double> @llvm.experimental.constrained.cosh.v4f64(<4 x double>, metadata, metadata) 10136declare <4 x double> @llvm.experimental.constrained.tanh.v4f64(<4 x double>, metadata, metadata) 10137declare <4 x double> @llvm.experimental.constrained.exp.v4f64(<4 x double>, metadata, metadata) 10138declare <4 x double> @llvm.experimental.constrained.exp2.v4f64(<4 x double>, metadata, metadata) 10139declare <4 x double> @llvm.experimental.constrained.log.v4f64(<4 x double>, metadata, metadata) 10140declare <4 x double> @llvm.experimental.constrained.log10.v4f64(<4 x double>, metadata, metadata) 10141declare <4 x double> @llvm.experimental.constrained.log2.v4f64(<4 x double>, metadata, metadata) 10142declare <4 x double> @llvm.experimental.constrained.rint.v4f64(<4 x double>, metadata, metadata) 10143declare <4 x double> @llvm.experimental.constrained.nearbyint.v4f64(<4 x double>, metadata, metadata) 10144declare <4 x double> @llvm.experimental.constrained.maxnum.v4f64(<4 x double>, <4 x double>, 
metadata) 10145declare <4 x double> @llvm.experimental.constrained.minnum.v4f64(<4 x double>, <4 x double>, metadata) 10146declare <4 x i32> @llvm.experimental.constrained.fptosi.v4i32.v4f32(<4 x float>, metadata) 10147declare <4 x i64> @llvm.experimental.constrained.fptosi.v4i64.v4f32(<4 x float>, metadata) 10148declare <4 x i32> @llvm.experimental.constrained.fptosi.v4i32.v4f64(<4 x double>, metadata) 10149declare <4 x i64> @llvm.experimental.constrained.fptosi.v4i64.v4f64(<4 x double>, metadata) 10150declare <4 x i32> @llvm.experimental.constrained.fptoui.v4i32.v4f32(<4 x float>, metadata) 10151declare <4 x i64> @llvm.experimental.constrained.fptoui.v4i64.v4f32(<4 x float>, metadata) 10152declare <4 x i32> @llvm.experimental.constrained.fptoui.v4i32.v4f64(<4 x double>, metadata) 10153declare <4 x i64> @llvm.experimental.constrained.fptoui.v4i64.v4f64(<4 x double>, metadata) 10154declare <4 x float> @llvm.experimental.constrained.fptrunc.v4f32.v4f64(<4 x double>, metadata, metadata) 10155declare <4 x double> @llvm.experimental.constrained.fpext.v4f64.v4f32(<4 x float>, metadata) 10156declare <4 x double> @llvm.experimental.constrained.ceil.v4f64(<4 x double>, metadata) 10157declare <4 x double> @llvm.experimental.constrained.floor.v4f64(<4 x double>, metadata) 10158declare <4 x double> @llvm.experimental.constrained.round.v4f64(<4 x double>, metadata) 10159declare <4 x double> @llvm.experimental.constrained.trunc.v4f64(<4 x double>, metadata) 10160declare <4 x double> @llvm.experimental.constrained.sitofp.v4f64.v4i32(<4 x i32>, metadata, metadata) 10161declare <4 x float> @llvm.experimental.constrained.sitofp.v4f32.v4i32(<4 x i32>, metadata, metadata) 10162declare <4 x double> @llvm.experimental.constrained.sitofp.v4f64.v4i64(<4 x i64>, metadata, metadata) 10163declare <4 x float> @llvm.experimental.constrained.sitofp.v4f32.v4i64(<4 x i64>, metadata, metadata) 10164declare <4 x double> @llvm.experimental.constrained.uitofp.v4f64.v4i32(<4 x i32>, metadata, 
metadata) 10165declare <4 x float> @llvm.experimental.constrained.uitofp.v4f32.v4i32(<4 x i32>, metadata, metadata) 10166declare <4 x double> @llvm.experimental.constrained.uitofp.v4f64.v4i64(<4 x i64>, metadata, metadata) 10167declare <4 x float> @llvm.experimental.constrained.uitofp.v4f32.v4i64(<4 x i64>, metadata, metadata) 10168 10169