; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512f -O3 | FileCheck %s --check-prefixes=CHECK,NODQ-32
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f -O3 | FileCheck %s --check-prefixes=CHECK,NODQ-64
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512f,+avx512dq -O3 | FileCheck %s --check-prefixes=CHECK,DQ,DQ-32
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512dq -O3 | FileCheck %s --check-prefixes=CHECK,DQ,DQ-64

; Tests lowering of the constrained (strictfp) sitofp/uitofp intrinsics for
; 512-bit vector results, with and without AVX512DQ (which provides the
; direct vcvt[u]qq2pd/ps instructions for the i64 source cases).

declare <16 x float> @llvm.experimental.constrained.sitofp.v16f32.v16i1(<16 x i1>, metadata, metadata)
declare <16 x float> @llvm.experimental.constrained.uitofp.v16f32.v16i1(<16 x i1>, metadata, metadata)
declare <16 x float> @llvm.experimental.constrained.sitofp.v16f32.v16i8(<16 x i8>, metadata, metadata)
declare <16 x float> @llvm.experimental.constrained.uitofp.v16f32.v16i8(<16 x i8>, metadata, metadata)
declare <16 x float> @llvm.experimental.constrained.sitofp.v16f32.v16i16(<16 x i16>, metadata, metadata)
declare <16 x float> @llvm.experimental.constrained.uitofp.v16f32.v16i16(<16 x i16>, metadata, metadata)
declare <16 x float> @llvm.experimental.constrained.sitofp.v16f32.v16i32(<16 x i32>, metadata, metadata)
declare <16 x float> @llvm.experimental.constrained.uitofp.v16f32.v16i32(<16 x i32>, metadata, metadata)
declare <8 x double> @llvm.experimental.constrained.sitofp.v8f64.v8i1(<8 x i1>, metadata, metadata)
declare <8 x double> @llvm.experimental.constrained.uitofp.v8f64.v8i1(<8 x i1>, metadata, metadata)
declare <8 x double> @llvm.experimental.constrained.sitofp.v8f64.v8i8(<8 x i8>, metadata, metadata)
declare <8 x double> @llvm.experimental.constrained.uitofp.v8f64.v8i8(<8 x i8>, metadata, metadata)
declare <8 x double> @llvm.experimental.constrained.sitofp.v8f64.v8i16(<8 x i16>, metadata, metadata)
declare <8 x double> @llvm.experimental.constrained.uitofp.v8f64.v8i16(<8 x i16>, metadata, metadata)
declare <8 x double> @llvm.experimental.constrained.sitofp.v8f64.v8i32(<8 x i32>, metadata, metadata)
declare <8 x double> @llvm.experimental.constrained.uitofp.v8f64.v8i32(<8 x i32>, metadata, metadata)
declare <8 x double> @llvm.experimental.constrained.sitofp.v8f64.v8i64(<8 x i64>, metadata, metadata)
declare <8 x double> @llvm.experimental.constrained.uitofp.v8f64.v8i64(<8 x i64>, metadata, metadata)
declare <8 x float> @llvm.experimental.constrained.sitofp.v8f32.v8i64(<8 x i64>, metadata, metadata)
declare <8 x float> @llvm.experimental.constrained.uitofp.v8f32.v8i64(<8 x i64>, metadata, metadata)

define <16 x float> @sitofp_v16i1_v16f32(<16 x i1> %x) #0 {
; CHECK-LABEL: sitofp_v16i1_v16f32:
; CHECK: # %bb.0:
; CHECK-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
; CHECK-NEXT: vpslld $31, %zmm0, %zmm0
; CHECK-NEXT: vpsrad $31, %zmm0, %zmm0
; CHECK-NEXT: vcvtdq2ps %zmm0, %zmm0
; CHECK-NEXT: ret{{[l|q]}}
 %result = call <16 x float> @llvm.experimental.constrained.sitofp.v16f32.v16i1(<16 x i1> %x,
 metadata !"round.dynamic",
 metadata !"fpexcept.strict") #0
 ret <16 x float> %result
}

define <16 x float> @uitofp_v16i1_v16f32(<16 x i1> %x) #0 {
; NODQ-32-LABEL: uitofp_v16i1_v16f32:
; NODQ-32: # %bb.0:
; NODQ-32-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; NODQ-32-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
; NODQ-32-NEXT: vcvtdq2ps %zmm0, %zmm0
; NODQ-32-NEXT: retl
;
; NODQ-64-LABEL: uitofp_v16i1_v16f32:
; NODQ-64: # %bb.0:
; NODQ-64-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; NODQ-64-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
; NODQ-64-NEXT: vcvtdq2ps %zmm0, %zmm0
; NODQ-64-NEXT: retq
;
; DQ-32-LABEL: uitofp_v16i1_v16f32:
; DQ-32: # %bb.0:
; DQ-32-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; DQ-32-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
; DQ-32-NEXT: vcvtdq2ps %zmm0, %zmm0
; DQ-32-NEXT: retl
;
; DQ-64-LABEL: uitofp_v16i1_v16f32:
; DQ-64: # %bb.0:
; DQ-64-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; DQ-64-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
; DQ-64-NEXT: vcvtdq2ps %zmm0, %zmm0
; DQ-64-NEXT: retq
 %result = call <16 x float> @llvm.experimental.constrained.uitofp.v16f32.v16i1(<16 x i1> %x,
 metadata !"round.dynamic",
 metadata !"fpexcept.strict") #0
 ret <16 x float> %result
}

define <16 x float> @sitofp_v16i8_v16f32(<16 x i8> %x) #0 {
; CHECK-LABEL: sitofp_v16i8_v16f32:
; CHECK: # %bb.0:
; CHECK-NEXT: vpmovsxbd %xmm0, %zmm0
; CHECK-NEXT: vcvtdq2ps %zmm0, %zmm0
; CHECK-NEXT: ret{{[l|q]}}
 %result = call <16 x float> @llvm.experimental.constrained.sitofp.v16f32.v16i8(<16 x i8> %x,
 metadata !"round.dynamic",
 metadata !"fpexcept.strict") #0
 ret <16 x float> %result
}

define <16 x float> @uitofp_v16i8_v16f32(<16 x i8> %x) #0 {
; CHECK-LABEL: uitofp_v16i8_v16f32:
; CHECK: # %bb.0:
; CHECK-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
; CHECK-NEXT: vcvtdq2ps %zmm0, %zmm0
; CHECK-NEXT: ret{{[l|q]}}
 %result = call <16 x float> @llvm.experimental.constrained.uitofp.v16f32.v16i8(<16 x i8> %x,
 metadata !"round.dynamic",
 metadata !"fpexcept.strict") #0
 ret <16 x float> %result
}

define <16 x float> @sitofp_v16i16_v16f32(<16 x i16> %x) #0 {
; CHECK-LABEL: sitofp_v16i16_v16f32:
; CHECK: # %bb.0:
; CHECK-NEXT: vpmovsxwd %ymm0, %zmm0
; CHECK-NEXT: vcvtdq2ps %zmm0, %zmm0
; CHECK-NEXT: ret{{[l|q]}}
 %result = call <16 x float> @llvm.experimental.constrained.sitofp.v16f32.v16i16(<16 x i16> %x,
 metadata !"round.dynamic",
 metadata !"fpexcept.strict") #0
 ret <16 x float> %result
}

define <16 x float> @uitofp_v16i16_v16f32(<16 x i16> %x) #0 {
; CHECK-LABEL: uitofp_v16i16_v16f32:
; CHECK: # %bb.0:
; CHECK-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
; CHECK-NEXT: vcvtdq2ps %zmm0, %zmm0
; CHECK-NEXT: ret{{[l|q]}}
 %result = call <16 x float> @llvm.experimental.constrained.uitofp.v16f32.v16i16(<16 x i16> %x,
 metadata !"round.dynamic",
 metadata !"fpexcept.strict") #0
 ret <16 x float> %result
}

define <16 x float> @sitofp_v16i32_v16f32(<16 x i32> %x) #0 {
; CHECK-LABEL: sitofp_v16i32_v16f32:
; CHECK: # %bb.0:
; CHECK-NEXT: vcvtdq2ps %zmm0, %zmm0
; CHECK-NEXT: ret{{[l|q]}}
 %result = call <16 x float> @llvm.experimental.constrained.sitofp.v16f32.v16i32(<16 x i32> %x,
 metadata !"round.dynamic",
 metadata !"fpexcept.strict") #0
 ret <16 x float> %result
}

define <16 x float> @uitofp_v16i32_v16f32(<16 x i32> %x) #0 {
; CHECK-LABEL: uitofp_v16i32_v16f32:
; CHECK: # %bb.0:
; CHECK-NEXT: vcvtudq2ps %zmm0, %zmm0
; CHECK-NEXT: ret{{[l|q]}}
 %result = call <16 x float> @llvm.experimental.constrained.uitofp.v16f32.v16i32(<16 x i32> %x,
 metadata !"round.dynamic",
 metadata !"fpexcept.strict") #0
 ret <16 x float> %result
}

define <8 x double> @sitofp_v8i1_v8f64(<8 x i1> %x) #0 {
; CHECK-LABEL: sitofp_v8i1_v8f64:
; CHECK: # %bb.0:
; CHECK-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; CHECK-NEXT: vpslld $31, %ymm0, %ymm0
; CHECK-NEXT: vpsrad $31, %ymm0, %ymm0
; CHECK-NEXT: vcvtdq2pd %ymm0, %zmm0
; CHECK-NEXT: ret{{[l|q]}}
 %result = call <8 x double> @llvm.experimental.constrained.sitofp.v8f64.v8i1(<8 x i1> %x,
 metadata !"round.dynamic",
 metadata !"fpexcept.strict") #0
 ret <8 x double> %result
}

define <8 x double> @uitofp_v8i1_v8f64(<8 x i1> %x) #0 {
; NODQ-32-LABEL: uitofp_v8i1_v8f64:
; NODQ-32: # %bb.0:
; NODQ-32-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; NODQ-32-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; NODQ-32-NEXT: vcvtdq2pd %ymm0, %zmm0
; NODQ-32-NEXT: retl
;
; NODQ-64-LABEL: uitofp_v8i1_v8f64:
; NODQ-64: # %bb.0:
; NODQ-64-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; NODQ-64-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; NODQ-64-NEXT: vcvtdq2pd %ymm0, %zmm0
; NODQ-64-NEXT: retq
;
; DQ-32-LABEL: uitofp_v8i1_v8f64:
; DQ-32: # %bb.0:
; DQ-32-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; DQ-32-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; DQ-32-NEXT: vcvtdq2pd %ymm0, %zmm0
; DQ-32-NEXT: retl
;
; DQ-64-LABEL: uitofp_v8i1_v8f64:
; DQ-64: # %bb.0:
; DQ-64-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; DQ-64-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; DQ-64-NEXT: vcvtdq2pd %ymm0, %zmm0
; DQ-64-NEXT: retq
 %result = call <8 x double> @llvm.experimental.constrained.uitofp.v8f64.v8i1(<8 x i1> %x,
 metadata !"round.dynamic",
 metadata !"fpexcept.strict") #0
 ret <8 x double> %result
}

define <8 x double> @sitofp_v8i8_v8f64(<8 x i8> %x) #0 {
; CHECK-LABEL: sitofp_v8i8_v8f64:
; CHECK: # %bb.0:
; CHECK-NEXT: vpmovsxbd %xmm0, %ymm0
; CHECK-NEXT: vcvtdq2pd %ymm0, %zmm0
; CHECK-NEXT: ret{{[l|q]}}
 %result = call <8 x double> @llvm.experimental.constrained.sitofp.v8f64.v8i8(<8 x i8> %x,
 metadata !"round.dynamic",
 metadata !"fpexcept.strict") #0
 ret <8 x double> %result
}

define <8 x double> @uitofp_v8i8_v8f64(<8 x i8> %x) #0 {
; CHECK-LABEL: uitofp_v8i8_v8f64:
; CHECK: # %bb.0:
; CHECK-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
; CHECK-NEXT: vcvtdq2pd %ymm0, %zmm0
; CHECK-NEXT: ret{{[l|q]}}
 %result = call <8 x double> @llvm.experimental.constrained.uitofp.v8f64.v8i8(<8 x i8> %x,
 metadata !"round.dynamic",
 metadata !"fpexcept.strict") #0
 ret <8 x double> %result
}

define <8 x double> @sitofp_v8i16_v8f64(<8 x i16> %x) #0 {
; CHECK-LABEL: sitofp_v8i16_v8f64:
; CHECK: # %bb.0:
; CHECK-NEXT: vpmovsxwd %xmm0, %ymm0
; CHECK-NEXT: vcvtdq2pd %ymm0, %zmm0
; CHECK-NEXT: ret{{[l|q]}}
 %result = call <8 x double> @llvm.experimental.constrained.sitofp.v8f64.v8i16(<8 x i16> %x,
 metadata !"round.dynamic",
 metadata !"fpexcept.strict") #0
 ret <8 x double> %result
}

define <8 x double> @uitofp_v8i16_v8f64(<8 x i16> %x) #0 {
; CHECK-LABEL: uitofp_v8i16_v8f64:
; CHECK: # %bb.0:
; CHECK-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; CHECK-NEXT: vcvtdq2pd %ymm0, %zmm0
; CHECK-NEXT: ret{{[l|q]}}
 %result = call <8 x double> @llvm.experimental.constrained.uitofp.v8f64.v8i16(<8 x i16> %x,
 metadata !"round.dynamic",
 metadata !"fpexcept.strict") #0
 ret <8 x double> %result
}

define <8 x double> @sitofp_v8i32_v8f64(<8 x i32> %x) #0 {
; CHECK-LABEL: sitofp_v8i32_v8f64:
; CHECK: # %bb.0:
; CHECK-NEXT: vcvtdq2pd %ymm0, %zmm0
; CHECK-NEXT: ret{{[l|q]}}
 %result = call <8 x double> @llvm.experimental.constrained.sitofp.v8f64.v8i32(<8 x i32> %x,
 metadata !"round.dynamic",
 metadata !"fpexcept.strict") #0
 ret <8 x double> %result
}

define <8 x double> @uitofp_v8i32_v8f64(<8 x i32> %x) #0 {
; CHECK-LABEL: uitofp_v8i32_v8f64:
; CHECK: # %bb.0:
; CHECK-NEXT: vcvtudq2pd %ymm0, %zmm0
; CHECK-NEXT: ret{{[l|q]}}
 %result = call <8 x double> @llvm.experimental.constrained.uitofp.v8f64.v8i32(<8 x i32> %x,
 metadata !"round.dynamic",
 metadata !"fpexcept.strict") #0
 ret <8 x double> %result
}

define <8 x double> @sitofp_v8i64_v8f64(<8 x i64> %x) #0 {
; NODQ-32-LABEL: sitofp_v8i64_v8f64:
; NODQ-32: # %bb.0:
; NODQ-32-NEXT: pushl %ebp
; NODQ-32-NEXT: .cfi_def_cfa_offset 8
; NODQ-32-NEXT: .cfi_offset %ebp, -8
; NODQ-32-NEXT: movl %esp, %ebp
; NODQ-32-NEXT: .cfi_def_cfa_register %ebp
; NODQ-32-NEXT: andl $-8, %esp
; NODQ-32-NEXT: subl $128, %esp
; NODQ-32-NEXT: vextractf32x4 $2, %zmm0, %xmm1
; NODQ-32-NEXT: vmovlps %xmm1, {{[0-9]+}}(%esp)
; NODQ-32-NEXT: vshufps {{.*#+}} xmm1 = xmm1[2,3,2,3]
; NODQ-32-NEXT: vmovlps %xmm1, {{[0-9]+}}(%esp)
; NODQ-32-NEXT: vextractf32x4 $3, %zmm0, %xmm1
; NODQ-32-NEXT: vmovlps %xmm1, {{[0-9]+}}(%esp)
; NODQ-32-NEXT: vshufps {{.*#+}} xmm1 = xmm1[2,3,2,3]
; NODQ-32-NEXT: vmovlps %xmm1, {{[0-9]+}}(%esp)
; NODQ-32-NEXT: vmovlps %xmm0, {{[0-9]+}}(%esp)
; NODQ-32-NEXT: vshufps {{.*#+}} xmm1 = xmm0[2,3,2,3]
; NODQ-32-NEXT: vmovlps %xmm1, {{[0-9]+}}(%esp)
; NODQ-32-NEXT: vextractf128 $1, %ymm0, %xmm0
; NODQ-32-NEXT: vmovlps %xmm0, {{[0-9]+}}(%esp)
; NODQ-32-NEXT: vshufps {{.*#+}} xmm0 = xmm0[2,3,2,3]
; NODQ-32-NEXT: vmovlps %xmm0, {{[0-9]+}}(%esp)
; NODQ-32-NEXT: fildll {{[0-9]+}}(%esp)
; NODQ-32-NEXT: fstpl {{[0-9]+}}(%esp)
; NODQ-32-NEXT: fildll {{[0-9]+}}(%esp)
; NODQ-32-NEXT: fstpl {{[0-9]+}}(%esp)
; NODQ-32-NEXT: fildll {{[0-9]+}}(%esp)
; NODQ-32-NEXT: fstpl {{[0-9]+}}(%esp)
; NODQ-32-NEXT: fildll {{[0-9]+}}(%esp)
; NODQ-32-NEXT: fstpl (%esp)
; NODQ-32-NEXT: fildll {{[0-9]+}}(%esp)
; NODQ-32-NEXT: fstpl {{[0-9]+}}(%esp)
; NODQ-32-NEXT: fildll {{[0-9]+}}(%esp)
; NODQ-32-NEXT: fstpl {{[0-9]+}}(%esp)
; NODQ-32-NEXT: fildll {{[0-9]+}}(%esp)
; NODQ-32-NEXT: fstpl {{[0-9]+}}(%esp)
; NODQ-32-NEXT: fildll {{[0-9]+}}(%esp)
; NODQ-32-NEXT: fstpl {{[0-9]+}}(%esp)
; NODQ-32-NEXT: wait
; NODQ-32-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; NODQ-32-NEXT: vmovhps {{.*#+}} xmm0 = xmm0[0,1],mem[0,1]
; NODQ-32-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
; NODQ-32-NEXT: vmovhps {{.*#+}} xmm1 = xmm1[0,1],mem[0,1]
; NODQ-32-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; NODQ-32-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
; NODQ-32-NEXT: vmovhps {{.*#+}} xmm1 = xmm1[0,1],mem[0,1]
; NODQ-32-NEXT: vmovsd {{.*#+}} xmm2 = mem[0],zero
; NODQ-32-NEXT: vmovhps {{.*#+}} xmm2 = xmm2[0,1],mem[0,1]
; NODQ-32-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
; NODQ-32-NEXT: vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
; NODQ-32-NEXT: movl %ebp, %esp
; NODQ-32-NEXT: popl %ebp
; NODQ-32-NEXT: .cfi_def_cfa %esp, 4
; NODQ-32-NEXT: retl
;
; NODQ-64-LABEL: sitofp_v8i64_v8f64:
; NODQ-64: # %bb.0:
; NODQ-64-NEXT: vextracti32x4 $3, %zmm0, %xmm1
; NODQ-64-NEXT: vpextrq $1, %xmm1, %rax
; NODQ-64-NEXT: vcvtsi2sd %rax, %xmm2, %xmm2
; NODQ-64-NEXT: vmovq %xmm1, %rax
; NODQ-64-NEXT: vcvtsi2sd %rax, %xmm3, %xmm1
; NODQ-64-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0]
; NODQ-64-NEXT: vextracti32x4 $2, %zmm0, %xmm2
; NODQ-64-NEXT: vpextrq $1, %xmm2, %rax
; NODQ-64-NEXT: vcvtsi2sd %rax, %xmm3, %xmm3
; NODQ-64-NEXT: vmovq %xmm2, %rax
; NODQ-64-NEXT: vcvtsi2sd %rax, %xmm4, %xmm2
; NODQ-64-NEXT: vunpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0]
; NODQ-64-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm1
; NODQ-64-NEXT: vextracti128 $1, %ymm0, %xmm2
; NODQ-64-NEXT: vpextrq $1, %xmm2, %rax
; NODQ-64-NEXT: vcvtsi2sd %rax, %xmm4, %xmm3
; NODQ-64-NEXT: vmovq %xmm2, %rax
; NODQ-64-NEXT: vcvtsi2sd %rax, %xmm4, %xmm2
; NODQ-64-NEXT: vunpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0]
; NODQ-64-NEXT: vpextrq $1, %xmm0, %rax
; NODQ-64-NEXT: vcvtsi2sd %rax, %xmm4, %xmm3
; NODQ-64-NEXT: vmovq %xmm0, %rax
; NODQ-64-NEXT: vcvtsi2sd %rax, %xmm4, %xmm0
; NODQ-64-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm3[0]
; NODQ-64-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; NODQ-64-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
; NODQ-64-NEXT: retq
;
; DQ-LABEL: sitofp_v8i64_v8f64:
; DQ: # %bb.0:
; DQ-NEXT: vcvtqq2pd %zmm0, %zmm0
; DQ-NEXT: ret{{[l|q]}}
 %result = call <8 x double> @llvm.experimental.constrained.sitofp.v8f64.v8i64(<8 x i64> %x,
 metadata !"round.dynamic",
 metadata !"fpexcept.strict") #0
 ret <8 x double> %result
}

define <8 x double> @uitofp_v8i64_v8f64(<8 x i64> %x) #0 {
; NODQ-32-LABEL: uitofp_v8i64_v8f64:
; NODQ-32: # %bb.0:
; NODQ-32-NEXT: pushl %ebp
; NODQ-32-NEXT: .cfi_def_cfa_offset 8
; NODQ-32-NEXT: .cfi_offset %ebp, -8
; NODQ-32-NEXT: movl %esp, %ebp
; NODQ-32-NEXT: .cfi_def_cfa_register %ebp
; NODQ-32-NEXT: andl $-8, %esp
; NODQ-32-NEXT: subl $128, %esp
; NODQ-32-NEXT: vextractf32x4 $2, %zmm0, %xmm3
; NODQ-32-NEXT: vmovlps %xmm3, {{[0-9]+}}(%esp)
; NODQ-32-NEXT: vshufps {{.*#+}} xmm1 = xmm3[2,3,2,3]
; NODQ-32-NEXT: vmovlps %xmm1, {{[0-9]+}}(%esp)
; NODQ-32-NEXT: vextractf32x4 $3, %zmm0, %xmm2
; NODQ-32-NEXT: vmovlps %xmm2, {{[0-9]+}}(%esp)
; NODQ-32-NEXT: vshufps {{.*#+}} xmm1 = xmm2[2,3,2,3]
; NODQ-32-NEXT: vmovlps %xmm1, {{[0-9]+}}(%esp)
; NODQ-32-NEXT: vmovlps %xmm0, {{[0-9]+}}(%esp)
; NODQ-32-NEXT: vshufps {{.*#+}} xmm1 = xmm0[2,3,2,3]
; NODQ-32-NEXT: vmovlps %xmm1, {{[0-9]+}}(%esp)
; NODQ-32-NEXT: vextractf128 $1, %ymm0, %xmm1
; NODQ-32-NEXT: vmovlps %xmm1, {{[0-9]+}}(%esp)
; NODQ-32-NEXT: vshufps {{.*#+}} xmm4 = xmm1[2,3,2,3]
; NODQ-32-NEXT: vmovlps %xmm4, {{[0-9]+}}(%esp)
; NODQ-32-NEXT: vextractps $1, %xmm3, %eax
; NODQ-32-NEXT: shrl $31, %eax
; NODQ-32-NEXT: fildll {{[0-9]+}}(%esp)
; NODQ-32-NEXT: fadds {{\.?LCPI[0-9]+_[0-9]+}}(,%eax,4)
; NODQ-32-NEXT: fstpl {{[0-9]+}}(%esp)
; NODQ-32-NEXT: wait
; NODQ-32-NEXT: vextractps $3, %xmm3, %eax
; NODQ-32-NEXT: shrl $31, %eax
; NODQ-32-NEXT: fildll {{[0-9]+}}(%esp)
; NODQ-32-NEXT: fadds {{\.?LCPI[0-9]+_[0-9]+}}(,%eax,4)
; NODQ-32-NEXT: fstpl {{[0-9]+}}(%esp)
; NODQ-32-NEXT: wait
; NODQ-32-NEXT: vextractps $1, %xmm2, %eax
; NODQ-32-NEXT: shrl $31, %eax
; NODQ-32-NEXT: fildll {{[0-9]+}}(%esp)
; NODQ-32-NEXT: fadds {{\.?LCPI[0-9]+_[0-9]+}}(,%eax,4)
; NODQ-32-NEXT: fstpl {{[0-9]+}}(%esp)
; NODQ-32-NEXT: wait
; NODQ-32-NEXT: vextractps $3, %xmm2, %eax
; NODQ-32-NEXT: shrl $31, %eax
; NODQ-32-NEXT: fildll {{[0-9]+}}(%esp)
; NODQ-32-NEXT: fadds {{\.?LCPI[0-9]+_[0-9]+}}(,%eax,4)
; NODQ-32-NEXT: fstpl {{[0-9]+}}(%esp)
; NODQ-32-NEXT: wait
; NODQ-32-NEXT: vextractps $1, %xmm0, %eax
; NODQ-32-NEXT: shrl $31, %eax
; NODQ-32-NEXT: fildll {{[0-9]+}}(%esp)
; NODQ-32-NEXT: fadds {{\.?LCPI[0-9]+_[0-9]+}}(,%eax,4)
; NODQ-32-NEXT: fstpl (%esp)
; NODQ-32-NEXT: wait
; NODQ-32-NEXT: vextractps $3, %xmm0, %eax
; NODQ-32-NEXT: shrl $31, %eax
; NODQ-32-NEXT: fildll {{[0-9]+}}(%esp)
; NODQ-32-NEXT: fadds {{\.?LCPI[0-9]+_[0-9]+}}(,%eax,4)
; NODQ-32-NEXT: fstpl {{[0-9]+}}(%esp)
; NODQ-32-NEXT: wait
; NODQ-32-NEXT: vextractps $1, %xmm1, %eax
; NODQ-32-NEXT: shrl $31, %eax
; NODQ-32-NEXT: fildll {{[0-9]+}}(%esp)
; NODQ-32-NEXT: fadds {{\.?LCPI[0-9]+_[0-9]+}}(,%eax,4)
; NODQ-32-NEXT: fstpl {{[0-9]+}}(%esp)
; NODQ-32-NEXT: wait
; NODQ-32-NEXT: vextractps $3, %xmm1, %eax
; NODQ-32-NEXT: shrl $31, %eax
; NODQ-32-NEXT: fildll {{[0-9]+}}(%esp)
; NODQ-32-NEXT: fadds {{\.?LCPI[0-9]+_[0-9]+}}(,%eax,4)
; NODQ-32-NEXT: fstpl {{[0-9]+}}(%esp)
; NODQ-32-NEXT: wait
; NODQ-32-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; NODQ-32-NEXT: vmovhps {{.*#+}} xmm0 = xmm0[0,1],mem[0,1]
; NODQ-32-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
; NODQ-32-NEXT: vmovhps {{.*#+}} xmm1 = xmm1[0,1],mem[0,1]
; NODQ-32-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; NODQ-32-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
; NODQ-32-NEXT: vmovhps {{.*#+}} xmm1 = xmm1[0,1],mem[0,1]
; NODQ-32-NEXT: vmovsd {{.*#+}} xmm2 = mem[0],zero
; NODQ-32-NEXT: vmovhps {{.*#+}} xmm2 = xmm2[0,1],mem[0,1]
; NODQ-32-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
; NODQ-32-NEXT: vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
; NODQ-32-NEXT: movl %ebp, %esp
; NODQ-32-NEXT: popl %ebp
; NODQ-32-NEXT: .cfi_def_cfa %esp, 4
; NODQ-32-NEXT: retl
;
; NODQ-64-LABEL: uitofp_v8i64_v8f64:
; NODQ-64: # %bb.0:
; NODQ-64-NEXT: vextracti32x4 $3, %zmm0, %xmm1
; NODQ-64-NEXT: vpextrq $1, %xmm1, %rax
; NODQ-64-NEXT: vcvtusi2sd %rax, %xmm2, %xmm2
; NODQ-64-NEXT: vmovq %xmm1, %rax
; NODQ-64-NEXT: vcvtusi2sd %rax, %xmm3, %xmm1
; NODQ-64-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0]
; NODQ-64-NEXT: vextracti32x4 $2, %zmm0, %xmm2
; NODQ-64-NEXT: vpextrq $1, %xmm2, %rax
; NODQ-64-NEXT: vcvtusi2sd %rax, %xmm3, %xmm3
; NODQ-64-NEXT: vmovq %xmm2, %rax
; NODQ-64-NEXT: vcvtusi2sd %rax, %xmm4, %xmm2
; NODQ-64-NEXT: vunpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0]
; NODQ-64-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm1
; NODQ-64-NEXT: vextracti128 $1, %ymm0, %xmm2
; NODQ-64-NEXT: vpextrq $1, %xmm2, %rax
; NODQ-64-NEXT: vcvtusi2sd %rax, %xmm4, %xmm3
; NODQ-64-NEXT: vmovq %xmm2, %rax
; NODQ-64-NEXT: vcvtusi2sd %rax, %xmm4, %xmm2
; NODQ-64-NEXT: vunpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0]
; NODQ-64-NEXT: vpextrq $1, %xmm0, %rax
; NODQ-64-NEXT: vcvtusi2sd %rax, %xmm4, %xmm3
; NODQ-64-NEXT: vmovq %xmm0, %rax
; NODQ-64-NEXT: vcvtusi2sd %rax, %xmm4, %xmm0
; NODQ-64-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm3[0]
; NODQ-64-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; NODQ-64-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
; NODQ-64-NEXT: retq
;
; DQ-LABEL: uitofp_v8i64_v8f64:
; DQ: # %bb.0:
; DQ-NEXT: vcvtuqq2pd %zmm0, %zmm0
; DQ-NEXT: ret{{[l|q]}}
 %result = call <8 x double> @llvm.experimental.constrained.uitofp.v8f64.v8i64(<8 x i64> %x,
 metadata !"round.dynamic",
 metadata !"fpexcept.strict") #0
 ret <8 x double> %result
}

define <8 x float> @sitofp_v8i64_v8f32(<8 x i64> %x) #0 {
; NODQ-32-LABEL: sitofp_v8i64_v8f32:
; NODQ-32: # %bb.0:
; NODQ-32-NEXT: pushl %ebp
; NODQ-32-NEXT: .cfi_def_cfa_offset 8
; NODQ-32-NEXT: .cfi_offset %ebp, -8
; NODQ-32-NEXT: movl %esp, %ebp
; NODQ-32-NEXT: .cfi_def_cfa_register %ebp
; NODQ-32-NEXT: andl $-8, %esp
; NODQ-32-NEXT: subl $96, %esp
; NODQ-32-NEXT: vmovlps %xmm0, {{[0-9]+}}(%esp)
; NODQ-32-NEXT: vshufps {{.*#+}} xmm1 = xmm0[2,3,2,3]
; NODQ-32-NEXT: vmovlps %xmm1, {{[0-9]+}}(%esp)
; NODQ-32-NEXT: vextractf128 $1, %ymm0, %xmm1
; NODQ-32-NEXT: vmovlps %xmm1, {{[0-9]+}}(%esp)
; NODQ-32-NEXT: vshufps {{.*#+}} xmm1 = xmm1[2,3,2,3]
; NODQ-32-NEXT: vmovlps %xmm1, {{[0-9]+}}(%esp)
; NODQ-32-NEXT: vextractf32x4 $2, %zmm0, %xmm1
; NODQ-32-NEXT: vmovlps %xmm1, {{[0-9]+}}(%esp)
; NODQ-32-NEXT: vshufps {{.*#+}} xmm1 = xmm1[2,3,2,3]
; NODQ-32-NEXT: vmovlps %xmm1, {{[0-9]+}}(%esp)
; NODQ-32-NEXT: vextractf32x4 $3, %zmm0, %xmm0
; NODQ-32-NEXT: vmovlps %xmm0, {{[0-9]+}}(%esp)
; NODQ-32-NEXT: vshufps {{.*#+}} xmm0 = xmm0[2,3,2,3]
; NODQ-32-NEXT: vmovlps %xmm0, {{[0-9]+}}(%esp)
; NODQ-32-NEXT: fildll {{[0-9]+}}(%esp)
; NODQ-32-NEXT: fstps {{[0-9]+}}(%esp)
; NODQ-32-NEXT: fildll {{[0-9]+}}(%esp)
; NODQ-32-NEXT: fstps {{[0-9]+}}(%esp)
; NODQ-32-NEXT: fildll {{[0-9]+}}(%esp)
; NODQ-32-NEXT: fstps {{[0-9]+}}(%esp)
; NODQ-32-NEXT: fildll {{[0-9]+}}(%esp)
; NODQ-32-NEXT: fstps {{[0-9]+}}(%esp)
; NODQ-32-NEXT: fildll {{[0-9]+}}(%esp)
; NODQ-32-NEXT: fstps {{[0-9]+}}(%esp)
; NODQ-32-NEXT: fildll {{[0-9]+}}(%esp)
; NODQ-32-NEXT: fstps {{[0-9]+}}(%esp)
; NODQ-32-NEXT: fildll {{[0-9]+}}(%esp)
; NODQ-32-NEXT: fstps {{[0-9]+}}(%esp)
; NODQ-32-NEXT: fildll {{[0-9]+}}(%esp)
; NODQ-32-NEXT: fstps (%esp)
; NODQ-32-NEXT: wait
; NODQ-32-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; NODQ-32-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[2,3]
; NODQ-32-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],mem[0],xmm0[3]
; NODQ-32-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0]
; NODQ-32-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; NODQ-32-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[2,3]
; NODQ-32-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],mem[0],xmm1[3]
; NODQ-32-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1,2],mem[0]
; NODQ-32-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; NODQ-32-NEXT: movl %ebp, %esp
; NODQ-32-NEXT: popl %ebp
; NODQ-32-NEXT: .cfi_def_cfa %esp, 4
; NODQ-32-NEXT: retl
;
; NODQ-64-LABEL: sitofp_v8i64_v8f32:
; NODQ-64: # %bb.0:
; NODQ-64-NEXT: vextracti32x4 $2, %zmm0, %xmm1
; NODQ-64-NEXT: vpextrq $1, %xmm1, %rax
; NODQ-64-NEXT: vcvtsi2ss %rax, %xmm2, %xmm2
; NODQ-64-NEXT: vmovq %xmm1, %rax
; NODQ-64-NEXT: vcvtsi2ss %rax, %xmm3, %xmm1
; NODQ-64-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[2,3]
; NODQ-64-NEXT: vextracti32x4 $3, %zmm0, %xmm2
; NODQ-64-NEXT: vmovq %xmm2, %rax
; NODQ-64-NEXT: vcvtsi2ss %rax, %xmm3, %xmm3
; NODQ-64-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm3[0],xmm1[3]
; NODQ-64-NEXT: vpextrq $1, %xmm2, %rax
; NODQ-64-NEXT: vcvtsi2ss %rax, %xmm4, %xmm2
; NODQ-64-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1,2],xmm2[0]
; NODQ-64-NEXT: vpextrq $1, %xmm0, %rax
; NODQ-64-NEXT: vcvtsi2ss %rax, %xmm4, %xmm2
; NODQ-64-NEXT: vmovq %xmm0, %rax
; NODQ-64-NEXT: vcvtsi2ss %rax, %xmm4, %xmm3
; NODQ-64-NEXT: vinsertps {{.*#+}} xmm2 = xmm3[0],xmm2[0],xmm3[2,3]
; NODQ-64-NEXT: vextracti128 $1, %ymm0, %xmm0
; NODQ-64-NEXT: vmovq %xmm0, %rax
; NODQ-64-NEXT: vcvtsi2ss %rax, %xmm4, %xmm3
; NODQ-64-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0,1],xmm3[0],xmm2[3]
; NODQ-64-NEXT: vpextrq $1, %xmm0, %rax
; NODQ-64-NEXT: vcvtsi2ss %rax, %xmm4, %xmm0
; NODQ-64-NEXT: vinsertps {{.*#+}} xmm0 = xmm2[0,1,2],xmm0[0]
; NODQ-64-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; NODQ-64-NEXT: retq
;
; DQ-LABEL: sitofp_v8i64_v8f32:
; DQ: # %bb.0:
; DQ-NEXT: vcvtqq2ps %zmm0, %ymm0
; DQ-NEXT: ret{{[l|q]}}
 %result = call <8 x float> @llvm.experimental.constrained.sitofp.v8f32.v8i64(<8 x i64> %x,
 metadata !"round.dynamic",
 metadata !"fpexcept.strict") #0
 ret <8 x float> %result
}

define <8 x float> @uitofp_v8i64_v8f32(<8 x i64> %x) #0 {
; NODQ-32-LABEL: uitofp_v8i64_v8f32:
; NODQ-32: # %bb.0:
; NODQ-32-NEXT: pushl %ebp
; NODQ-32-NEXT: .cfi_def_cfa_offset 8
; NODQ-32-NEXT: .cfi_offset %ebp, -8
; NODQ-32-NEXT: movl %esp, %ebp
; NODQ-32-NEXT: .cfi_def_cfa_register %ebp
; NODQ-32-NEXT: andl $-8, %esp
; NODQ-32-NEXT: subl $96, %esp
; NODQ-32-NEXT: vmovlps %xmm0, {{[0-9]+}}(%esp)
; NODQ-32-NEXT: vshufps {{.*#+}} xmm1 = xmm0[2,3,2,3]
; NODQ-32-NEXT: vmovlps %xmm1, {{[0-9]+}}(%esp)
; NODQ-32-NEXT: vextractf128 $1, %ymm0, %xmm3
; NODQ-32-NEXT: vmovlps %xmm3, {{[0-9]+}}(%esp)
; NODQ-32-NEXT: vshufps {{.*#+}} xmm1 = xmm3[2,3,2,3]
; NODQ-32-NEXT: vmovlps %xmm1, {{[0-9]+}}(%esp)
; NODQ-32-NEXT: vextractf32x4 $2, %zmm0, %xmm2
; NODQ-32-NEXT: vmovlps %xmm2, {{[0-9]+}}(%esp)
; NODQ-32-NEXT: vshufps {{.*#+}} xmm1 = xmm2[2,3,2,3]
; NODQ-32-NEXT: vmovlps %xmm1, {{[0-9]+}}(%esp)
; NODQ-32-NEXT: vextractf32x4 $3, %zmm0, %xmm1
; NODQ-32-NEXT: vmovlps %xmm1, {{[0-9]+}}(%esp)
; NODQ-32-NEXT: vshufps {{.*#+}} xmm4 = xmm1[2,3,2,3]
; NODQ-32-NEXT: vmovlps %xmm4, {{[0-9]+}}(%esp)
; NODQ-32-NEXT: vextractps $1, %xmm0, %eax
; NODQ-32-NEXT: shrl $31, %eax
; NODQ-32-NEXT: fildll {{[0-9]+}}(%esp)
; NODQ-32-NEXT: fadds {{\.?LCPI[0-9]+_[0-9]+}}(,%eax,4)
; NODQ-32-NEXT: fstps (%esp)
; NODQ-32-NEXT: wait
; NODQ-32-NEXT: vextractps $3, %xmm0, %eax
; NODQ-32-NEXT: shrl $31, %eax
; NODQ-32-NEXT: fildll {{[0-9]+}}(%esp)
; NODQ-32-NEXT: fadds {{\.?LCPI[0-9]+_[0-9]+}}(,%eax,4)
; NODQ-32-NEXT: fstps {{[0-9]+}}(%esp)
; NODQ-32-NEXT: wait
; NODQ-32-NEXT: vextractps $1, %xmm3, %eax
; NODQ-32-NEXT: shrl $31, %eax
; NODQ-32-NEXT: fildll {{[0-9]+}}(%esp)
; NODQ-32-NEXT: fadds {{\.?LCPI[0-9]+_[0-9]+}}(,%eax,4)
; NODQ-32-NEXT: fstps {{[0-9]+}}(%esp)
; NODQ-32-NEXT: wait
; NODQ-32-NEXT: vextractps $3, %xmm3, %eax
; NODQ-32-NEXT: shrl $31, %eax
; NODQ-32-NEXT: fildll {{[0-9]+}}(%esp)
; NODQ-32-NEXT: fadds {{\.?LCPI[0-9]+_[0-9]+}}(,%eax,4)
; NODQ-32-NEXT: fstps {{[0-9]+}}(%esp)
; NODQ-32-NEXT: wait
; NODQ-32-NEXT: vextractps $1, %xmm2, %eax
; NODQ-32-NEXT: shrl $31, %eax
; NODQ-32-NEXT: fildll {{[0-9]+}}(%esp)
; NODQ-32-NEXT: fadds {{\.?LCPI[0-9]+_[0-9]+}}(,%eax,4)
; NODQ-32-NEXT: fstps {{[0-9]+}}(%esp)
; NODQ-32-NEXT: wait
; NODQ-32-NEXT: vextractps $3, %xmm2, %eax
; NODQ-32-NEXT: shrl $31, %eax
; NODQ-32-NEXT: fildll {{[0-9]+}}(%esp)
; NODQ-32-NEXT: fadds {{\.?LCPI[0-9]+_[0-9]+}}(,%eax,4)
; NODQ-32-NEXT: fstps {{[0-9]+}}(%esp)
; NODQ-32-NEXT: wait
; NODQ-32-NEXT: vextractps $1, %xmm1, %eax
; NODQ-32-NEXT: shrl $31, %eax
; NODQ-32-NEXT: fildll {{[0-9]+}}(%esp)
; NODQ-32-NEXT: fadds {{\.?LCPI[0-9]+_[0-9]+}}(,%eax,4)
; NODQ-32-NEXT: fstps {{[0-9]+}}(%esp)
; NODQ-32-NEXT: wait
; NODQ-32-NEXT: vextractps $3, %xmm1, %eax
; NODQ-32-NEXT: shrl $31, %eax
; NODQ-32-NEXT: fildll {{[0-9]+}}(%esp)
; NODQ-32-NEXT: fadds {{\.?LCPI[0-9]+_[0-9]+}}(,%eax,4)
; NODQ-32-NEXT: fstps {{[0-9]+}}(%esp)
; NODQ-32-NEXT: wait
; NODQ-32-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; NODQ-32-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[2,3]
; NODQ-32-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],mem[0],xmm0[3]
; NODQ-32-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0]
; NODQ-32-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; NODQ-32-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[2,3]
; NODQ-32-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],mem[0],xmm1[3]
; NODQ-32-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1,2],mem[0]
; NODQ-32-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; NODQ-32-NEXT: movl %ebp, %esp
; NODQ-32-NEXT: popl %ebp
; NODQ-32-NEXT: .cfi_def_cfa %esp, 4
; NODQ-32-NEXT: retl
;
; NODQ-64-LABEL: uitofp_v8i64_v8f32:
; NODQ-64: # %bb.0:
; NODQ-64-NEXT: vextracti32x4 $2, %zmm0, %xmm1
; NODQ-64-NEXT: vpextrq $1, %xmm1, %rax
; NODQ-64-NEXT: vcvtusi2ss %rax, %xmm2, %xmm2
; NODQ-64-NEXT: vmovq %xmm1, %rax
; NODQ-64-NEXT: vcvtusi2ss %rax, %xmm3, %xmm1
; NODQ-64-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[2,3]
; NODQ-64-NEXT: vextracti32x4 $3, %zmm0, %xmm2
; NODQ-64-NEXT: vmovq %xmm2, %rax
; NODQ-64-NEXT: vcvtusi2ss %rax, %xmm3, %xmm3
; NODQ-64-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm3[0],xmm1[3]
; NODQ-64-NEXT: vpextrq $1, %xmm2, %rax
; NODQ-64-NEXT: vcvtusi2ss %rax, %xmm4, %xmm2
; NODQ-64-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1,2],xmm2[0]
; NODQ-64-NEXT: vpextrq $1, %xmm0, %rax
; NODQ-64-NEXT: vcvtusi2ss %rax, %xmm4, %xmm2
; NODQ-64-NEXT: vmovq %xmm0, %rax
; NODQ-64-NEXT: vcvtusi2ss %rax, %xmm4, %xmm3
; NODQ-64-NEXT: vinsertps {{.*#+}} xmm2 = xmm3[0],xmm2[0],xmm3[2,3]
; NODQ-64-NEXT: vextracti128 $1, %ymm0, %xmm0
; NODQ-64-NEXT: vmovq %xmm0, %rax
; NODQ-64-NEXT: vcvtusi2ss %rax, %xmm4, %xmm3
; NODQ-64-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0,1],xmm3[0],xmm2[3]
; NODQ-64-NEXT: vpextrq $1, %xmm0, %rax
; NODQ-64-NEXT: vcvtusi2ss %rax, %xmm4, %xmm0
; NODQ-64-NEXT: vinsertps {{.*#+}} xmm0 = xmm2[0,1,2],xmm0[0]
; NODQ-64-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; NODQ-64-NEXT: retq
;
; DQ-LABEL: uitofp_v8i64_v8f32:
; DQ: # %bb.0:
; DQ-NEXT: vcvtuqq2ps %zmm0, %ymm0
; DQ-NEXT: ret{{[l|q]}}
 %result = call <8 x float> @llvm.experimental.constrained.uitofp.v8f32.v8i64(<8 x i64> %x,
 metadata !"round.dynamic",
 metadata !"fpexcept.strict") #0
 ret <8 x float> %result
}

attributes #0 = { strictfp }