; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-- -mattr=avx512f,avx512vl,avx512dq | FileCheck %s --check-prefixes=CHECK

; PR37751 - https://bugs.llvm.org/show_bug.cgi?id=37751
; We can't combine into 'round' instructions because the behavior is different for out-of-range values.
;
; Every test below converts a floating-point vector to an integer vector with
; one of the truncating AVX-512 conversion intrinsics (all-ones mask, so the
; undef pass-through operand is never selected) and then converts the result
; back with a plain sitofp/uitofp. The expected assembly keeps both
; conversions as two separate instructions. Each case comes in two flavours:
;   *_mem_* - the source vector is loaded from memory (load folded into the
;             first conversion)
;   *_reg_* - the source vector is passed in a register

declare <16 x i32> @llvm.x86.avx512.mask.cvttps2dq.512(<16 x float>, <16 x i32>, i16, i32)
declare <4 x i32> @llvm.x86.avx512.mask.cvttps2udq.128(<4 x float>, <4 x i32>, i8)
declare <8 x i32> @llvm.x86.avx512.mask.cvttps2udq.256(<8 x float>, <8 x i32>, i8)
declare <16 x i32> @llvm.x86.avx512.mask.cvttps2udq.512(<16 x float>, <16 x i32>, i16, i32)
declare <4 x i32> @llvm.x86.avx512.mask.cvttpd2udq.256(<4 x double>, <4 x i32>, i8)
declare <8 x i32> @llvm.x86.avx512.mask.cvttpd2dq.512(<8 x double>, <8 x i32>, i8, i32)
declare <8 x i32> @llvm.x86.avx512.mask.cvttpd2udq.512(<8 x double>, <8 x i32>, i8, i32)
declare <4 x i64> @llvm.x86.avx512.mask.cvttps2qq.256(<4 x float>, <4 x i64>, i8)
declare <8 x i64> @llvm.x86.avx512.mask.cvttps2qq.512(<8 x float>, <8 x i64>, i8, i32)
declare <4 x i64> @llvm.x86.avx512.mask.cvttps2uqq.256(<4 x float>, <4 x i64>, i8)
declare <8 x i64> @llvm.x86.avx512.mask.cvttps2uqq.512(<8 x float>, <8 x i64>, i8, i32)
declare <2 x i64> @llvm.x86.avx512.mask.cvttpd2qq.128(<2 x double>, <2 x i64>, i8)
declare <4 x i64> @llvm.x86.avx512.mask.cvttpd2qq.256(<4 x double>, <4 x i64>, i8)
declare <8 x i64> @llvm.x86.avx512.mask.cvttpd2qq.512(<8 x double>, <8 x i64>, i8, i32)
declare <2 x i64> @llvm.x86.avx512.mask.cvttpd2uqq.128(<2 x double>, <2 x i64>, i8)
declare <4 x i64> @llvm.x86.avx512.mask.cvttpd2uqq.256(<4 x double>, <4 x i64>, i8)
declare <8 x i64> @llvm.x86.avx512.mask.cvttpd2uqq.512(<8 x double>, <8 x i64>, i8, i32)

;
; float -> i32 -> float
;

define <16 x float> @float_to_sint_to_float_mem_v16f32(ptr %p) {
; CHECK-LABEL: float_to_sint_to_float_mem_v16f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvttps2dq (%rdi), %zmm0
; CHECK-NEXT:    vcvtdq2ps %zmm0, %zmm0
; CHECK-NEXT:    retq
  %x = load <16 x float>, ptr %p
  %fptosi = tail call <16 x i32> @llvm.x86.avx512.mask.cvttps2dq.512(<16 x float> %x, <16 x i32> undef, i16 -1, i32 4)
  %sitofp = sitofp <16 x i32> %fptosi to <16 x float>
  ret <16 x float> %sitofp
}

define <16 x float> @float_to_sint_to_float_reg_v16f32(<16 x float> %x) {
; CHECK-LABEL: float_to_sint_to_float_reg_v16f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvttps2dq %zmm0, %zmm0
; CHECK-NEXT:    vcvtdq2ps %zmm0, %zmm0
; CHECK-NEXT:    retq
  %fptosi = tail call <16 x i32> @llvm.x86.avx512.mask.cvttps2dq.512(<16 x float> %x, <16 x i32> undef, i16 -1, i32 4)
  %sitofp = sitofp <16 x i32> %fptosi to <16 x float>
  ret <16 x float> %sitofp
}

define <16 x float> @float_to_uint_to_float_mem_v16f32(ptr %p) {
; CHECK-LABEL: float_to_uint_to_float_mem_v16f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvttps2udq (%rdi), %zmm0
; CHECK-NEXT:    vcvtudq2ps %zmm0, %zmm0
; CHECK-NEXT:    retq
  %x = load <16 x float>, ptr %p
  %fptoui = tail call <16 x i32> @llvm.x86.avx512.mask.cvttps2udq.512(<16 x float> %x, <16 x i32> undef, i16 -1, i32 4)
  %uitofp = uitofp <16 x i32> %fptoui to <16 x float>
  ret <16 x float> %uitofp
}

define <16 x float> @float_to_uint_to_float_reg_v16f32(<16 x float> %x) {
; CHECK-LABEL: float_to_uint_to_float_reg_v16f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvttps2udq %zmm0, %zmm0
; CHECK-NEXT:    vcvtudq2ps %zmm0, %zmm0
; CHECK-NEXT:    retq
  %fptoui = tail call <16 x i32> @llvm.x86.avx512.mask.cvttps2udq.512(<16 x float> %x, <16 x i32> undef, i16 -1, i32 4)
  %uitofp = uitofp <16 x i32> %fptoui to <16 x float>
  ret <16 x float> %uitofp
}

define <4 x float> @float_to_uint_to_float_mem_v4f32(ptr %p) {
; CHECK-LABEL: float_to_uint_to_float_mem_v4f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvttps2udq (%rdi), %xmm0
; CHECK-NEXT:    vcvtudq2ps %xmm0, %xmm0
; CHECK-NEXT:    retq
  %x = load <4 x float>, ptr %p
  %fptoui = tail call <4 x i32> @llvm.x86.avx512.mask.cvttps2udq.128(<4 x float> %x, <4 x i32> undef, i8 -1)
  %uitofp = uitofp <4 x i32> %fptoui to <4 x float>
  ret <4 x float> %uitofp
}

define <4 x float> @float_to_uint_to_float_reg_v4f32(<4 x float> %x) {
; CHECK-LABEL: float_to_uint_to_float_reg_v4f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvttps2udq %xmm0, %xmm0
; CHECK-NEXT:    vcvtudq2ps %xmm0, %xmm0
; CHECK-NEXT:    retq
  %fptoui = tail call <4 x i32> @llvm.x86.avx512.mask.cvttps2udq.128(<4 x float> %x, <4 x i32> undef, i8 -1)
  %uitofp = uitofp <4 x i32> %fptoui to <4 x float>
  ret <4 x float> %uitofp
}

define <8 x float> @float_to_uint_to_float_mem_v8f32(ptr %p) {
; CHECK-LABEL: float_to_uint_to_float_mem_v8f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvttps2udq (%rdi), %ymm0
; CHECK-NEXT:    vcvtudq2ps %ymm0, %ymm0
; CHECK-NEXT:    retq
  %x = load <8 x float>, ptr %p
  %fptoui = tail call <8 x i32> @llvm.x86.avx512.mask.cvttps2udq.256(<8 x float> %x, <8 x i32> undef, i8 -1)
  %uitofp = uitofp <8 x i32> %fptoui to <8 x float>
  ret <8 x float> %uitofp
}

define <8 x float> @float_to_uint_to_float_reg_v8f32(<8 x float> %x) {
; CHECK-LABEL: float_to_uint_to_float_reg_v8f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvttps2udq %ymm0, %ymm0
; CHECK-NEXT:    vcvtudq2ps %ymm0, %ymm0
; CHECK-NEXT:    retq
  %fptoui = tail call <8 x i32> @llvm.x86.avx512.mask.cvttps2udq.256(<8 x float> %x, <8 x i32> undef, i8 -1)
  %uitofp = uitofp <8 x i32> %fptoui to <8 x float>
  ret <8 x float> %uitofp
}

;
; double -> i32 -> double
;

define <4 x double> @double_to_uint_to_double_mem_v4f64(ptr %p) {
; CHECK-LABEL: double_to_uint_to_double_mem_v4f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvttpd2udqy (%rdi), %xmm0
; CHECK-NEXT:    vcvtudq2pd %xmm0, %ymm0
; CHECK-NEXT:    retq
  %x = load <4 x double>, ptr %p
  %fptoui = tail call <4 x i32> @llvm.x86.avx512.mask.cvttpd2udq.256(<4 x double> %x, <4 x i32> undef, i8 -1)
  %uitofp = uitofp <4 x i32> %fptoui to <4 x double>
  ret <4 x double> %uitofp
}

define <4 x double> @double_to_uint_to_double_reg_v4f64(<4 x double> %x) {
; CHECK-LABEL: double_to_uint_to_double_reg_v4f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvttpd2udq %ymm0, %xmm0
; CHECK-NEXT:    vcvtudq2pd %xmm0, %ymm0
; CHECK-NEXT:    retq
  %fptoui = tail call <4 x i32> @llvm.x86.avx512.mask.cvttpd2udq.256(<4 x double> %x, <4 x i32> undef, i8 -1)
  %uitofp = uitofp <4 x i32> %fptoui to <4 x double>
  ret <4 x double> %uitofp
}

define <8 x double> @double_to_sint_to_double_mem_v8f64(ptr %p) {
; CHECK-LABEL: double_to_sint_to_double_mem_v8f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvttpd2dq (%rdi), %ymm0
; CHECK-NEXT:    vcvtdq2pd %ymm0, %zmm0
; CHECK-NEXT:    retq
  %x = load <8 x double>, ptr %p
  %fptosi = tail call <8 x i32> @llvm.x86.avx512.mask.cvttpd2dq.512(<8 x double> %x, <8 x i32> undef, i8 -1, i32 4)
  %sitofp = sitofp <8 x i32> %fptosi to <8 x double>
  ret <8 x double> %sitofp
}

define <8 x double> @double_to_sint_to_double_reg_v8f64(<8 x double> %x) {
; CHECK-LABEL: double_to_sint_to_double_reg_v8f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvttpd2dq %zmm0, %ymm0
; CHECK-NEXT:    vcvtdq2pd %ymm0, %zmm0
; CHECK-NEXT:    retq
  %fptosi = tail call <8 x i32> @llvm.x86.avx512.mask.cvttpd2dq.512(<8 x double> %x, <8 x i32> undef, i8 -1, i32 4)
  %sitofp = sitofp <8 x i32> %fptosi to <8 x double>
  ret <8 x double> %sitofp
}

define <8 x double> @double_to_uint_to_double_mem_v8f64(ptr %p) {
; CHECK-LABEL: double_to_uint_to_double_mem_v8f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvttpd2udq (%rdi), %ymm0
; CHECK-NEXT:    vcvtudq2pd %ymm0, %zmm0
; CHECK-NEXT:    retq
  %x = load <8 x double>, ptr %p
  %fptoui = tail call <8 x i32> @llvm.x86.avx512.mask.cvttpd2udq.512(<8 x double> %x, <8 x i32> undef, i8 -1, i32 4)
  %uitofp = uitofp <8 x i32> %fptoui to <8 x double>
  ret <8 x double> %uitofp
}

define <8 x double> @double_to_uint_to_double_reg_v8f64(<8 x double> %x) {
; CHECK-LABEL: double_to_uint_to_double_reg_v8f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvttpd2udq %zmm0, %ymm0
; CHECK-NEXT:    vcvtudq2pd %ymm0, %zmm0
; CHECK-NEXT:    retq
  %fptoui = tail call <8 x i32> @llvm.x86.avx512.mask.cvttpd2udq.512(<8 x double> %x, <8 x i32> undef, i8 -1, i32 4)
  %uitofp = uitofp <8 x i32> %fptoui to <8 x double>
  ret <8 x double> %uitofp
}

;
; float -> i64 -> float
;

define <4 x float> @float_to_sint64_to_float_mem_v4f32(ptr %p) {
; CHECK-LABEL: float_to_sint64_to_float_mem_v4f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvttps2qq (%rdi), %ymm0
; CHECK-NEXT:    vcvtqq2ps %ymm0, %xmm0
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
  %x = load <4 x float>, ptr %p
  %fptosi = tail call <4 x i64> @llvm.x86.avx512.mask.cvttps2qq.256(<4 x float> %x, <4 x i64> undef, i8 -1)
  %sitofp = sitofp <4 x i64> %fptosi to <4 x float>
  ret <4 x float> %sitofp
}

define <4 x float> @float_to_sint64_to_float_reg_v4f32(<4 x float> %x) {
; CHECK-LABEL: float_to_sint64_to_float_reg_v4f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvttps2qq %xmm0, %ymm0
; CHECK-NEXT:    vcvtqq2ps %ymm0, %xmm0
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
  %fptosi = tail call <4 x i64> @llvm.x86.avx512.mask.cvttps2qq.256(<4 x float> %x, <4 x i64> undef, i8 -1)
  %sitofp = sitofp <4 x i64> %fptosi to <4 x float>
  ret <4 x float> %sitofp
}

define <4 x float> @float_to_uint64_to_float_mem_v4f32(ptr %p) {
; CHECK-LABEL: float_to_uint64_to_float_mem_v4f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvttps2uqq (%rdi), %ymm0
; CHECK-NEXT:    vcvtuqq2ps %ymm0, %xmm0
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
  %x = load <4 x float>, ptr %p
  %fptoui = tail call <4 x i64> @llvm.x86.avx512.mask.cvttps2uqq.256(<4 x float> %x, <4 x i64> undef, i8 -1)
  %uitofp = uitofp <4 x i64> %fptoui to <4 x float>
  ret <4 x float> %uitofp
}

define <4 x float> @float_to_uint64_to_float_reg_v4f32(<4 x float> %x) {
; CHECK-LABEL: float_to_uint64_to_float_reg_v4f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvttps2uqq %xmm0, %ymm0
; CHECK-NEXT:    vcvtuqq2ps %ymm0, %xmm0
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
  %fptoui = tail call <4 x i64> @llvm.x86.avx512.mask.cvttps2uqq.256(<4 x float> %x, <4 x i64> undef, i8 -1)
  %uitofp = uitofp <4 x i64> %fptoui to <4 x float>
  ret <4 x float> %uitofp
}

define <8 x float> @float_to_sint64_to_float_mem_v8f32(ptr %p) {
; CHECK-LABEL: float_to_sint64_to_float_mem_v8f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvttps2qq (%rdi), %zmm0
; CHECK-NEXT:    vcvtqq2ps %zmm0, %ymm0
; CHECK-NEXT:    retq
  %x = load <8 x float>, ptr %p
  %fptosi = tail call <8 x i64> @llvm.x86.avx512.mask.cvttps2qq.512(<8 x float> %x, <8 x i64> undef, i8 -1, i32 4)
  %sitofp = sitofp <8 x i64> %fptosi to <8 x float>
  ret <8 x float> %sitofp
}

define <8 x float> @float_to_sint64_to_float_reg_v8f32(<8 x float> %x) {
; CHECK-LABEL: float_to_sint64_to_float_reg_v8f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvttps2qq %ymm0, %zmm0
; CHECK-NEXT:    vcvtqq2ps %zmm0, %ymm0
; CHECK-NEXT:    retq
  %fptosi = tail call <8 x i64> @llvm.x86.avx512.mask.cvttps2qq.512(<8 x float> %x, <8 x i64> undef, i8 -1, i32 4)
  %sitofp = sitofp <8 x i64> %fptosi to <8 x float>
  ret <8 x float> %sitofp
}

define <8 x float> @float_to_uint64_to_float_mem_v8f32(ptr %p) {
; CHECK-LABEL: float_to_uint64_to_float_mem_v8f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvttps2uqq (%rdi), %zmm0
; CHECK-NEXT:    vcvtuqq2ps %zmm0, %ymm0
; CHECK-NEXT:    retq
  %x = load <8 x float>, ptr %p
  %fptoui = tail call <8 x i64> @llvm.x86.avx512.mask.cvttps2uqq.512(<8 x float> %x, <8 x i64> undef, i8 -1, i32 4)
  %uitofp = uitofp <8 x i64> %fptoui to <8 x float>
  ret <8 x float> %uitofp
}

define <8 x float> @float_to_uint64_to_float_reg_v8f32(<8 x float> %x) {
; CHECK-LABEL: float_to_uint64_to_float_reg_v8f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvttps2uqq %ymm0, %zmm0
; CHECK-NEXT:    vcvtuqq2ps %zmm0, %ymm0
; CHECK-NEXT:    retq
  %fptoui = tail call <8 x i64> @llvm.x86.avx512.mask.cvttps2uqq.512(<8 x float> %x, <8 x i64> undef, i8 -1, i32 4)
  %uitofp = uitofp <8 x i64> %fptoui to <8 x float>
  ret <8 x float> %uitofp
}

;
; double -> i64 -> double
;

define <2 x double> @double_to_sint64_to_double_mem_v2f64(ptr %p) {
; CHECK-LABEL: double_to_sint64_to_double_mem_v2f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvttpd2qq (%rdi), %xmm0
; CHECK-NEXT:    vcvtqq2pd %xmm0, %xmm0
; CHECK-NEXT:    retq
  %x = load <2 x double>, ptr %p
  %fptosi = tail call <2 x i64> @llvm.x86.avx512.mask.cvttpd2qq.128(<2 x double> %x, <2 x i64> undef, i8 -1)
  %sitofp = sitofp <2 x i64> %fptosi to <2 x double>
  ret <2 x double> %sitofp
}

define <2 x double> @double_to_sint64_to_double_reg_v2f64(<2 x double> %x) {
; CHECK-LABEL: double_to_sint64_to_double_reg_v2f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvttpd2qq %xmm0, %xmm0
; CHECK-NEXT:    vcvtqq2pd %xmm0, %xmm0
; CHECK-NEXT:    retq
  %fptosi = tail call <2 x i64> @llvm.x86.avx512.mask.cvttpd2qq.128(<2 x double> %x, <2 x i64> undef, i8 -1)
  %sitofp = sitofp <2 x i64> %fptosi to <2 x double>
  ret <2 x double> %sitofp
}

define <2 x double> @double_to_uint64_to_double_mem_v2f64(ptr %p) {
; CHECK-LABEL: double_to_uint64_to_double_mem_v2f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvttpd2uqq (%rdi), %xmm0
; CHECK-NEXT:    vcvtuqq2pd %xmm0, %xmm0
; CHECK-NEXT:    retq
  %x = load <2 x double>, ptr %p
  %fptoui = tail call <2 x i64> @llvm.x86.avx512.mask.cvttpd2uqq.128(<2 x double> %x, <2 x i64> undef, i8 -1)
  %uitofp = uitofp <2 x i64> %fptoui to <2 x double>
  ret <2 x double> %uitofp
}

define <2 x double> @double_to_uint64_to_double_reg_v2f64(<2 x double> %x) {
; CHECK-LABEL: double_to_uint64_to_double_reg_v2f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvttpd2uqq %xmm0, %xmm0
; CHECK-NEXT:    vcvtuqq2pd %xmm0, %xmm0
; CHECK-NEXT:    retq
  %fptoui = tail call <2 x i64> @llvm.x86.avx512.mask.cvttpd2uqq.128(<2 x double> %x, <2 x i64> undef, i8 -1)
  %uitofp = uitofp <2 x i64> %fptoui to <2 x double>
  ret <2 x double> %uitofp
}

define <4 x double> @double_to_sint64_to_double_mem_v4f64(ptr %p) {
; CHECK-LABEL: double_to_sint64_to_double_mem_v4f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvttpd2qq (%rdi), %ymm0
; CHECK-NEXT:    vcvtqq2pd %ymm0, %ymm0
; CHECK-NEXT:    retq
  %x = load <4 x double>, ptr %p
  %fptosi = tail call <4 x i64> @llvm.x86.avx512.mask.cvttpd2qq.256(<4 x double> %x, <4 x i64> undef, i8 -1)
  %sitofp = sitofp <4 x i64> %fptosi to <4 x double>
  ret <4 x double> %sitofp
}

define <4 x double> @double_to_sint64_to_double_reg_v4f64(<4 x double> %x) {
; CHECK-LABEL: double_to_sint64_to_double_reg_v4f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvttpd2qq %ymm0, %ymm0
; CHECK-NEXT:    vcvtqq2pd %ymm0, %ymm0
; CHECK-NEXT:    retq
  %fptosi = tail call <4 x i64> @llvm.x86.avx512.mask.cvttpd2qq.256(<4 x double> %x, <4 x i64> undef, i8 -1)
  %sitofp = sitofp <4 x i64> %fptosi to <4 x double>
  ret <4 x double> %sitofp
}

define <4 x double> @double_to_uint64_to_double_mem_v4f64(ptr %p) {
; CHECK-LABEL: double_to_uint64_to_double_mem_v4f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvttpd2uqq (%rdi), %ymm0
; CHECK-NEXT:    vcvtuqq2pd %ymm0, %ymm0
; CHECK-NEXT:    retq
  %x = load <4 x double>, ptr %p
  %fptoui = tail call <4 x i64> @llvm.x86.avx512.mask.cvttpd2uqq.256(<4 x double> %x, <4 x i64> undef, i8 -1)
  %uitofp = uitofp <4 x i64> %fptoui to <4 x double>
  ret <4 x double> %uitofp
}

define <4 x double> @double_to_uint64_to_double_reg_v4f64(<4 x double> %x) {
; CHECK-LABEL: double_to_uint64_to_double_reg_v4f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvttpd2uqq %ymm0, %ymm0
; CHECK-NEXT:    vcvtuqq2pd %ymm0, %ymm0
; CHECK-NEXT:    retq
  %fptoui = tail call <4 x i64> @llvm.x86.avx512.mask.cvttpd2uqq.256(<4 x double> %x, <4 x i64> undef, i8 -1)
  %uitofp = uitofp <4 x i64> %fptoui to <4 x double>
  ret <4 x double> %uitofp
}

define <8 x double> @double_to_sint64_to_double_mem_v8f64(ptr %p) {
; CHECK-LABEL: double_to_sint64_to_double_mem_v8f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvttpd2qq (%rdi), %zmm0
; CHECK-NEXT:    vcvtqq2pd %zmm0, %zmm0
; CHECK-NEXT:    retq
  %x = load <8 x double>, ptr %p
  %fptosi = tail call <8 x i64> @llvm.x86.avx512.mask.cvttpd2qq.512(<8 x double> %x, <8 x i64> undef, i8 -1, i32 4)
  %sitofp = sitofp <8 x i64> %fptosi to <8 x double>
  ret <8 x double> %sitofp
}

define <8 x double> @double_to_sint64_to_double_reg_v8f64(<8 x double> %x) {
; CHECK-LABEL: double_to_sint64_to_double_reg_v8f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvttpd2qq %zmm0, %zmm0
; CHECK-NEXT:    vcvtqq2pd %zmm0, %zmm0
; CHECK-NEXT:    retq
  %fptosi = tail call <8 x i64> @llvm.x86.avx512.mask.cvttpd2qq.512(<8 x double> %x, <8 x i64> undef, i8 -1, i32 4)
  %sitofp = sitofp <8 x i64> %fptosi to <8 x double>
  ret <8 x double> %sitofp
}

define <8 x double> @double_to_uint64_to_double_mem_v8f64(ptr %p) {
; CHECK-LABEL: double_to_uint64_to_double_mem_v8f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvttpd2uqq (%rdi), %zmm0
; CHECK-NEXT:    vcvtuqq2pd %zmm0, %zmm0
; CHECK-NEXT:    retq
  %x = load <8 x double>, ptr %p
  %fptoui = tail call <8 x i64> @llvm.x86.avx512.mask.cvttpd2uqq.512(<8 x double> %x, <8 x i64> undef, i8 -1, i32 4)
  %uitofp = uitofp <8 x i64> %fptoui to <8 x double>
  ret <8 x double> %uitofp
}

define <8 x double> @double_to_uint64_to_double_reg_v8f64(<8 x double> %x) {
; CHECK-LABEL: double_to_uint64_to_double_reg_v8f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcvttpd2uqq %zmm0, %zmm0
; CHECK-NEXT:    vcvtuqq2pd %zmm0, %zmm0
; CHECK-NEXT:    retq
  %fptoui = tail call <8 x i64> @llvm.x86.avx512.mask.cvttpd2uqq.512(<8 x double> %x, <8 x i64> undef, i8 -1, i32 4)
  %uitofp = uitofp <8 x i64> %fptoui to <8 x double>
  ret <8 x double> %uitofp
}