; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -O2 -mattr=avx512f -mtriple=x86_64-unknown | FileCheck %s --check-prefix=CHECK64
; RUN: llc < %s -O2 -mattr=avx512f -mtriple=i386-unknown | FileCheck %s --check-prefix=CHECK32
; RUN: llc < %s -O2 -mattr=avx512vl -mtriple=x86_64-unknown | FileCheck %s --check-prefix=CHECK64
; RUN: llc < %s -O2 -mattr=avx512vl -mtriple=i386-unknown | FileCheck %s --check-prefix=CHECK32

define <4 x float> @test_mm_mask_move_ss(<4 x float> %__W, i8 zeroext %__U, <4 x float> %__A, <4 x float> %__B) local_unnamed_addr #0 {
; CHECK64-LABEL: test_mm_mask_move_ss:
; CHECK64: # %bb.0: # %entry
; CHECK64-NEXT: kmovw %edi, %k1
; CHECK64-NEXT: vmovss %xmm2, %xmm1, %xmm0 {%k1}
; CHECK64-NEXT: retq
;
; CHECK32-LABEL: test_mm_mask_move_ss:
; CHECK32: # %bb.0: # %entry
; CHECK32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; CHECK32-NEXT: kmovw %eax, %k1
; CHECK32-NEXT: vmovss %xmm2, %xmm1, %xmm0 {%k1}
; CHECK32-NEXT: retl
entry:
  %0 = and i8 %__U, 1
  %tobool.i = icmp ne i8 %0, 0
  %__B.elt.i = extractelement <4 x float> %__B, i32 0
  %__W.elt.i = extractelement <4 x float> %__W, i32 0
  %vecext1.i = select i1 %tobool.i, float %__B.elt.i, float %__W.elt.i
  %vecins.i = insertelement <4 x float> %__A, float %vecext1.i, i32 0
  ret <4 x float> %vecins.i
}

define <4 x float> @test_mm_maskz_move_ss(i8 zeroext %__U, <4 x float> %__A, <4 x float> %__B) local_unnamed_addr #0 {
; CHECK64-LABEL: test_mm_maskz_move_ss:
; CHECK64: # %bb.0: # %entry
; CHECK64-NEXT: kmovw %edi, %k1
; CHECK64-NEXT: vmovss %xmm1, %xmm0, %xmm0 {%k1} {z}
; CHECK64-NEXT: retq
;
; CHECK32-LABEL: test_mm_maskz_move_ss:
; CHECK32: # %bb.0: # %entry
; CHECK32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; CHECK32-NEXT: kmovw %eax, %k1
; CHECK32-NEXT: vmovss %xmm1, %xmm0, %xmm0 {%k1} {z}
; CHECK32-NEXT: retl
entry:
  %0 = and i8 %__U, 1
  %tobool.i = icmp ne i8 %0, 0
  %vecext.i = extractelement <4 x float> %__B, i32 0
  %cond.i = select i1 %tobool.i, float %vecext.i, float 0.000000e+00
  %vecins.i = insertelement <4 x float> %__A, float %cond.i, i32 0
  ret <4 x float> %vecins.i
}

define <2 x double> @test_mm_mask_move_sd(<2 x double> %__W, i8 zeroext %__U, <2 x double> %__A, <2 x double> %__B) local_unnamed_addr #0 {
; CHECK64-LABEL: test_mm_mask_move_sd:
; CHECK64: # %bb.0: # %entry
; CHECK64-NEXT: kmovw %edi, %k1
; CHECK64-NEXT: vmovsd %xmm2, %xmm1, %xmm0 {%k1}
; CHECK64-NEXT: retq
;
; CHECK32-LABEL: test_mm_mask_move_sd:
; CHECK32: # %bb.0: # %entry
; CHECK32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; CHECK32-NEXT: kmovw %eax, %k1
; CHECK32-NEXT: vmovsd %xmm2, %xmm1, %xmm0 {%k1}
; CHECK32-NEXT: retl
entry:
  %0 = and i8 %__U, 1
  %tobool.i = icmp ne i8 %0, 0
  %__B.elt.i = extractelement <2 x double> %__B, i32 0
  %__W.elt.i = extractelement <2 x double> %__W, i32 0
  %vecext1.i = select i1 %tobool.i, double %__B.elt.i, double %__W.elt.i
  %vecins.i = insertelement <2 x double> %__A, double %vecext1.i, i32 0
  ret <2 x double> %vecins.i
}

define <2 x double> @test_mm_maskz_move_sd(i8 zeroext %__U, <2 x double> %__A, <2 x double> %__B) local_unnamed_addr #0 {
; CHECK64-LABEL: test_mm_maskz_move_sd:
; CHECK64: # %bb.0: # %entry
; CHECK64-NEXT: kmovw %edi, %k1
; CHECK64-NEXT: vmovsd %xmm1, %xmm0, %xmm0 {%k1} {z}
; CHECK64-NEXT: retq
;
; CHECK32-LABEL: test_mm_maskz_move_sd:
; CHECK32: # %bb.0: # %entry
; CHECK32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; CHECK32-NEXT: kmovw %eax, %k1
; CHECK32-NEXT: vmovsd %xmm1, %xmm0, %xmm0 {%k1} {z}
; CHECK32-NEXT: retl
entry:
  %0 = and i8 %__U, 1
  %tobool.i = icmp ne i8 %0, 0
  %vecext.i = extractelement <2 x double> %__B, i32 0
  %cond.i = select i1 %tobool.i, double %vecext.i, double 0.000000e+00
  %vecins.i = insertelement <2 x double> %__A, double %cond.i, i32 0
  ret <2 x double> %vecins.i
}

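; The two store tests below follow clang's older codegen for the masked
; scalar store intrinsics: the value is widened from 128 to 512 bits and a
; 512-bit masked store is emitted whose mask can only have bit 0 set. llc is
; expected to narrow this back to a single masked scalar vmovss/vmovsd store.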
define void @test_mm_mask_store_ss(ptr %__W, i8 zeroext %__U, <4 x float> %__A) local_unnamed_addr #1 {
; CHECK64-LABEL: test_mm_mask_store_ss:
; CHECK64: # %bb.0: # %entry
; CHECK64-NEXT: kmovw %esi, %k1
; CHECK64-NEXT: vmovss %xmm0, (%rdi) {%k1}
; CHECK64-NEXT: retq
;
; CHECK32-LABEL: test_mm_mask_store_ss:
; CHECK32: # %bb.0: # %entry
; CHECK32-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK32-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; CHECK32-NEXT: kmovw %ecx, %k1
; CHECK32-NEXT: vmovss %xmm0, (%eax) {%k1}
; CHECK32-NEXT: retl
entry:
  %shuffle.i.i = shufflevector <4 x float> %__A, <4 x float> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %0 = and i8 %__U, 1
  %conv2.i = zext i8 %0 to i16
  %1 = bitcast i16 %conv2.i to <16 x i1>
  tail call void @llvm.masked.store.v16f32.p0(<16 x float> %shuffle.i.i, ptr %__W, i32 16, <16 x i1> %1) #5
  ret void
}

define void @test_mm_mask_store_sd(ptr %__W, i8 zeroext %__U, <2 x double> %__A) local_unnamed_addr #1 {
; CHECK64-LABEL: test_mm_mask_store_sd:
; CHECK64: # %bb.0: # %entry
; CHECK64-NEXT: kmovw %esi, %k1
; CHECK64-NEXT: vmovsd %xmm0, (%rdi) {%k1}
; CHECK64-NEXT: retq
;
; CHECK32-LABEL: test_mm_mask_store_sd:
; CHECK32: # %bb.0: # %entry
; CHECK32-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK32-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; CHECK32-NEXT: kmovw %ecx, %k1
; CHECK32-NEXT: vmovsd %xmm0, (%eax) {%k1}
; CHECK32-NEXT: retl
entry:
  %shuffle.i.i = shufflevector <2 x double> %__A, <2 x double> undef, <8 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %0 = and i8 %__U, 1
  %1 = bitcast i8 %0 to <8 x i1>
  tail call void @llvm.masked.store.v8f64.p0(<8 x double> %shuffle.i.i, ptr %__W, i32 16, <8 x i1> %1) #5
  ret void
}

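; The load tests use the same widening pattern, with either a pass-through
; vector (merge masking) or zeroinitializer (zero masking) as the masked-load
; source; llc should select the corresponding {%k1} or {%k1} {z} form of
; vmovss/vmovsd.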
define <4 x float> @test_mm_mask_load_ss(<4 x float> %__A, i8 zeroext %__U, ptr %__W) local_unnamed_addr #2 {
; CHECK64-LABEL: test_mm_mask_load_ss:
; CHECK64: # %bb.0: # %entry
; CHECK64-NEXT: kmovw %edi, %k1
; CHECK64-NEXT: vmovss (%rsi), %xmm0 {%k1}
; CHECK64-NEXT: retq
;
; CHECK32-LABEL: test_mm_mask_load_ss:
; CHECK32: # %bb.0: # %entry
; CHECK32-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK32-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; CHECK32-NEXT: kmovw %ecx, %k1
; CHECK32-NEXT: vmovss (%eax), %xmm0 {%k1}
; CHECK32-NEXT: retl
entry:
  %shuffle.i = shufflevector <4 x float> %__A, <4 x float> <float 0.000000e+00, float undef, float undef, float undef>, <4 x i32> <i32 0, i32 4, i32 4, i32 4>
  %shuffle.i.i = shufflevector <4 x float> %shuffle.i, <4 x float> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %0 = and i8 %__U, 1
  %conv2.i = zext i8 %0 to i16
  %1 = bitcast i16 %conv2.i to <16 x i1>
  %2 = tail call <16 x float> @llvm.masked.load.v16f32.p0(ptr %__W, i32 16, <16 x i1> %1, <16 x float> %shuffle.i.i) #5
  %shuffle4.i = shufflevector <16 x float> %2, <16 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  ret <4 x float> %shuffle4.i
}

define <2 x double> @test_mm_mask_load_sd(<2 x double> %__A, i8 zeroext %__U, ptr %__W) local_unnamed_addr #2 {
; CHECK64-LABEL: test_mm_mask_load_sd:
; CHECK64: # %bb.0: # %entry
; CHECK64-NEXT: kmovw %edi, %k1
; CHECK64-NEXT: vmovsd (%rsi), %xmm0 {%k1}
; CHECK64-NEXT: retq
;
; CHECK32-LABEL: test_mm_mask_load_sd:
; CHECK32: # %bb.0: # %entry
; CHECK32-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK32-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; CHECK32-NEXT: kmovw %ecx, %k1
; CHECK32-NEXT: vmovsd (%eax), %xmm0 {%k1}
; CHECK32-NEXT: retl
entry:
  %shuffle5.i = insertelement <2 x double> %__A, double 0.000000e+00, i32 1
  %shuffle.i.i = shufflevector <2 x double> %shuffle5.i, <2 x double> undef, <8 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %0 = and i8 %__U, 1
  %1 = bitcast i8 %0 to <8 x i1>
  %2 = tail call <8 x double> @llvm.masked.load.v8f64.p0(ptr %__W, i32 16, <8 x i1> %1, <8 x double> %shuffle.i.i) #5
  %shuffle3.i = shufflevector <8 x double> %2, <8 x double> undef, <2 x i32> <i32 0, i32 1>
  ret <2 x double> %shuffle3.i
}

define <4 x float> @test_mm_maskz_load_ss(i8 zeroext %__U, ptr %__W) local_unnamed_addr #2 {
; CHECK64-LABEL: test_mm_maskz_load_ss:
; CHECK64: # %bb.0: # %entry
; CHECK64-NEXT: kmovw %edi, %k1
; CHECK64-NEXT: vmovss (%rsi), %xmm0 {%k1} {z}
; CHECK64-NEXT: retq
;
; CHECK32-LABEL: test_mm_maskz_load_ss:
; CHECK32: # %bb.0: # %entry
; CHECK32-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK32-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; CHECK32-NEXT: kmovw %ecx, %k1
; CHECK32-NEXT: vmovss (%eax), %xmm0 {%k1} {z}
; CHECK32-NEXT: retl
entry:
  %0 = and i8 %__U, 1
  %conv2.i = zext i8 %0 to i16
  %1 = bitcast i16 %conv2.i to <16 x i1>
  %2 = tail call <16 x float> @llvm.masked.load.v16f32.p0(ptr %__W, i32 16, <16 x i1> %1, <16 x float> zeroinitializer) #5
  %shuffle.i = shufflevector <16 x float> %2, <16 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  ret <4 x float> %shuffle.i
}

define <2 x double> @test_mm_maskz_load_sd(i8 zeroext %__U, ptr %__W) local_unnamed_addr #2 {
; CHECK64-LABEL: test_mm_maskz_load_sd:
; CHECK64: # %bb.0: # %entry
; CHECK64-NEXT: kmovw %edi, %k1
; CHECK64-NEXT: vmovsd (%rsi), %xmm0 {%k1} {z}
; CHECK64-NEXT: retq
;
; CHECK32-LABEL: test_mm_maskz_load_sd:
; CHECK32: # %bb.0: # %entry
; CHECK32-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK32-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; CHECK32-NEXT: kmovw %ecx, %k1
; CHECK32-NEXT: vmovsd (%eax), %xmm0 {%k1} {z}
; CHECK32-NEXT: retl
entry:
  %0 = and i8 %__U, 1
  %1 = bitcast i8 %0 to <8 x i1>
  %2 = tail call <8 x double> @llvm.masked.load.v8f64.p0(ptr %__W, i32 16, <8 x i1> %1, <8 x double> zeroinitializer) #5
  %shuffle.i = shufflevector <8 x double> %2, <8 x double> undef, <2 x i32> <i32 0, i32 1>
  ret <2 x double> %shuffle.i
}

; The tests below match clang's newer codegen that uses 128-bit masked load/stores.
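; In that pattern the i8 mask is bitcast to <8 x i1> and its low lanes are
; extracted with a shufflevector, so the masked load/store is issued directly
; at the natural 128-bit width (note the alignment of 1); the expected
; instructions are the same masked vmovss/vmovsd forms as above.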

define void @test_mm_mask_store_ss_2(ptr %__P, i8 zeroext %__U, <4 x float> %__A) {
; CHECK64-LABEL: test_mm_mask_store_ss_2:
; CHECK64: # %bb.0: # %entry
; CHECK64-NEXT: kmovw %esi, %k1
; CHECK64-NEXT: vmovss %xmm0, (%rdi) {%k1}
; CHECK64-NEXT: retq
;
; CHECK32-LABEL: test_mm_mask_store_ss_2:
; CHECK32: # %bb.0: # %entry
; CHECK32-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK32-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; CHECK32-NEXT: kmovw %ecx, %k1
; CHECK32-NEXT: vmovss %xmm0, (%eax) {%k1}
; CHECK32-NEXT: retl
entry:
  %0 = and i8 %__U, 1
  %1 = bitcast i8 %0 to <8 x i1>
  %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  tail call void @llvm.masked.store.v4f32.p0(<4 x float> %__A, ptr %__P, i32 1, <4 x i1> %extract.i)
  ret void
}

define void @test_mm_mask_store_sd_2(ptr %__P, i8 zeroext %__U, <2 x double> %__A) {
; CHECK64-LABEL: test_mm_mask_store_sd_2:
; CHECK64: # %bb.0: # %entry
; CHECK64-NEXT: kmovw %esi, %k1
; CHECK64-NEXT: vmovsd %xmm0, (%rdi) {%k1}
; CHECK64-NEXT: retq
;
; CHECK32-LABEL: test_mm_mask_store_sd_2:
; CHECK32: # %bb.0: # %entry
; CHECK32-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK32-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; CHECK32-NEXT: kmovw %ecx, %k1
; CHECK32-NEXT: vmovsd %xmm0, (%eax) {%k1}
; CHECK32-NEXT: retl
entry:
  %0 = and i8 %__U, 1
  %1 = bitcast i8 %0 to <8 x i1>
  %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
  tail call void @llvm.masked.store.v2f64.p0(<2 x double> %__A, ptr %__P, i32 1, <2 x i1> %extract.i)
  ret void
}

define <4 x float> @test_mm_mask_load_ss_2(<4 x float> %__A, i8 zeroext %__U, ptr readonly %__W) {
; CHECK64-LABEL: test_mm_mask_load_ss_2:
; CHECK64: # %bb.0: # %entry
; CHECK64-NEXT: kmovw %edi, %k1
; CHECK64-NEXT: vmovss (%rsi), %xmm0 {%k1}
; CHECK64-NEXT: retq
;
; CHECK32-LABEL: test_mm_mask_load_ss_2:
; CHECK32: # %bb.0: # %entry
; CHECK32-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK32-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; CHECK32-NEXT: kmovw %ecx, %k1
; CHECK32-NEXT: vmovss (%eax), %xmm0 {%k1}
; CHECK32-NEXT: retl
entry:
  %shuffle.i = shufflevector <4 x float> %__A, <4 x float> <float 0.000000e+00, float undef, float undef, float undef>, <4 x i32> <i32 0, i32 4, i32 4, i32 4>
  %0 = and i8 %__U, 1
  %1 = bitcast i8 %0 to <8 x i1>
  %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %2 = tail call <4 x float> @llvm.masked.load.v4f32.p0(ptr %__W, i32 1, <4 x i1> %extract.i, <4 x float> %shuffle.i)
  ret <4 x float> %2
}

define <4 x float> @test_mm_maskz_load_ss_2(i8 zeroext %__U, ptr readonly %__W) {
; CHECK64-LABEL: test_mm_maskz_load_ss_2:
; CHECK64: # %bb.0: # %entry
; CHECK64-NEXT: kmovw %edi, %k1
; CHECK64-NEXT: vmovss (%rsi), %xmm0 {%k1} {z}
; CHECK64-NEXT: retq
;
; CHECK32-LABEL: test_mm_maskz_load_ss_2:
; CHECK32: # %bb.0: # %entry
; CHECK32-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK32-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; CHECK32-NEXT: kmovw %ecx, %k1
; CHECK32-NEXT: vmovss (%eax), %xmm0 {%k1} {z}
; CHECK32-NEXT: retl
entry:
  %0 = and i8 %__U, 1
  %1 = bitcast i8 %0 to <8 x i1>
  %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %2 = tail call <4 x float> @llvm.masked.load.v4f32.p0(ptr %__W, i32 1, <4 x i1> %extract.i, <4 x float> zeroinitializer)
  ret <4 x float> %2
}

define <2 x double> @test_mm_mask_load_sd_2(<2 x double> %__A, i8 zeroext %__U, ptr readonly %__W) {
; CHECK64-LABEL: test_mm_mask_load_sd_2:
; CHECK64: # %bb.0: # %entry
; CHECK64-NEXT: kmovw %edi, %k1
; CHECK64-NEXT: vmovsd (%rsi), %xmm0 {%k1}
; CHECK64-NEXT: retq
;
; CHECK32-LABEL: test_mm_mask_load_sd_2:
; CHECK32: # %bb.0: # %entry
; CHECK32-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK32-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; CHECK32-NEXT: kmovw %ecx, %k1
; CHECK32-NEXT: vmovsd (%eax), %xmm0 {%k1}
; CHECK32-NEXT: retl
entry:
  %shuffle3.i = insertelement <2 x double> %__A, double 0.000000e+00, i32 1
  %0 = and i8 %__U, 1
  %1 = bitcast i8 %0 to <8 x i1>
  %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
  %2 = tail call <2 x double> @llvm.masked.load.v2f64.p0(ptr %__W, i32 1, <2 x i1> %extract.i, <2 x double> %shuffle3.i)
  ret <2 x double> %2
}

define <2 x double> @test_mm_maskz_load_sd_2(i8 zeroext %__U, ptr readonly %__W) {
; CHECK64-LABEL: test_mm_maskz_load_sd_2:
; CHECK64: # %bb.0: # %entry
; CHECK64-NEXT: kmovw %edi, %k1
; CHECK64-NEXT: vmovsd (%rsi), %xmm0 {%k1} {z}
; CHECK64-NEXT: retq
;
; CHECK32-LABEL: test_mm_maskz_load_sd_2:
; CHECK32: # %bb.0: # %entry
; CHECK32-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK32-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; CHECK32-NEXT: kmovw %ecx, %k1
; CHECK32-NEXT: vmovsd (%eax), %xmm0 {%k1} {z}
; CHECK32-NEXT: retl
entry:
  %0 = and i8 %__U, 1
  %1 = bitcast i8 %0 to <8 x i1>
  %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
  %2 = tail call <2 x double> @llvm.masked.load.v2f64.p0(ptr %__W, i32 1, <2 x i1> %extract.i, <2 x double> zeroinitializer)
  ret <2 x double> %2
}

declare void @llvm.masked.store.v16f32.p0(<16 x float>, ptr, i32, <16 x i1>) #3

declare void @llvm.masked.store.v8f64.p0(<8 x double>, ptr, i32, <8 x i1>) #3

declare <16 x float> @llvm.masked.load.v16f32.p0(ptr, i32, <16 x i1>, <16 x float>) #4

declare <8 x double> @llvm.masked.load.v8f64.p0(ptr, i32, <8 x i1>, <8 x double>) #4

declare void @llvm.masked.store.v4f32.p0(<4 x float>, ptr, i32, <4 x i1>)

declare void @llvm.masked.store.v2f64.p0(<2 x double>, ptr, i32, <2 x i1>)

declare <4 x float> @llvm.masked.load.v4f32.p0(ptr, i32, <4 x i1>, <4 x float>)

declare <2 x double> @llvm.masked.load.v2f64.p0(ptr, i32, <2 x i1>, <2 x double>)