; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl -mattr=+avx512bw | FileCheck %s

define <64 x i8> @test1(ptr %addr) {
; CHECK-LABEL: test1:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vmovups (%rdi), %zmm0
; CHECK-NEXT:    retq
  %res = load <64 x i8>, ptr %addr, align 1
  ret <64 x i8>%res
}

define void @test2(ptr %addr, <64 x i8> %data) {
; CHECK-LABEL: test2:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vmovups %zmm0, (%rdi)
; CHECK-NEXT:    retq
  store <64 x i8>%data, ptr %addr, align 1
  ret void
}

define <64 x i8> @test3(ptr %addr, <64 x i8> %old, <64 x i8> %mask1) {
; CHECK-LABEL: test3:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestmb %zmm1, %zmm1, %k1
; CHECK-NEXT:    vmovdqu8 (%rdi), %zmm0 {%k1}
; CHECK-NEXT:    retq
  %mask = icmp ne <64 x i8> %mask1, zeroinitializer
  %r = load <64 x i8>, ptr %addr, align 1
  %res = select <64 x i1> %mask, <64 x i8> %r, <64 x i8> %old
  ret <64 x i8>%res
}

define <64 x i8> @test4(ptr %addr, <64 x i8> %mask1) {
; CHECK-LABEL: test4:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestmb %zmm0, %zmm0, %k1
; CHECK-NEXT:    vmovdqu8 (%rdi), %zmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %mask = icmp ne <64 x i8> %mask1, zeroinitializer
  %r = load <64 x i8>, ptr %addr, align 1
  %res = select <64 x i1> %mask, <64 x i8> %r, <64 x i8> zeroinitializer
  ret <64 x i8>%res
}

define <32 x i16> @test5(ptr %addr) {
; CHECK-LABEL: test5:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vmovups (%rdi), %zmm0
; CHECK-NEXT:    retq
  %res = load <32 x i16>, ptr %addr, align 1
  ret <32 x i16>%res
}

define void @test6(ptr %addr, <32 x i16> %data) {
; CHECK-LABEL: test6:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vmovups %zmm0, (%rdi)
; CHECK-NEXT:    retq
  store <32 x i16>%data, ptr %addr, align 1
  ret void
}

define <32 x i16> @test7(ptr %addr, <32 x i16> %old, <32 x i16> %mask1) {
; CHECK-LABEL: test7:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestmw %zmm1, %zmm1, %k1
; CHECK-NEXT:    vmovdqu16 (%rdi), %zmm0 {%k1}
; CHECK-NEXT:    retq
  %mask = icmp ne <32 x i16> %mask1, zeroinitializer
  %r = load <32 x i16>, ptr %addr, align 1
  %res = select <32 x i1> %mask, <32 x i16> %r, <32 x i16> %old
  ret <32 x i16>%res
}

define <32 x i16> @test8(ptr %addr, <32 x i16> %mask1) {
; CHECK-LABEL: test8:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vptestmw %zmm0, %zmm0, %k1
; CHECK-NEXT:    vmovdqu16 (%rdi), %zmm0 {%k1} {z}
; CHECK-NEXT:    retq
  %mask = icmp ne <32 x i16> %mask1, zeroinitializer
  %r = load <32 x i16>, ptr %addr, align 1
  %res = select <32 x i1> %mask, <32 x i16> %r, <32 x i16> zeroinitializer
  ret <32 x i16>%res
}

define <16 x i8> @test_mask_load_16xi8(<16 x i1> %mask, ptr %addr, <16 x i8> %val) {
; CHECK-LABEL: test_mask_load_16xi8:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpsllw $7, %xmm0, %xmm0
; CHECK-NEXT:    vpmovb2m %zmm0, %k0
; CHECK-NEXT:    kmovw %k0, %k1
; CHECK-NEXT:    vmovdqu8 (%rdi), %zmm0 {%k1} {z}
; CHECK-NEXT:    ## kill: def $xmm0 killed $xmm0 killed $zmm0
; CHECK-NEXT:    retq
  %res = call <16 x i8> @llvm.masked.load.v16i8(ptr %addr, i32 4, <16 x i1>%mask, <16 x i8> undef)
  ret <16 x i8> %res
}
declare <16 x i8> @llvm.masked.load.v16i8(ptr, i32, <16 x i1>, <16 x i8>)

define <32 x i8> @test_mask_load_32xi8(<32 x i1> %mask, ptr %addr, <32 x i8> %val) {
; CHECK-LABEL: test_mask_load_32xi8:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpsllw $7, %ymm0, %ymm0
; CHECK-NEXT:    vpmovb2m %zmm0, %k0
; CHECK-NEXT:    kmovd %k0, %k1
; CHECK-NEXT:    vmovdqu8 (%rdi), %zmm0 {%k1} {z}
; CHECK-NEXT:    ## kill: def $ymm0 killed $ymm0 killed $zmm0
; CHECK-NEXT:    retq
  %res = call <32 x i8> @llvm.masked.load.v32i8(ptr %addr, i32 4, <32 x i1>%mask, <32 x i8> zeroinitializer)
  ret <32 x i8> %res
}
declare <32 x i8> @llvm.masked.load.v32i8(ptr, i32, <32 x i1>, <32 x i8>)

define <8 x i16> @test_mask_load_8xi16(<8 x i1> %mask, ptr %addr, <8 x i16> %val) {
; CHECK-LABEL: test_mask_load_8xi16:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpsllw $15, %xmm0, %xmm0
; CHECK-NEXT:    vpmovw2m %zmm0, %k0
; CHECK-NEXT:    kshiftld $24, %k0, %k0
; CHECK-NEXT:    kshiftrd $24, %k0, %k1
; CHECK-NEXT:    vmovdqu16 (%rdi), %zmm0 {%k1} {z}
; CHECK-NEXT:    ## kill: def $xmm0 killed $xmm0 killed $zmm0
; CHECK-NEXT:    retq
  %res = call <8 x i16> @llvm.masked.load.v8i16(ptr %addr, i32 4, <8 x i1>%mask, <8 x i16> undef)
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.masked.load.v8i16(ptr, i32, <8 x i1>, <8 x i16>)

define <16 x i16> @test_mask_load_16xi16(<16 x i1> %mask, ptr %addr, <16 x i16> %val) {
; CHECK-LABEL: test_mask_load_16xi16:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    vpsllw $7, %xmm0, %xmm0
; CHECK-NEXT:    vpmovb2m %zmm0, %k0
; CHECK-NEXT:    kmovw %k0, %k1
; CHECK-NEXT:    vmovdqu16 (%rdi), %zmm0 {%k1} {z}
; CHECK-NEXT:    ## kill: def $ymm0 killed $ymm0 killed $zmm0
; CHECK-NEXT:    retq
  %res = call <16 x i16> @llvm.masked.load.v16i16(ptr %addr, i32 4, <16 x i1>%mask, <16 x i16> zeroinitializer)
  ret <16 x i16> %res
}
declare <16 x i16> @llvm.masked.load.v16i16(ptr, i32, <16 x i1>, <16 x i16>)

define void @test_mask_store_16xi8(<16 x i1> %mask, ptr %addr, <16 x i8> %val) {
; CHECK-LABEL: test_mask_store_16xi8:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    ## kill: def $xmm1 killed $xmm1 def $zmm1
; CHECK-NEXT:    vpsllw $7, %xmm0, %xmm0
; CHECK-NEXT:    vpmovb2m %zmm0, %k0
; CHECK-NEXT:    kmovw %k0, %k1
; CHECK-NEXT:    vmovdqu8 %zmm1, (%rdi) {%k1}
; CHECK-NEXT:    retq
  call void @llvm.masked.store.v16i8(<16 x i8> %val, ptr %addr, i32 4, <16 x i1>%mask)
  ret void
}
declare void @llvm.masked.store.v16i8(<16 x i8>, ptr, i32, <16 x i1>)

define void @test_mask_store_32xi8(<32 x i1> %mask, ptr %addr, <32 x i8> %val) {
; CHECK-LABEL: test_mask_store_32xi8:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    ## kill: def $ymm1 killed $ymm1 def $zmm1
; CHECK-NEXT:    vpsllw $7, %ymm0, %ymm0
; CHECK-NEXT:    vpmovb2m %zmm0, %k0
; CHECK-NEXT:    kmovd %k0, %k1
; CHECK-NEXT:    vmovdqu8 %zmm1, (%rdi) {%k1}
; CHECK-NEXT:    retq
  call void @llvm.masked.store.v32i8(<32 x i8> %val, ptr %addr, i32 4, <32 x i1>%mask)
  ret void
}
declare void @llvm.masked.store.v32i8(<32 x i8>, ptr, i32, <32 x i1>)

define void @test_mask_store_8xi16(<8 x i1> %mask, ptr %addr, <8 x i16> %val) {
; CHECK-LABEL: test_mask_store_8xi16:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    ## kill: def $xmm1 killed $xmm1 def $zmm1
; CHECK-NEXT:    vpsllw $15, %xmm0, %xmm0
; CHECK-NEXT:    vpmovw2m %zmm0, %k0
; CHECK-NEXT:    kshiftld $24, %k0, %k0
; CHECK-NEXT:    kshiftrd $24, %k0, %k1
; CHECK-NEXT:    vmovdqu16 %zmm1, (%rdi) {%k1}
; CHECK-NEXT:    retq
  call void @llvm.masked.store.v8i16(<8 x i16> %val, ptr %addr, i32 4, <8 x i1>%mask)
  ret void
}
declare void @llvm.masked.store.v8i16(<8 x i16>, ptr, i32, <8 x i1>)

define void @test_mask_store_16xi16(<16 x i1> %mask, ptr %addr, <16 x i16> %val) {
; CHECK-LABEL: test_mask_store_16xi16:
; CHECK:       ## %bb.0:
; CHECK-NEXT:    ## kill: def $ymm1 killed $ymm1 def $zmm1
; CHECK-NEXT:    vpsllw $7, %xmm0, %xmm0
; CHECK-NEXT:    vpmovb2m %zmm0, %k0
; CHECK-NEXT:    kmovw %k0, %k1
; CHECK-NEXT:    vmovdqu16 %zmm1, (%rdi) {%k1}
; CHECK-NEXT:    retq
  call void @llvm.masked.store.v16i16(<16 x i16> %val, ptr %addr, i32 4, <16 x i1>%mask)
  ret void
}
declare void @llvm.masked.store.v16i16(<16 x i16>, ptr, i32, <16 x i1>)