; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=x86_64-linux -mcpu=skylake-avx512 < %s | FileCheck %s

define void @scatter_scale_512(ptr %result, <4 x i64> %idx, <4 x i1> %mask) {
; CHECK-LABEL: scatter_scale_512:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpslld $31, %xmm1, %xmm1
; CHECK-NEXT:    vpmovd2m %xmm1, %k1
; CHECK-NEXT:    vpsllq $9, %ymm0, %ymm0
; CHECK-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; CHECK-NEXT:    vscatterqpd %ymm1, (%rdi,%ymm0) {%k1}
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
  %gep = getelementptr inbounds [512 x i8], ptr %result, <4 x i64> %idx
  call void @llvm.masked.scatter.v4f64.v4p0(<4 x double> zeroinitializer, <4 x ptr> %gep, i32 0, <4 x i1> %mask)
  ret void
}

define void @scatter_scale_16(ptr %result, <4 x i64> %idx, <4 x i1> %mask) {
; CHECK-LABEL: scatter_scale_16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpslld $31, %xmm1, %xmm1
; CHECK-NEXT:    vpmovd2m %xmm1, %k1
; CHECK-NEXT:    vpsllq $4, %ymm0, %ymm0
; CHECK-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; CHECK-NEXT:    vscatterqpd %ymm1, (%rdi,%ymm0) {%k1}
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
  %gep = getelementptr inbounds [16 x i8], ptr %result, <4 x i64> %idx
  call void @llvm.masked.scatter.v4f64.v4p0(<4 x double> zeroinitializer, <4 x ptr> %gep, i32 0, <4 x i1> %mask)
  ret void
}

define void @scatter_scale_8(ptr %result, <4 x i64> %idx, <4 x i1> %mask) {
; CHECK-LABEL: scatter_scale_8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpslld $31, %xmm1, %xmm1
; CHECK-NEXT:    vpmovd2m %xmm1, %k1
; CHECK-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; CHECK-NEXT:    vscatterqpd %ymm1, (%rdi,%ymm0,8) {%k1}
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
  %gep = getelementptr inbounds [8 x i8], ptr %result, <4 x i64> %idx
  call void @llvm.masked.scatter.v4f64.v4p0(<4 x double> zeroinitializer, <4 x ptr> %gep, i32 0, <4 x i1> %mask)
  ret void
}

define void @scatter_scale_4(ptr %result, <4 x i64> %idx, <4 x i1> %mask) {
; CHECK-LABEL: scatter_scale_4:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpslld $31, %xmm1, %xmm1
; CHECK-NEXT:    vpmovd2m %xmm1, %k1
; CHECK-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; CHECK-NEXT:    vscatterqpd %ymm1, (%rdi,%ymm0,4) {%k1}
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
  %gep = getelementptr inbounds [4 x i8], ptr %result, <4 x i64> %idx
  call void @llvm.masked.scatter.v4f64.v4p0(<4 x double> zeroinitializer, <4 x ptr> %gep, i32 0, <4 x i1> %mask)
  ret void
}

define void @scatter_scale_3(ptr %result, <4 x i64> %idx, <4 x i1> %mask) {
; CHECK-LABEL: scatter_scale_3:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpslld $31, %xmm1, %xmm1
; CHECK-NEXT:    vpmovd2m %xmm1, %k1
; CHECK-NEXT:    vpaddq %ymm0, %ymm0, %ymm1
; CHECK-NEXT:    vpaddq %ymm0, %ymm1, %ymm0
; CHECK-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; CHECK-NEXT:    vscatterqpd %ymm1, (%rdi,%ymm0) {%k1}
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
  %gep = getelementptr inbounds [3 x i8], ptr %result, <4 x i64> %idx
  call void @llvm.masked.scatter.v4f64.v4p0(<4 x double> zeroinitializer, <4 x ptr> %gep, i32 0, <4 x i1> %mask)
  ret void
}

define void @scatter_scale_1(ptr %result, <4 x i64> %idx, <4 x i1> %mask) {
; CHECK-LABEL: scatter_scale_1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpslld $31, %xmm1, %xmm1
; CHECK-NEXT:    vpmovd2m %xmm1, %k1
; CHECK-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; CHECK-NEXT:    vscatterqpd %ymm1, (%rdi,%ymm0) {%k1}
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
  %gep = getelementptr inbounds [1 x i8], ptr %result, <4 x i64> %idx
  call void @llvm.masked.scatter.v4f64.v4p0(<4 x double> zeroinitializer, <4 x ptr> %gep, i32 0, <4 x i1> %mask)
  ret void
}
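
; The gather tests below mirror the scatter tests above. An x86 SIB
; addressing mode can only encode a scale of 1, 2, 4, or 8, so the element
; sizes that fit (4 and 8 bytes here) are folded directly into the memory
; operand, while the other sizes are first applied to the index register
; (vpsllq for powers of two, a pair of vpaddq for x3) and the gather or
; scatter then uses an implicit scale of 1.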

define <4 x double> @gather_scale_512(ptr %result, <4 x i64> %idx, <4 x i1> %mask) {
; CHECK-LABEL: gather_scale_512:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpslld $31, %xmm1, %xmm1
; CHECK-NEXT:    vpmovd2m %xmm1, %k1
; CHECK-NEXT:    vpsllq $9, %ymm0, %ymm1
; CHECK-NEXT:    vpxor %xmm0, %xmm0, %xmm0
; CHECK-NEXT:    vgatherqpd (%rdi,%ymm1), %ymm0 {%k1}
; CHECK-NEXT:    retq
  %gep = getelementptr inbounds [512 x i8], ptr %result, <4 x i64> %idx
  %res = call <4 x double> @llvm.masked.gather.v4f64.v4p0(<4 x ptr> %gep, i32 0, <4 x i1> %mask, <4 x double> zeroinitializer)
  ret <4 x double> %res
}

define <4 x double> @gather_scale_16(ptr %result, <4 x i64> %idx, <4 x i1> %mask) {
; CHECK-LABEL: gather_scale_16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpslld $31, %xmm1, %xmm1
; CHECK-NEXT:    vpmovd2m %xmm1, %k1
; CHECK-NEXT:    vpsllq $4, %ymm0, %ymm1
; CHECK-NEXT:    vpxor %xmm0, %xmm0, %xmm0
; CHECK-NEXT:    vgatherqpd (%rdi,%ymm1), %ymm0 {%k1}
; CHECK-NEXT:    retq
  %gep = getelementptr inbounds [16 x i8], ptr %result, <4 x i64> %idx
  %res = call <4 x double> @llvm.masked.gather.v4f64.v4p0(<4 x ptr> %gep, i32 0, <4 x i1> %mask, <4 x double> zeroinitializer)
  ret <4 x double> %res
}

define <4 x double> @gather_scale_8(ptr %result, <4 x i64> %idx, <4 x i1> %mask) {
; CHECK-LABEL: gather_scale_8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpslld $31, %xmm1, %xmm1
; CHECK-NEXT:    vpmovd2m %xmm1, %k1
; CHECK-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; CHECK-NEXT:    vgatherqpd (%rdi,%ymm0,8), %ymm1 {%k1}
; CHECK-NEXT:    vmovapd %ymm1, %ymm0
; CHECK-NEXT:    retq
  %gep = getelementptr inbounds [8 x i8], ptr %result, <4 x i64> %idx
  %res = call <4 x double> @llvm.masked.gather.v4f64.v4p0(<4 x ptr> %gep, i32 0, <4 x i1> %mask, <4 x double> zeroinitializer)
  ret <4 x double> %res
}

define <4 x double> @gather_scale_4(ptr %result, <4 x i64> %idx, <4 x i1> %mask) {
; CHECK-LABEL: gather_scale_4:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpslld $31, %xmm1, %xmm1
; CHECK-NEXT:    vpmovd2m %xmm1, %k1
; CHECK-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; CHECK-NEXT:    vgatherqpd (%rdi,%ymm0,4), %ymm1 {%k1}
; CHECK-NEXT:    vmovapd %ymm1, %ymm0
; CHECK-NEXT:    retq
  %gep = getelementptr inbounds [4 x i8], ptr %result, <4 x i64> %idx
  %res = call <4 x double> @llvm.masked.gather.v4f64.v4p0(<4 x ptr> %gep, i32 0, <4 x i1> %mask, <4 x double> zeroinitializer)
  ret <4 x double> %res
}

define <4 x double> @gather_scale_3(ptr %result, <4 x i64> %idx, <4 x i1> %mask) {
; CHECK-LABEL: gather_scale_3:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpslld $31, %xmm1, %xmm1
; CHECK-NEXT:    vpmovd2m %xmm1, %k1
; CHECK-NEXT:    vpaddq %ymm0, %ymm0, %ymm1
; CHECK-NEXT:    vpaddq %ymm0, %ymm1, %ymm1
; CHECK-NEXT:    vpxor %xmm0, %xmm0, %xmm0
; CHECK-NEXT:    vgatherqpd (%rdi,%ymm1), %ymm0 {%k1}
; CHECK-NEXT:    retq
  %gep = getelementptr inbounds [3 x i8], ptr %result, <4 x i64> %idx
  %res = call <4 x double> @llvm.masked.gather.v4f64.v4p0(<4 x ptr> %gep, i32 0, <4 x i1> %mask, <4 x double> zeroinitializer)
  ret <4 x double> %res
}

define <4 x double> @gather_scale_1(ptr %result, <4 x i64> %idx, <4 x i1> %mask) {
; CHECK-LABEL: gather_scale_1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpslld $31, %xmm1, %xmm1
; CHECK-NEXT:    vpmovd2m %xmm1, %k1
; CHECK-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; CHECK-NEXT:    vgatherqpd (%rdi,%ymm0), %ymm1 {%k1}
; CHECK-NEXT:    vmovapd %ymm1, %ymm0
; CHECK-NEXT:    retq
  %gep = getelementptr inbounds [1 x i8], ptr %result, <4 x i64> %idx
  %res = call <4 x double> @llvm.masked.gather.v4f64.v4p0(<4 x ptr> %gep, i32 0, <4 x i1> %mask, <4 x double> zeroinitializer)
  ret <4 x double> %res
}
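
; Common to every test above: the incoming <4 x i1> mask is shifted into the
; sign-bit position with vpslld $31 and converted to a %k predicate register
; with vpmovd2m before it can gate the gather/scatter.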

declare void @llvm.masked.scatter.v4f64.v4p0(<4 x double>, <4 x ptr>, i32 immarg, <4 x i1>)
declare <4 x double> @llvm.masked.gather.v4f64.v4p0(<4 x ptr>, i32 immarg, <4 x i1>, <4 x double>)