; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=znver3 | FileCheck %s --check-prefix=X64

define <8 x i32> @simple(ptr %base, <8 x i32> %offsets) {
; X64-LABEL: simple:
; X64:       # %bb.0:
; X64-NEXT:    vextracti128 $1, %ymm0, %xmm2
; X64-NEXT:    vpmovsxdq %xmm0, %ymm0
; X64-NEXT:    vmovq %rdi, %xmm1
; X64-NEXT:    vpbroadcastq %xmm1, %ymm1
; X64-NEXT:    vpmovsxdq %xmm2, %ymm2
; X64-NEXT:    vpsllq $2, %ymm0, %ymm0
; X64-NEXT:    vpaddq %ymm0, %ymm1, %ymm0
; X64-NEXT:    vmovq %xmm0, %rax
; X64-NEXT:    vpextrq $1, %xmm0, %rcx
; X64-NEXT:    vextracti128 $1, %ymm0, %xmm0
; X64-NEXT:    vpsllq $2, %ymm2, %ymm2
; X64-NEXT:    vpaddq %ymm2, %ymm1, %ymm2
; X64-NEXT:    vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X64-NEXT:    vpextrq $1, %xmm0, %rdx
; X64-NEXT:    vmovq %xmm0, %rsi
; X64-NEXT:    vextracti128 $1, %ymm2, %xmm0
; X64-NEXT:    vmovq %xmm2, %rdi
; X64-NEXT:    vpextrq $1, %xmm2, %r8
; X64-NEXT:    vpinsrd $1, (%rcx), %xmm1, %xmm1
; X64-NEXT:    vmovq %xmm0, %r9
; X64-NEXT:    vpextrq $1, %xmm0, %r10
; X64-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X64-NEXT:    vpinsrd $2, (%rsi), %xmm1, %xmm1
; X64-NEXT:    vpinsrd $1, (%r8), %xmm0, %xmm0
; X64-NEXT:    vpinsrd $3, (%rdx), %xmm1, %xmm1
; X64-NEXT:    vpinsrd $2, (%r9), %xmm0, %xmm0
; X64-NEXT:    vpinsrd $3, (%r10), %xmm0, %xmm0
; X64-NEXT:    vinserti128 $1, %xmm0, %ymm1, %ymm0
; X64-NEXT:    retq
  %ptrs = getelementptr inbounds i32, ptr %base, <8 x i32> %offsets
  %wide.masked.gather = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> %ptrs, i32 4, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i32> undef)
  ret <8 x i32> %wide.masked.gather
}

define <8 x i32> @optsize(ptr %base, <8 x i32> %offsets) optsize {
; X64-LABEL: optsize:
; X64:       # %bb.0:
; X64-NEXT:    vextracti128 $1, %ymm0, %xmm2
; X64-NEXT:    vpmovsxdq %xmm0, %ymm0
; X64-NEXT:    vmovq %rdi, %xmm1
; X64-NEXT:    vpbroadcastq %xmm1, %ymm1
; X64-NEXT:    vpmovsxdq %xmm2, %ymm2
; X64-NEXT:    vpsllq $2, %ymm0, %ymm0
; X64-NEXT:    vpaddq %ymm0, %ymm1, %ymm0
; X64-NEXT:    vmovq %xmm0, %rax
; X64-NEXT:    vpextrq $1, %xmm0, %rcx
; X64-NEXT:    vextracti128 $1, %ymm0, %xmm0
; X64-NEXT:    vpsllq $2, %ymm2, %ymm2
; X64-NEXT:    vpaddq %ymm2, %ymm1, %ymm2
; X64-NEXT:    vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X64-NEXT:    vpextrq $1, %xmm0, %rdx
; X64-NEXT:    vmovq %xmm0, %rsi
; X64-NEXT:    vextracti128 $1, %ymm2, %xmm0
; X64-NEXT:    vmovq %xmm2, %rdi
; X64-NEXT:    vpextrq $1, %xmm2, %r8
; X64-NEXT:    vpinsrd $1, (%rcx), %xmm1, %xmm1
; X64-NEXT:    vmovq %xmm0, %r9
; X64-NEXT:    vpextrq $1, %xmm0, %r10
; X64-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X64-NEXT:    vpinsrd $2, (%rsi), %xmm1, %xmm1
; X64-NEXT:    vpinsrd $1, (%r8), %xmm0, %xmm0
; X64-NEXT:    vpinsrd $3, (%rdx), %xmm1, %xmm1
; X64-NEXT:    vpinsrd $2, (%r9), %xmm0, %xmm0
; X64-NEXT:    vpinsrd $3, (%r10), %xmm0, %xmm0
; X64-NEXT:    vinserti128 $1, %xmm0, %ymm1, %ymm0
; X64-NEXT:    retq
  %ptrs = getelementptr inbounds i32, ptr %base, <8 x i32> %offsets
  %wide.masked.gather = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> %ptrs, i32 4, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i32> undef)
  ret <8 x i32> %wide.masked.gather
}

define <8 x i32> @minsize(ptr %base, <8 x i32> %offsets) minsize {
; X64-LABEL: minsize:
; X64:       # %bb.0:
; X64-NEXT:    vextracti128 $1, %ymm0, %xmm2
; X64-NEXT:    vpmovsxdq %xmm0, %ymm0
; X64-NEXT:    vmovq %rdi, %xmm1
; X64-NEXT:    vpbroadcastq %xmm1, %ymm1
; X64-NEXT:    vpmovsxdq %xmm2, %ymm2
; X64-NEXT:    vpsllq $2, %ymm0, %ymm0
; X64-NEXT:    vpaddq %ymm0, %ymm1, %ymm0
; X64-NEXT:    vmovq %xmm0, %rax
; X64-NEXT:    vpextrq $1, %xmm0, %rcx
; X64-NEXT:    vextracti128 $1, %ymm0, %xmm0
; X64-NEXT:    vpsllq $2, %ymm2, %ymm2
; X64-NEXT:    vpaddq %ymm2, %ymm1, %ymm2
; X64-NEXT:    vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X64-NEXT:    vpextrq $1, %xmm0, %rdx
; X64-NEXT:    vmovq %xmm0, %rsi
; X64-NEXT:    vextracti128 $1, %ymm2, %xmm0
; X64-NEXT:    vmovq %xmm2, %rdi
; X64-NEXT:    vpextrq $1, %xmm2, %r8
; X64-NEXT:    vpinsrd $1, (%rcx), %xmm1, %xmm1
; X64-NEXT:    vmovq %xmm0, %r9
; X64-NEXT:    vpextrq $1, %xmm0, %r10
; X64-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X64-NEXT:    vpinsrd $2, (%rsi), %xmm1, %xmm1
; X64-NEXT:    vpinsrd $1, (%r8), %xmm0, %xmm0
; X64-NEXT:    vpinsrd $3, (%rdx), %xmm1, %xmm1
; X64-NEXT:    vpinsrd $2, (%r9), %xmm0, %xmm0
; X64-NEXT:    vpinsrd $3, (%r10), %xmm0, %xmm0
; X64-NEXT:    vinserti128 $1, %xmm0, %ymm1, %ymm0
; X64-NEXT:    retq
  %ptrs = getelementptr inbounds i32, ptr %base, <8 x i32> %offsets
  %wide.masked.gather = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> %ptrs, i32 4, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i32> undef)
  ret <8 x i32> %wide.masked.gather
}

declare <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr>, i32 immarg, <8 x i1>, <8 x i32>)
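
; NOTE: Reader commentary, not part of the autogenerated assertions. The three
; functions above are identical except for the optsize/minsize attributes. Our
; understanding (an assumption about the test's intent, not stated in the file)
; is that znver3 has AVX2 but does not enable the "fast-gather" tuning, so
; llvm.masked.gather is scalarized into per-lane loads plus vpinsrd insertions
; instead of being lowered to vpgatherdd; as checked above, the size attributes
; currently make no difference to the emitted code.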