; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s

; Branch on two individual lanes of a <4 x i8> compare result. The expected
; code unpacks the compare result once and reads both lanes directly out of it
; with VLGVF; no VPERM should appear between the compare and the extracts.
define void @fun() {
; CHECK-LABEL: fun:
; CHECK:       # %bb.0: # %bb
; CHECK-NEXT:    vlrepf %v0, 0(%r1)
; CHECK-NEXT:    vgbm %v1, 0
; CHECK-NEXT:    vceqb %v0, %v0, %v1
; CHECK-NEXT:    vuphb %v0, %v0
; CHECK-NEXT:    vuphh %v0, %v0
; CHECK-NEXT:    vlgvf %r0, %v0, 0
; CHECK-NEXT:    tmll %r0, 1
; CHECK-NEXT:    je .LBB0_2
; CHECK-NEXT:  # %bb.1: # %bb1
; CHECK-NEXT:  .LBB0_2: # %bb2
; CHECK-NEXT:    vlgvf %r0, %v0, 1
; CHECK-NEXT:    tmll %r0, 1
; CHECK-NEXT:    je .LBB0_4
; CHECK-NEXT:  # %bb.3: # %bb3
; CHECK-NEXT:  .LBB0_4: # %bb4
;
; The basic-block names below must stay as they are: they are echoed in the
; assembly comments that the expectations above match against.
bb:
  %bytes = load <4 x i8>, ptr undef
  %is_zero = icmp eq <4 x i8> zeroinitializer, %bytes
  ; First branch consumes lane 0 of the compare.
  %lane0 = extractelement <4 x i1> %is_zero, i32 0
  br i1 %lane0, label %bb1, label %bb2

bb1:
  unreachable

bb2:
  ; Second branch consumes lane 1 of the very same compare.
  %lane1 = extractelement <4 x i1> %is_zero, i32 1
  br i1 %lane1, label %bb3, label %bb4

bb3:
  unreachable

bb4:
  unreachable
}

; Shuffle with a zero vector: rather than building the zero operand with VGBM,
; the permute vector itself is passed as the second VPERM source, and the zero
; lanes select a byte of it that holds index 0. In this case that index 0 is
; at the same time a real selection from the other source operand.
define <4 x i8> @fun1(<2 x i8> %arg) {
; CHECK-LABEL:.LCPI1_0:
; CHECK-NEXT:    .byte 1 # 0x1
; CHECK-NEXT:    .byte 18 # 0x12
; CHECK-NEXT:    .byte 0 # 0x0
; CHECK-NEXT:    .byte 18 # 0x12
; CHECK-NEXT:    .space 1
; CHECK-NEXT:    .space 1
; CHECK-NEXT:    .space 1
; CHECK-NEXT:    .space 1
; CHECK-NEXT:    .space 1
; CHECK-NEXT:    .space 1
; CHECK-NEXT:    .space 1
; CHECK-NEXT:    .space 1
; CHECK-NEXT:    .space 1
; CHECK-NEXT:    .space 1
; CHECK-NEXT:    .space 1
; CHECK-NEXT:    .space 1
; CHECK-NEXT:    .text
; CHECK-NEXT:    .globl fun1
; CHECK-NEXT:    .p2align 4
; CHECK-NEXT:    .type fun1,@function
; CHECK-NEXT:  fun1: # @fun1
; CHECK-NEXT:    .cfi_startproc
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    larl %r1, .LCPI1_0
; CHECK-NEXT:    vl %v0, 0(%r1), 3
; CHECK-NEXT:    vperm %v24, %v24, %v0, %v0
; CHECK-NEXT:    br %r14
;
; Result lanes 0 and 2 take %arg[1] and %arg[0]; lanes 1 and 3 come from the
; zero vector (mask indices 2 and 3). In the expected constant, the zero lanes
; use index 18, i.e. byte 2 of the second VPERM source, which is the permute
; vector itself and holds the 0 used for lane 2.
  %shuffled = shufflevector <2 x i8> %arg, <2 x i8> zeroinitializer, <4 x i32> <i32 1, i32 2, i32 0, i32 3>
  ret <4 x i8> %shuffled
}

; Variant in which result lane 0 comes from the zero vector, so byte 0 of the
; permute vector is the zero index; in the expected output the permute vector
; is then the first VPERM source and the argument the second.
define <4 x i8> @fun2(<2 x i8> %arg) {
; CHECK-LABEL:.LCPI2_0:
; CHECK-NEXT:    .byte 0 # 0x0
; CHECK-NEXT:    .byte 17 # 0x11
; CHECK-NEXT:    .byte 17 # 0x11
; CHECK-NEXT:    .byte 0 # 0x0
; CHECK-NEXT:    .space 1
; CHECK-NEXT:    .space 1
; CHECK-NEXT:    .space 1
; CHECK-NEXT:    .space 1
; CHECK-NEXT:    .space 1
; CHECK-NEXT:    .space 1
; CHECK-NEXT:    .space 1
; CHECK-NEXT:    .space 1
; CHECK-NEXT:    .space 1
; CHECK-NEXT:    .space 1
; CHECK-NEXT:    .space 1
; CHECK-NEXT:    .space 1
; CHECK-NEXT:    .text
; CHECK-NEXT:    .globl fun2
; CHECK-NEXT:    .p2align 4
; CHECK-NEXT:    .type fun2,@function
; CHECK-NEXT:fun2: # @fun2
; CHECK-NEXT:    .cfi_startproc
; CHECK-NEXT:# %bb.0:
; CHECK-NEXT:    larl %r1, .LCPI2_0
; CHECK-NEXT:    vl %v0, 0(%r1), 3
; CHECK-NEXT:    vperm %v24, %v0, %v24, %v0
; CHECK-NEXT:    br %r14
;
; Result lanes 0 and 3 come from the zero vector (mask indices 3 and 2); lanes
; 1 and 2 both take %arg[1]. In the expected constant, the zero lanes use
; index 0, i.e. byte 0 of the first VPERM source (the permute vector itself),
; while 17 addresses byte 1 of the second source, the argument.
  %shuffled = shufflevector <2 x i8> %arg, <2 x i8> zeroinitializer, <4 x i32> <i32 3, i32 1, i32 1, i32 2>
  ret <4 x i8> %shuffled
}