xref: /llvm-project/llvm/test/CodeGen/SystemZ/vec-perm-14.ll (revision d460c1de3b989cea919b9d60c21644f28f987950)
1; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s
2
3; Test that no vperm of the vector compare is needed for the extracts.
; Lanes 0 and 1 of one <4 x i1> compare result are extracted in different
; blocks and each feeds a conditional branch. The expected code widens the
; byte compare result once (vuphb, vuphh) and then reads each lane directly
; with vlgvf; no vperm appears in the expected sequence.
4define void @fun() {
5; CHECK-LABEL: fun:
6; CHECK:       # %bb.0: # %bb
7; CHECK-NEXT:    vlrepf %v0, 0(%r1)
8; CHECK-NEXT:    vgbm %v1, 0
9; CHECK-NEXT:    vceqb %v0, %v0, %v1
10; CHECK-NEXT:    vuphb %v0, %v0
11; CHECK-NEXT:    vuphh %v0, %v0
12; CHECK-NEXT:    vlgvf %r0, %v0, 0
13; CHECK-NEXT:    tmll %r0, 1
14; CHECK-NEXT:    je .LBB0_2
15; CHECK-NEXT:  # %bb.1: # %bb1
16; CHECK-NEXT:  .LBB0_2: # %bb2
17; CHECK-NEXT:    vlgvf %r0, %v0, 1
18; CHECK-NEXT:    tmll %r0, 1
19; CHECK-NEXT:    je .LBB0_4
20; CHECK-NEXT:  # %bb.3: # %bb3
21; CHECK-NEXT:  .LBB0_4: # %bb4
22bb:
; NOTE(review): the load address is undef; presumably only the selected
; instruction sequence matters here, not the address -- confirm.
23  %tmp = load <4 x i8>, ptr undef
; One vector compare against zero; both extracts below reuse this result.
24  %tmp1 = icmp eq <4 x i8> zeroinitializer, %tmp
25  %tmp2 = extractelement <4 x i1> %tmp1, i32 0
26  br i1 %tmp2, label %bb1, label %bb2
27
; bb1, bb3 and bb4 are all unreachable; the expected output accordingly shows
; their labels with no instructions in between.
28bb1:
29  unreachable
30
31bb2:
; Second extract of the same compare result, this time lane 1.
32  %tmp3 = extractelement <4 x i1> %tmp1, i32 1
33  br i1 %tmp3, label %bb3, label %bb4
34
35bb3:
36  unreachable
37
38bb4:
39  unreachable
40}
41
42; Test that a zero index in the permute vector is used instead of VGBM, with
43; a zero index into the other source operand.
; The shuffle mask <1, 2, 0, 3> selects %arg[1], zero, %arg[0], zero. The
; expected permute-vector constant starts with the bytes 1, 18, 0, 18, and the
; same register (%v0) is passed to vperm as both the second source and the
; permute vector. Index 18 (16 + 2) therefore reads byte 2 of the constant
; itself, which is 0, so no vgbm is expected to materialize a zero vector.
; The remaining 12 bytes of the 16-byte constant are expected as .space 1.
44define <4 x i8> @fun1(<2 x i8> %arg) {
45; CHECK-LABEL:.LCPI1_0:
46; CHECK-NEXT:        .byte   1                       # 0x1
47; CHECK-NEXT:        .byte   18                      # 0x12
48; CHECK-NEXT:        .byte   0                       # 0x0
49; CHECK-NEXT:        .byte   18                      # 0x12
50; CHECK-NEXT:        .space  1
51; CHECK-NEXT:        .space  1
52; CHECK-NEXT:        .space  1
53; CHECK-NEXT:        .space  1
54; CHECK-NEXT:        .space  1
55; CHECK-NEXT:        .space  1
56; CHECK-NEXT:        .space  1
57; CHECK-NEXT:        .space  1
58; CHECK-NEXT:        .space  1
59; CHECK-NEXT:        .space  1
60; CHECK-NEXT:        .space  1
61; CHECK-NEXT:        .space  1
62; CHECK-NEXT:        .text
63; CHECK-NEXT:        .globl  fun1
64; CHECK-NEXT:        .p2align        4
65; CHECK-NEXT:        .type   fun1,@function
66; CHECK-NEXT: fun1:                                  # @fun1
67; CHECK-NEXT:        .cfi_startproc
68; CHECK-NEXT: # %bb.0:
69; CHECK-NEXT:        larl    %r1, .LCPI1_0
70; CHECK-NEXT:        vl      %v0, 0(%r1), 3
71; CHECK-NEXT:        vperm   %v24, %v24, %v0, %v0
72; CHECK-NEXT:        br      %r14
; Mask indices 0-1 pick from %arg, indices 2-3 pick from the zero vector.
73   %res = shufflevector <2 x i8> %arg, <2 x i8> zeroinitializer,
74                        <4 x i32> <i32 1, i32 2, i32 0, i32 3>
75   ret <4 x i8> %res
76}
77
78; Same, but with the first byte indexing into an element of the zero vector.
; The shuffle mask <3, 1, 1, 2> selects zero, %arg[1], %arg[1], zero. The
; expected permute-vector constant starts with the bytes 0, 17, 17, 0. Here
; the constant register (%v0) is the first vperm source and %arg (%v24) is the
; second: index 0 reads byte 0 of the constant itself, which is 0, and
; index 17 (16 + 1) reads byte 1 of %arg. No vgbm is expected.
; The remaining 12 bytes of the 16-byte constant are expected as .space 1.
79define <4 x i8> @fun2(<2 x i8> %arg) {
80; CHECK-LABEL:.LCPI2_0:
81; CHECK-NEXT:        .byte   0                       # 0x0
82; CHECK-NEXT:        .byte   17                      # 0x11
83; CHECK-NEXT:        .byte   17                      # 0x11
84; CHECK-NEXT:        .byte   0                       # 0x0
85; CHECK-NEXT:        .space  1
86; CHECK-NEXT:        .space  1
87; CHECK-NEXT:        .space  1
88; CHECK-NEXT:        .space  1
89; CHECK-NEXT:        .space  1
90; CHECK-NEXT:        .space  1
91; CHECK-NEXT:        .space  1
92; CHECK-NEXT:        .space  1
93; CHECK-NEXT:        .space  1
94; CHECK-NEXT:        .space  1
95; CHECK-NEXT:        .space  1
96; CHECK-NEXT:        .space  1
97; CHECK-NEXT:        .text
98; CHECK-NEXT:        .globl  fun2
99; CHECK-NEXT:        .p2align        4
100; CHECK-NEXT:        .type   fun2,@function
101; CHECK-NEXT:fun2:                                   # @fun2
102; CHECK-NEXT:        .cfi_startproc
103; CHECK-NEXT:# %bb.0:
104; CHECK-NEXT:        larl    %r1, .LCPI2_0
105; CHECK-NEXT:        vl      %v0, 0(%r1), 3
106; CHECK-NEXT:        vperm   %v24, %v0, %v24, %v0
107; CHECK-NEXT:        br      %r14
; Mask indices 0-1 pick from %arg, indices 2-3 pick from the zero vector.
108   %res = shufflevector <2 x i8> %arg, <2 x i8> zeroinitializer,
109                        <4 x i32> <i32 3, i32 1, i32 1, i32 2>
110   ret <4 x i8> %res
111}
112