; Source: llvm-project/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access-zve32x.ll (revision 01a15dca09e56dce850ab6fb3ecddfb3f8c6c172)
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=riscv64 -mattr=+m,+zve32x,+zvl1024b -O2 | FileCheck %s -check-prefix=ZVE32X
; RUN: llc < %s -mtriple=riscv64 -mattr=+m,+zve64x,+zvl1024b -O2 | FileCheck %s -check-prefix=ZVE64X

; TODO: Currently we don't lower interleaved accesses of ptr types if XLEN isn't
; a supported SEW. We should improve this with a wide load and a set of shuffles.
define <4 x i1> @load_large_vector(ptr %p) {
; ZVE32X-LABEL: load_large_vector:
; ZVE32X:       # %bb.0:
; ZVE32X-NEXT:    ld a1, 0(a0)
; ZVE32X-NEXT:    ld a2, 8(a0)
; ZVE32X-NEXT:    ld a3, 24(a0)
; ZVE32X-NEXT:    ld a4, 32(a0)
; ZVE32X-NEXT:    ld a5, 48(a0)
; ZVE32X-NEXT:    ld a6, 56(a0)
; ZVE32X-NEXT:    ld a7, 72(a0)
; ZVE32X-NEXT:    ld a0, 80(a0)
; ZVE32X-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
; ZVE32X-NEXT:    vmv.s.x v8, zero
; ZVE32X-NEXT:    vmv.v.i v9, 0
; ZVE32X-NEXT:    xor a3, a3, a4
; ZVE32X-NEXT:    xor a1, a1, a2
; ZVE32X-NEXT:    xor a2, a5, a6
; ZVE32X-NEXT:    xor a0, a7, a0
; ZVE32X-NEXT:    snez a3, a3
; ZVE32X-NEXT:    snez a1, a1
; ZVE32X-NEXT:    vmv.s.x v10, a3
; ZVE32X-NEXT:    vmv.s.x v11, a1
; ZVE32X-NEXT:    vsetivli zero, 1, e8, mf4, ta, ma
; ZVE32X-NEXT:    vand.vi v10, v10, 1
; ZVE32X-NEXT:    vmsne.vi v0, v10, 0
; ZVE32X-NEXT:    vand.vi v10, v11, 1
; ZVE32X-NEXT:    vmerge.vim v11, v8, 1, v0
; ZVE32X-NEXT:    vmsne.vi v0, v10, 0
; ZVE32X-NEXT:    snez a1, a2
; ZVE32X-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
; ZVE32X-NEXT:    vmerge.vim v10, v9, 1, v0
; ZVE32X-NEXT:    vsetivli zero, 2, e8, mf4, tu, ma
; ZVE32X-NEXT:    vslideup.vi v10, v11, 1
; ZVE32X-NEXT:    vmv.s.x v11, a1
; ZVE32X-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
; ZVE32X-NEXT:    vmsne.vi v0, v10, 0
; ZVE32X-NEXT:    vsetivli zero, 1, e8, mf4, ta, ma
; ZVE32X-NEXT:    vand.vi v10, v11, 1
; ZVE32X-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
; ZVE32X-NEXT:    vmerge.vim v11, v9, 1, v0
; ZVE32X-NEXT:    vsetivli zero, 1, e8, mf4, ta, ma
; ZVE32X-NEXT:    vmsne.vi v0, v10, 0
; ZVE32X-NEXT:    snez a0, a0
; ZVE32X-NEXT:    vmerge.vim v10, v8, 1, v0
; ZVE32X-NEXT:    vsetivli zero, 3, e8, mf4, tu, ma
; ZVE32X-NEXT:    vslideup.vi v11, v10, 2
; ZVE32X-NEXT:    vmv.s.x v10, a0
; ZVE32X-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
; ZVE32X-NEXT:    vmsne.vi v0, v11, 0
; ZVE32X-NEXT:    vsetivli zero, 1, e8, mf4, ta, ma
; ZVE32X-NEXT:    vand.vi v10, v10, 1
; ZVE32X-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
; ZVE32X-NEXT:    vmerge.vim v9, v9, 1, v0
; ZVE32X-NEXT:    vsetivli zero, 1, e8, mf4, ta, ma
; ZVE32X-NEXT:    vmsne.vi v0, v10, 0
; ZVE32X-NEXT:    vmerge.vim v8, v8, 1, v0
; ZVE32X-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
; ZVE32X-NEXT:    vslideup.vi v9, v8, 3
; ZVE32X-NEXT:    vmsne.vi v0, v9, 0
; ZVE32X-NEXT:    ret
;
; ZVE64X-LABEL: load_large_vector:
; ZVE64X:       # %bb.0:
; ZVE64X-NEXT:    vsetivli zero, 4, e64, m1, ta, ma
; ZVE64X-NEXT:    vlseg3e64.v v8, (a0)
; ZVE64X-NEXT:    vmsne.vv v0, v8, v9
; ZVE64X-NEXT:    ret
  ; Wide load of 4 interleaved groups of 3 pointers each.
  %l = load <12 x ptr>, ptr %p
  ; Strided deinterleave, field 0 of each group: elements 0, 3, 6, 9.
  %s1 = shufflevector <12 x ptr> %l, <12 x ptr> poison, <4 x i32> <i32 0, i32 3, i32 6, i32 9>
  ; Strided deinterleave, field 1 of each group: elements 1, 4, 7, 10.
  %s2 = shufflevector <12 x ptr> %l, <12 x ptr> poison, <4 x i32> <i32 1, i32 4, i32 7, i32 10>
  ; Lane-wise pointer inequality produces the <4 x i1> mask result.
  %ret = icmp ne <4 x ptr> %s1, %s2
  ret <4 x i1> %ret
}